Moar parsers

This commit is contained in:
2025-10-06 23:11:50 +02:00
parent a254b306f2
commit 5f0f4aa96b
14 changed files with 419 additions and 136 deletions

View File

@@ -4,6 +4,8 @@ import (
"bufio"
"io"
"strings"
"git.maze.io/maze/styx/internal/sliceutil"
)
func init() {
@@ -49,5 +51,5 @@ func (adblockDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored
if err = scanner.Err(); err != nil {
return
}
return unique(domains), ignored, nil
return sliceutil.Unique(domains), ignored, nil
}

View File

@@ -5,6 +5,7 @@ import (
"io"
"strings"
"git.maze.io/maze/styx/internal/sliceutil"
"github.com/miekg/dns"
)
@@ -41,7 +42,7 @@ func (dnsmasqDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored
if err = scanner.Err(); err != nil {
return
}
return unique(domains), ignored, nil
return sliceutil.Unique(domains), ignored, nil
}
type mosDNSDomainsParser struct{}
@@ -69,7 +70,7 @@ func (mosDNSDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored
if err = scanner.Err(); err != nil {
return
}
return unique(domains), ignored, nil
return sliceutil.Unique(domains), ignored, nil
}
type smartDNSDomainsParser struct{}
@@ -96,7 +97,7 @@ func (smartDNSDomainsParser) ParseDomains(r io.Reader) (domains []string, ignore
if err = scanner.Err(); err != nil {
return
}
return unique(domains), ignored, nil
return sliceutil.Unique(domains), ignored, nil
}
type unboundDomainsParser struct{}
@@ -135,5 +136,5 @@ func (unboundDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored
if err = scanner.Err(); err != nil {
return
}
return unique(domains), ignored, nil
return sliceutil.Unique(domains), ignored, nil
}

View File

@@ -1,40 +0,0 @@
package parser
import (
"bufio"
"io"
"net"
"strings"
)
// init appends the plain domain-list parser to the package-level
// domainsParsers slice.
func init() {
	domainsParsers = append(domainsParsers, domainsParser{})
}
// domainsParser parses plain lists that carry one domain name per line.
type domainsParser struct{}

// CanHandle reports whether line is a bare domain name: it must pass
// isDomainName, contain neither a space nor a colon, and must not parse as an
// IP address.
func (domainsParser) CanHandle(line string) bool {
	if !isDomainName(line) {
		return false
	}
	if strings.ContainsAny(line, " :") {
		return false
	}
	return net.ParseIP(line) == nil
}
// ParseDomains reads r line by line. Comment lines are skipped, lines that
// pass isDomainName are collected, and every other line is counted in ignored.
// The collected names are passed through unique before being returned. If the
// scanner reports an error, the names gathered so far are returned unmodified
// together with that error.
func (domainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
	lines := bufio.NewScanner(r)
	for lines.Scan() {
		entry := strings.TrimSpace(lines.Text())
		switch {
		case isComment(entry):
			// Nothing to do for comments.
		case isDomainName(entry):
			domains = append(domains, entry)
		default:
			ignored++
		}
	}
	if err = lines.Err(); err != nil {
		return domains, ignored, err
	}
	return unique(domains), ignored, nil
}

View File

@@ -1,31 +0,0 @@
package parser
import (
"reflect"
"sort"
"strings"
"testing"
)
// TestParseDomains runs a plain domain list through ParseDomains and checks
// both the returned names and the number of ignored lines.
func TestParseDomains(t *testing.T) {
	const input = `# This is a comment
facebook.com
tiktok.com
bogus ignored
youtube.com`

	got, skipped, err := ParseDomains(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}

	// Sort before comparing so the check does not depend on result order.
	sort.Strings(got)
	expected := []string{"facebook.com", "tiktok.com", "youtube.com"}
	if !reflect.DeepEqual(got, expected) {
		t.Errorf("expected ParseDomains(domains) to return %v, got %v", expected, got)
	}
	if skipped != 1 {
		t.Errorf("expected 1 ignored, got %d", skipped)
	}
}

View File

@@ -0,0 +1,43 @@
package parser
import (
"bufio"
"io"
"net/netip"
"strings"
"git.maze.io/maze/styx/internal/sliceutil"
)
// init registers the MikroTik address-list parser through
// RegisterNetworksParser.
func init() {
	RegisterNetworksParser(mikroTikNetworksParser{})
}
// mikroTikNetworksParser parses MikroTik "/ip firewall address-list" scripts.
type mikroTikNetworksParser struct{}

// CanHandle reports whether line is the address-list section header or begins
// with an "add address=" command.
func (mikroTikNetworksParser) CanHandle(line string) bool {
	if line == "/ip firewall address-list" {
		return true
	}
	return strings.HasPrefix(line, "add address=")
}
// ParseNetworks reads a MikroTik address-list script from r and returns the
// prefixes found in its "add address=" commands.
//
// Comment lines and the "/ip firewall address-list" header are skipped. For an
// "add address=" line, the first whitespace-separated field after the command
// is parsed with netip.ParsePrefix. Lines that are neither skipped nor yield a
// valid prefix are counted in ignored, including an "add address=" command
// with nothing after it. The result is passed through sliceutil.Unique. If the
// scanner reports an error, the prefixes gathered so far are returned
// unmodified together with that error.
func (mikroTikNetworksParser) ParseNetworks(r io.Reader) (prefixes []netip.Prefix, ignored int, err error) {
	const addCommand = "add address="

	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if isComment(line) || line == "/ip firewall address-list" {
			continue
		}
		if strings.HasPrefix(line, addCommand) {
			fields := strings.Fields(line[len(addCommand):])
			// A bare "add address=" leaves no fields; guard the index so the
			// line is counted as ignored instead of panicking.
			if len(fields) > 0 {
				if prefix, perr := netip.ParsePrefix(fields[0]); perr == nil {
					prefixes = append(prefixes, prefix)
					continue
				}
			}
		}
		ignored++
	}
	if err = scanner.Err(); err != nil {
		return prefixes, ignored, err
	}
	return sliceutil.Unique(prefixes), ignored, nil
}

View File

@@ -0,0 +1,51 @@
package parser
import (
"net/netip"
"reflect"
"sort"
"strings"
"testing"
)
// TestMikroTikNetworksParser runs a MikroTik address-list export through
// ParseNetworks and checks the returned prefixes and the ignored count.
func TestMikroTikNetworksParser(t *testing.T) {
	test := `
# --------------------------------------------
# IPv4 prefix list of ALIBABA
# --------------------------------------------
# Source: ipapi.is
# --------------------------------------------
# Last Update - Mon Oct 6 06:00:38 +06 2025
# --------------------------------------------
# Total Prefixes: 3764
# --------------------------------------------
# Maintainer: Sakib Mahmud
# --------------------------------------------
/ip firewall address-list
add address=5.181.224.0/23 list=ALIBABA
add address=8.208.0.0/16 list=ALIBABA
add address=8.208.0.0/17 list=ALIBABA
`
	want := []netip.Prefix{
		netip.MustParsePrefix("5.181.224.0/23"),
		netip.MustParsePrefix("8.208.0.0/16"),
		netip.MustParsePrefix("8.208.0.0/17"),
	}

	parsed, ignored, err := ParseNetworks(strings.NewReader(test))
	if err != nil {
		t.Fatal(err)
	}

	// 8.208.0.0/16 and 8.208.0.0/17 share the same address, so ordering by
	// address alone would leave their relative position dependent on the order
	// the parser happened to return them in. Break ties on prefix length so the
	// comparison below is deterministic.
	sort.Slice(parsed, func(i, j int) bool {
		if c := parsed[i].Addr().Compare(parsed[j].Addr()); c != 0 {
			return c < 0
		}
		return parsed[i].Bits() < parsed[j].Bits()
	})
	if !reflect.DeepEqual(parsed, want) {
		t.Errorf("expected ParseNetworks(mikrotik.rsc) to return %v, got %v", want, parsed)
	}
	if ignored != 0 {
		t.Errorf("expected 0 ignored, got %d", ignored)
	}
}

View File

@@ -3,22 +3,25 @@ package parser
import (
"bufio"
"io"
"net"
"net/netip"
"strings"
"git.maze.io/maze/styx/internal/sliceutil"
)
func init() {
RegisterDomainsParser(hostsParser{})
RegisterDomainsParser(hostsDomainsParser{})
RegisterNetworksParser(hostsNetworksParser{})
}
type hostsParser struct{}
type hostsDomainsParser struct{}
func (hostsParser) CanHandle(line string) bool {
func (hostsDomainsParser) CanHandle(line string) bool {
part := strings.Fields(line)
return len(part) >= 2 && net.ParseIP(part[0]) != nil
return len(part) >= 2 && isIP(part[0])
}
func (hostsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
func (hostsDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
scanner := bufio.NewScanner(r)
for scanner.Scan() {
line := strings.TrimSpace(scanner.Text())
@@ -27,7 +30,7 @@ func (hostsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err
}
part := strings.Fields(line)
if len(part) >= 2 && net.ParseIP(part[0]) != nil {
if len(part) >= 2 && isIP(part[0]) {
domains = append(domains, part[1:]...)
continue
}
@@ -37,5 +40,36 @@ func (hostsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err
if err = scanner.Err(); err != nil {
return
}
return unique(domains), ignored, nil
return sliceutil.Unique(domains), ignored, nil
}
// hostsNetworksParser extracts the addresses from hosts-file style input.
type hostsNetworksParser struct{}

// CanHandle reports whether line has at least two whitespace-separated fields
// and the first of them is an IP address.
func (hostsNetworksParser) CanHandle(line string) bool {
	fields := strings.Fields(line)
	if len(fields) < 2 {
		return false
	}
	return isIP(fields[0])
}
// ParseNetworks reads hosts-file style lines from r and returns the leading
// address of each entry as a single-address prefix.
//
// Comment lines are skipped. A line counts as ignored unless it has at least
// two fields and the first one is an IP address. Entries whose address is
// unspecified are dropped without being counted as ignored. The result is
// passed through sliceutil.Unique. If the scanner reports an error, the
// prefixes gathered so far are returned unmodified together with that error.
func (hostsNetworksParser) ParseNetworks(r io.Reader) (prefixes []netip.Prefix, ignored int, err error) {
	lines := bufio.NewScanner(r)
	for lines.Scan() {
		entry := strings.TrimSpace(lines.Text())
		if isComment(entry) {
			continue
		}

		fields := strings.Fields(entry)
		if len(fields) < 2 || !isIP(fields[0]) {
			ignored++
			continue
		}

		addr, perr := netip.ParseAddr(fields[0])
		if perr != nil || addr.IsUnspecified() || !addr.IsValid() {
			continue
		}
		prefixes = append(prefixes, addrToPrefix(addr))
	}
	if err = lines.Err(); err != nil {
		return prefixes, ignored, err
	}
	return sliceutil.Unique(prefixes), ignored, nil
}

View File

@@ -1,14 +1,14 @@
package parser
import (
"net/netip"
"reflect"
"sort"
"strings"
"testing"
)
func TestParseHosts(t *testing.T) {
test := `##
const testHosts = `##
# Host Database
#
# localhost is used to configure the loopback interface
@@ -20,15 +20,16 @@ func TestParseHosts(t *testing.T) {
ff00::1 multicast
1.2.3.4
`
want := []string{"broadcasthost", "dragon", "dragon.local", "dragon.maze.network", "localhost", "multicast"}
parsed, ignored, err := ParseDomains(strings.NewReader(test))
func TestParseHosts(t *testing.T) {
parsed, ignored, err := ParseDomains(strings.NewReader(testHosts))
if err != nil {
t.Fatal(err)
return
}
sort.Strings(parsed)
want := []string{"broadcasthost", "dragon", "dragon.local", "dragon.maze.network", "localhost", "multicast"}
if !reflect.DeepEqual(parsed, want) {
t.Errorf("expected ParseDomains(hosts) to return %v, got %v", want, parsed)
}
@@ -36,3 +37,27 @@ ff00::1 multicast
t.Errorf("expected 1 ignored, got %d", ignored)
}
}
// TestParseHostsNetworks runs the shared testHosts fixture through
// ParseNetworks and checks the returned prefixes and the ignored count.
func TestParseHostsNetworks(t *testing.T) {
	got, skipped, err := ParseNetworks(strings.NewReader(testHosts))
	if err != nil {
		t.Fatal(err)
	}

	// Order by address before comparing against the expected list.
	byAddr := func(i, j int) bool {
		return got[i].Addr().Less(got[j].Addr())
	}
	sort.SliceStable(got, byAddr)

	expected := []netip.Prefix{
		netip.MustParsePrefix("127.0.0.1/32"),
		netip.MustParsePrefix("255.255.255.255/32"),
		netip.MustParsePrefix("::1/128"),
		netip.MustParsePrefix("ff00::1/128"),
	}
	if !reflect.DeepEqual(got, expected) {
		t.Errorf("expected ParseNetworks(hosts) to return %v, got %v", expected, got)
	}
	if skipped != 1 {
		t.Errorf("expected 1 ignored, got %d", skipped)
	}
}

72
dataset/parser/list.go Normal file
View File

@@ -0,0 +1,72 @@
package parser
import (
"bufio"
"io"
"net"
"net/netip"
"strings"
"git.maze.io/maze/styx/internal/sliceutil"
)
// init registers the plain-list parsers: one for domain names and one for
// network prefixes.
func init() {
	RegisterDomainsParser(listDomainsParser{})
	RegisterNetworksParser(listNetworksParser{})
}
// listDomainsParser parses plain lists that carry one domain name per line.
type listDomainsParser struct{}

// CanHandle reports whether line is a bare domain name: it must pass
// isDomainName, contain neither a space nor a colon, and must not parse as an
// IP address.
func (listDomainsParser) CanHandle(line string) bool {
	if !isDomainName(line) {
		return false
	}
	if strings.ContainsAny(line, " :") {
		return false
	}
	return net.ParseIP(line) == nil
}
// ParseDomains reads r line by line. Comment lines are skipped, lines that
// pass isDomainName are collected, and every other line is counted in ignored.
// The collected names are passed through sliceutil.Unique. If the scanner
// reports an error, the names gathered so far are returned unmodified together
// with that error.
func (listDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
	lines := bufio.NewScanner(r)
	for lines.Scan() {
		entry := strings.TrimSpace(lines.Text())
		switch {
		case isComment(entry):
			// Nothing to do for comments.
		case isDomainName(entry):
			domains = append(domains, entry)
		default:
			ignored++
		}
	}
	if err = lines.Err(); err != nil {
		return domains, ignored, err
	}
	return sliceutil.Unique(domains), ignored, nil
}
// listNetworksParser parses plain lists that carry one prefix or address per
// line.
type listNetworksParser struct{}

// CanHandle reports whether line parses as a network prefix or as a single IP
// address.
func (listNetworksParser) CanHandle(line string) bool {
	if isPrefix(line) {
		return true
	}
	return isIP(line)
}
// ParseNetworks reads r line by line and returns the prefixes it finds.
//
// Comment lines are skipped. Each remaining line is first parsed as a prefix.
// If that fails it is parsed as a single address, which is converted with
// addrToPrefix. Lines that are neither, and unspecified addresses, are counted
// in ignored. The result is passed through sliceutil.Unique. If the scanner
// reports an error, the prefixes gathered so far are returned unmodified
// together with that error.
func (listNetworksParser) ParseNetworks(r io.Reader) (prefixes []netip.Prefix, ignored int, err error) {
	lines := bufio.NewScanner(r)
	for lines.Scan() {
		entry := strings.TrimSpace(lines.Text())
		if isComment(entry) {
			continue
		}

		prefix, perr := netip.ParsePrefix(entry)
		if perr != nil {
			// Not CIDR notation; fall back to a bare address.
			addr, aerr := netip.ParseAddr(entry)
			if aerr != nil || addr.IsUnspecified() || !addr.IsValid() {
				ignored++
				continue
			}
			prefix = addrToPrefix(addr)
		}
		prefixes = append(prefixes, prefix)
	}
	if err = lines.Err(); err != nil {
		return prefixes, ignored, err
	}
	return sliceutil.Unique(prefixes), ignored, nil
}

View File

@@ -0,0 +1,63 @@
package parser
import (
"net/netip"
"reflect"
"sort"
"strings"
"testing"
)
// TestParseDomains runs a plain domain list through ParseDomains and checks
// both the returned names and the number of ignored lines.
func TestParseDomains(t *testing.T) {
	const input = `# This is a comment
facebook.com
tiktok.com
bogus ignored
youtube.com`

	got, skipped, err := ParseDomains(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}

	// Sort before comparing so the check does not depend on result order.
	sort.Strings(got)
	expected := []string{"facebook.com", "tiktok.com", "youtube.com"}
	if !reflect.DeepEqual(got, expected) {
		t.Errorf("expected ParseDomains(domains) to return %v, got %v", expected, got)
	}
	if skipped != 1 {
		t.Errorf("expected 1 ignored, got %d", skipped)
	}
}
// TestParseNetworks runs a plain list of addresses and prefixes through
// ParseNetworks and checks the returned prefixes and the ignored count.
func TestParseNetworks(t *testing.T) {
	const input = `# This is a comment
127.0.0.1
10.42.66.0/24
bogus ignored
::ffff:abcd:ef
fe80:0:0::0/8`

	got, skipped, err := ParseNetworks(strings.NewReader(input))
	if err != nil {
		t.Fatal(err)
	}

	// Order by address before comparing against the expected list.
	byAddr := func(i, j int) bool {
		return got[i].Addr().Less(got[j].Addr())
	}
	sort.SliceStable(got, byAddr)

	expected := []netip.Prefix{
		netip.MustParsePrefix("10.42.66.0/24"),
		netip.MustParsePrefix("127.0.0.1/32"),
		netip.MustParsePrefix("171.205.0.239/32"),
		netip.MustParsePrefix("fe80::/8"),
	}
	if !reflect.DeepEqual(got, expected) {
		t.Errorf("expected ParseNetworks(prefixes) to return %v, got %v", expected, got)
	}
	if skipped != 1 {
		t.Errorf("expected 1 ignored, got %d", skipped)
	}
}

View File

@@ -5,7 +5,7 @@ import (
"bytes"
"errors"
"io"
"log"
"net/netip"
"strings"
"github.com/miekg/dns"
@@ -22,12 +22,24 @@ type DomainsParser interface {
ParseDomains(io.Reader) (domains []string, ignored int, err error)
}
var domainsParsers []DomainsParser
// NetworksParser is a Parser that can additionally parse network prefixes
// from a reader.
type NetworksParser interface {
	Parser
	// ParseNetworks reads from the given reader and returns the parsed
	// prefixes, a count of ignored entries, and any error encountered.
	ParseNetworks(io.Reader) (prefixes []netip.Prefix, ignored int, err error)
}
var (
	// domainsParsers holds the parsers added by RegisterDomainsParser;
	// ParseDomains tries them in slice order.
	domainsParsers []DomainsParser
	// networksParsers holds the parsers added by RegisterNetworksParser;
	// ParseNetworks tries them in slice order.
	networksParsers []NetworksParser
)
// RegisterDomainsParser appends parser to the list of parsers that
// ParseDomains tries. It performs no locking of its own.
func RegisterDomainsParser(parser DomainsParser) {
	domainsParsers = append(domainsParsers, parser)
}
// RegisterNetworksParser appends parser to the list of parsers that
// ParseNetworks tries. It performs no locking of its own.
func RegisterNetworksParser(parser NetworksParser) {
	networksParsers = append(networksParsers, parser)
}
func ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
var (
buffer = new(bytes.Buffer)
@@ -42,7 +54,7 @@ func ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
}
for _, parser = range domainsParsers {
if parser.CanHandle(line) {
log.Printf("using parser %T", parser)
// log.Printf("using parser %T", parser)
return parser.ParseDomains(io.MultiReader(buffer, r))
}
}
@@ -51,26 +63,60 @@ func ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
return nil, 0, ErrNoParser
}
func ParseNetworks(r io.Reader) (prefixes []netip.Prefix, ignored int, err error) {
var (
buffer = new(bytes.Buffer)
scanner = bufio.NewScanner(io.TeeReader(r, buffer))
line string
parser NetworksParser
)
for scanner.Scan() {
line = strings.TrimSpace(scanner.Text())
if isComment(line) {
continue
}
for _, parser = range networksParsers {
if parser.CanHandle(line) {
// log.Printf("using parser %T", parser)
return parser.ParseNetworks(io.MultiReader(buffer, r))
}
}
break
}
return nil, 0, ErrNoParser
}
// isComment reports whether line is empty or starts with '#' or '!'.
func isComment(line string) bool {
	if len(line) == 0 {
		return true
	}
	switch line[0] {
	case '#', '!':
		return true
	}
	return false
}
func isDomainName(name string) bool {
n, ok := dns.IsDomainName(name)
func isDomainName(s string) bool {
n, ok := dns.IsDomainName(s)
return n >= 2 && ok
}
func unique(strings []string) []string {
if strings == nil {
return nil
}
v := make(map[string]struct{})
for _, s := range strings {
v[s] = struct{}{}
}
o := make([]string, 0, len(v))
for k := range v {
o = append(o, k)
}
return o
// isIP reports whether s parses as an IP address with netip.ParseAddr.
func isIP(s string) bool {
	if _, err := netip.ParseAddr(s); err != nil {
		return false
	}
	return true
}
// isPrefix reports whether s parses as a CIDR prefix with netip.ParsePrefix.
func isPrefix(s string) bool {
	if _, err := netip.ParsePrefix(s); err != nil {
		return false
	}
	return true
}
// addrToPrefix returns the prefix that covers exactly addr: /32 for IPv4 and
// /128 for IPv6. IPv4-mapped IPv6 addresses are unmapped first and yield an
// IPv4 /32. The zero Addr yields the zero Prefix.
func addrToPrefix(addr netip.Addr) netip.Prefix {
	if !addr.IsValid() {
		return netip.Prefix{}
	}
	// Unmap only changes IPv4-mapped IPv6 addresses; BitLen then gives the
	// full-length mask for whichever family is left.
	host := addr.Unmap()
	prefix, _ := host.Prefix(host.BitLen())
	return prefix
}

View File

@@ -1,31 +1 @@
package parser
import (
"reflect"
"sort"
"testing"
)
// TestUnique checks unique against nil, single-element, and duplicated inputs.
func TestUnique(t *testing.T) {
	type testCase struct {
		Name string
		Test []string
		Want []string
	}
	cases := []testCase{
		{"nil", nil, nil},
		{"single", []string{"test"}, []string{"test"}},
		{"duplicate", []string{"test", "test"}, []string{"test"}},
		{"multiple", []string{"a", "a", "b", "b", "b", "c"}, []string{"a", "b", "c"}},
	}
	for _, tc := range cases {
		t.Run(tc.Name, func(it *testing.T) {
			got := unique(tc.Test)
			// Sort non-nil results before the comparison.
			if got != nil {
				sort.Strings(got)
			}
			if !reflect.DeepEqual(got, tc.Want) {
				it.Errorf("expected unique(%v) to return %v, got %v", tc.Test, tc.Want, got)
			}
		})
	}
}