From 5f0f4aa96b7a2a4fd8fdc85c613dd777bfaff756 Mon Sep 17 00:00:00 2001 From: maze Date: Mon, 6 Oct 2025 23:11:50 +0200 Subject: [PATCH] Moar parsers --- dataset/parser/adblock.go | 4 +- dataset/parser/dns.go | 9 ++-- dataset/parser/domains.go | 40 --------------- dataset/parser/domains_test.go | 31 ------------ dataset/parser/firewall.go | 43 ++++++++++++++++ dataset/parser/firewall_test.go | 51 +++++++++++++++++++ dataset/parser/hosts.go | 50 ++++++++++++++++--- dataset/parser/hosts_test.go | 33 +++++++++++-- dataset/parser/list.go | 72 +++++++++++++++++++++++++++ dataset/parser/list_test.go | 63 ++++++++++++++++++++++++ dataset/parser/parser.go | 82 ++++++++++++++++++++++++------- dataset/parser/parser_test.go | 30 ----------- internal/sliceutil/unique.go | 16 ++++++ internal/sliceutil/unique_test.go | 31 ++++++++++++ 14 files changed, 419 insertions(+), 136 deletions(-) delete mode 100644 dataset/parser/domains.go delete mode 100644 dataset/parser/domains_test.go create mode 100644 dataset/parser/firewall.go create mode 100644 dataset/parser/firewall_test.go create mode 100644 dataset/parser/list.go create mode 100644 dataset/parser/list_test.go create mode 100644 internal/sliceutil/unique.go create mode 100644 internal/sliceutil/unique_test.go diff --git a/dataset/parser/adblock.go b/dataset/parser/adblock.go index 1b35df6..25ff582 100644 --- a/dataset/parser/adblock.go +++ b/dataset/parser/adblock.go @@ -4,6 +4,8 @@ import ( "bufio" "io" "strings" + + "git.maze.io/maze/styx/internal/sliceutil" ) func init() { @@ -49,5 +51,5 @@ func (adblockDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored if err = scanner.Err(); err != nil { return } - return unique(domains), ignored, nil + return sliceutil.Unique(domains), ignored, nil } diff --git a/dataset/parser/dns.go b/dataset/parser/dns.go index 8b331f7..4dc8c60 100644 --- a/dataset/parser/dns.go +++ b/dataset/parser/dns.go @@ -5,6 +5,7 @@ import ( "io" "strings" + "git.maze.io/maze/styx/internal/sliceutil" "github.com/miekg/dns" ) @@ -41,7 +42,7 @@ func (dnsmasqDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored if err = scanner.Err(); err != nil { return } - return unique(domains), ignored, nil + return sliceutil.Unique(domains), ignored, nil } type mosDNSDomainsParser struct{} @@ -69,7 +70,7 @@ func (mosDNSDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored if err = scanner.Err(); err != nil { return } - return unique(domains), ignored, nil + return sliceutil.Unique(domains), ignored, nil } type smartDNSDomainsParser struct{} @@ -96,7 +97,7 @@ func (smartDNSDomainsParser) ParseDomains(r io.Reader) (domains []string, ignore if err = scanner.Err(); err != nil { return } - return unique(domains), ignored, nil + return sliceutil.Unique(domains), ignored, nil } type unboundDomainsParser struct{} @@ -135,5 +136,5 @@ func (unboundDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored if err = scanner.Err(); err != nil { return } - return unique(domains), ignored, nil + return sliceutil.Unique(domains), ignored, nil } diff --git a/dataset/parser/domains.go b/dataset/parser/domains.go deleted file mode 100644 index 31b38f9..0000000 --- a/dataset/parser/domains.go +++ /dev/null @@ -1,40 +0,0 @@ -package parser - -import ( - "bufio" - "io" - "net" - "strings" -) - -func init() { - domainsParsers = append(domainsParsers, domainsParser{}) -} - -type domainsParser struct{} - -func (domainsParser) CanHandle(line string) bool { - return isDomainName(line) && - !strings.ContainsRune(line, ' ') && - !strings.ContainsRune(line, ':') && - net.ParseIP(line) == nil -} - -func (domainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) { - scanner := bufio.NewScanner(r) - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - if isComment(line) { - continue - } - if isDomainName(line) { - domains = append(domains, line) - continue - } - ignored++ - } - if err = scanner.Err(); err != nil { - return - } - return unique(domains), ignored, nil -} diff --git a/dataset/parser/domains_test.go b/dataset/parser/domains_test.go deleted file mode 100644 index 38c6bd7..0000000 --- a/dataset/parser/domains_test.go +++ /dev/null @@ -1,31 +0,0 @@ -package parser - -import ( - "reflect" - "sort" - "strings" - "testing" -) - -func TestParseDomains(t *testing.T) { - test := `# This is a comment -facebook.com -tiktok.com -bogus ignored -youtube.com` - want := []string{"facebook.com", "tiktok.com", "youtube.com"} - - parsed, ignored, err := ParseDomains(strings.NewReader(test)) - if err != nil { - t.Fatal(err) - return - } - - sort.Strings(parsed) - if !reflect.DeepEqual(parsed, want) { - t.Errorf("expected ParseDomains(domains) to return %v, got %v", want, parsed) - } - if ignored != 1 { - t.Errorf("expected 1 ignored, got %d", ignored) - } -} diff --git a/dataset/parser/firewall.go b/dataset/parser/firewall.go new file mode 100644 index 0000000..f151361 --- /dev/null +++ b/dataset/parser/firewall.go @@ -0,0 +1,43 @@ +package parser + +import ( + "bufio" + "io" + "net/netip" + "strings" + + "git.maze.io/maze/styx/internal/sliceutil" +) + +func init() { + RegisterNetworksParser(mikroTikNetworksParser{}) +} + +type mikroTikNetworksParser struct{} + +func (mikroTikNetworksParser) CanHandle(line string) bool { + return line == "/ip firewall address-list" || + strings.HasPrefix(line, "add address=") +} + +func (mikroTikNetworksParser) ParseNetworks(r io.Reader) (prefixes []netip.Prefix, ignored int, err error) { + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if isComment(line) || line == "/ip firewall address-list" { + continue + } + if strings.HasPrefix(line, "add address=") { + part := strings.Fields(line[12:]) + if prefix, err := netip.ParsePrefix(part[0]); err == nil { + prefixes = append(prefixes, prefix) + continue + } + } + ignored++ + } + if err = scanner.Err(); err != nil { + return + } + return sliceutil.Unique(prefixes), ignored, nil +} diff --git a/dataset/parser/firewall_test.go b/dataset/parser/firewall_test.go new file mode 100644 index 0000000..a36961b --- /dev/null +++ b/dataset/parser/firewall_test.go @@ -0,0 +1,51 @@ +package parser + +import ( + "net/netip" + "reflect" + "sort" + "strings" + "testing" +) + +func TestMikroTikNetworksParser(t *testing.T) { + test := ` +# -------------------------------------------- +# IPv4 prefix list of ALIBABA +# -------------------------------------------- +# Source: ipapi.is +# -------------------------------------------- +# Last Update - Mon Oct 6 06:00:38 +06 2025 +# -------------------------------------------- +# Total Prefixes: 3764 +# -------------------------------------------- +# Maintainer: Sakib Mahmud +# -------------------------------------------- +/ip firewall address-list +add address=5.181.224.0/23 list=ALIBABA +add address=8.208.0.0/16 list=ALIBABA +add address=8.208.0.0/17 list=ALIBABA +` + want := []netip.Prefix{ + netip.MustParsePrefix("5.181.224.0/23"), + netip.MustParsePrefix("8.208.0.0/16"), + netip.MustParsePrefix("8.208.0.0/17"), + } + + parsed, ignored, err := ParseNetworks(strings.NewReader(test)) + if err != nil { + t.Fatal(err) + return + } + + sort.SliceStable(parsed, func(i, j int) bool { + return parsed[i].Addr().Less(parsed[j].Addr()) + }) + if !reflect.DeepEqual(parsed, want) { + t.Errorf("expected ParseNetworks(mikrotik.rsc) to return %v, got %v", want, parsed) + } + if ignored != 0 { + t.Errorf("expected 0 ignored, got %d", ignored) + } + +} diff --git a/dataset/parser/hosts.go b/dataset/parser/hosts.go index 2512424..4b892bd 100644 --- a/dataset/parser/hosts.go +++ b/dataset/parser/hosts.go @@ -3,22 +3,25 @@ package parser import ( "bufio" "io" - "net" + "net/netip" "strings" + + "git.maze.io/maze/styx/internal/sliceutil" ) func init() { - RegisterDomainsParser(hostsParser{}) + RegisterDomainsParser(hostsDomainsParser{}) + RegisterNetworksParser(hostsNetworksParser{}) } -type hostsParser struct{} +type hostsDomainsParser struct{} -func (hostsParser) CanHandle(line string) bool { +func (hostsDomainsParser) CanHandle(line string) bool { part := strings.Fields(line) - return len(part) >= 2 && net.ParseIP(part[0]) != nil + return len(part) >= 2 && isIP(part[0]) } -func (hostsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) { +func (hostsDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) { scanner := bufio.NewScanner(r) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) @@ -27,7 +30,7 @@ func (hostsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err } part := strings.Fields(line) - if len(part) >= 2 && net.ParseIP(part[0]) != nil { + if len(part) >= 2 && isIP(part[0]) { domains = append(domains, part[1:]...) continue } @@ -37,5 +40,36 @@ func (hostsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err if err = scanner.Err(); err != nil { return } - return unique(domains), ignored, nil + return sliceutil.Unique(domains), ignored, nil +} + +type hostsNetworksParser struct{} + +func (hostsNetworksParser) CanHandle(line string) bool { + part := strings.Fields(line) + return len(part) >= 2 && isIP(part[0]) +} + +func (hostsNetworksParser) ParseNetworks(r io.Reader) (prefixes []netip.Prefix, ignored int, err error) { + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if isComment(line) { + continue + } + + part := strings.Fields(line) + if len(part) >= 2 && isIP(part[0]) { + if addr, err := netip.ParseAddr(part[0]); err == nil && !addr.IsUnspecified() && addr.IsValid() { + prefixes = append(prefixes, addrToPrefix(addr)) + } + continue + } + + ignored++ + } + if err = scanner.Err(); err != nil { + return + } + return sliceutil.Unique(prefixes), ignored, nil } diff --git a/dataset/parser/hosts_test.go b/dataset/parser/hosts_test.go index 38a434d..3c8e31f 100644 --- a/dataset/parser/hosts_test.go +++ b/dataset/parser/hosts_test.go @@ -1,14 +1,14 @@ package parser import ( + "net/netip" "reflect" "sort" "strings" "testing" ) -func TestParseHosts(t *testing.T) { - test := `## +const testHosts = `## # Host Database # # localhost is used to configure the loopback interface @@ -20,15 +20,16 @@ func TestParseHosts(t *testing.T) { ff00::1 multicast 1.2.3.4 ` - want := []string{"broadcasthost", "dragon", "dragon.local", "dragon.maze.network", "localhost", "multicast"} - parsed, ignored, err := ParseDomains(strings.NewReader(test)) +func TestParseHosts(t *testing.T) { + parsed, ignored, err := ParseDomains(strings.NewReader(testHosts)) if err != nil { t.Fatal(err) return } sort.Strings(parsed) + want := []string{"broadcasthost", "dragon", "dragon.local", "dragon.maze.network", "localhost", "multicast"} if !reflect.DeepEqual(parsed, want) { t.Errorf("expected ParseDomains(hosts) to return %v, got %v", want, parsed) } @@ -36,3 +37,27 @@ ff00::1 multicast t.Errorf("expected 1 ignored, got %d", ignored) } } + +func TestParseHostsNetworks(t *testing.T) { + parsed, ignored, err := ParseNetworks(strings.NewReader(testHosts)) + if err != nil { + t.Fatal(err) + return + } + + sort.SliceStable(parsed, func(i, j int) bool { + return parsed[i].Addr().Less(parsed[j].Addr()) + }) + want := []netip.Prefix{ + netip.MustParsePrefix("127.0.0.1/32"), + netip.MustParsePrefix("255.255.255.255/32"), + netip.MustParsePrefix("::1/128"), + netip.MustParsePrefix("ff00::1/128"), + } + if !reflect.DeepEqual(parsed, want) { + t.Errorf("expected ParseNetworks(hosts) to return %v, got %v", want, parsed) + } + if ignored != 1 { + t.Errorf("expected 1 ignored, got %d", ignored) + } +} diff --git a/dataset/parser/list.go b/dataset/parser/list.go new file mode 100644 index 0000000..e113565 --- /dev/null +++ b/dataset/parser/list.go @@ -0,0 +1,72 @@ +package parser + +import ( + "bufio" + "io" + "net" + "net/netip" + "strings" + + "git.maze.io/maze/styx/internal/sliceutil" +) + +func init() { + RegisterDomainsParser(listDomainsParser{}) + RegisterNetworksParser(listNetworksParser{}) +} + +type listDomainsParser struct{} + +func (listDomainsParser) CanHandle(line string) bool { + return isDomainName(line) && + !strings.ContainsRune(line, ' ') && + !strings.ContainsRune(line, ':') && + net.ParseIP(line) == nil +} + +func (listDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) { + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if isComment(line) { + continue + } + if isDomainName(line) { + domains = append(domains, line) + continue + } + ignored++ + } + if err = scanner.Err(); err != nil { + return + } + return sliceutil.Unique(domains), ignored, nil +} + +type listNetworksParser struct{} + +func (listNetworksParser) CanHandle(line string) bool { + return isPrefix(line) || isIP(line) +} + +func (listNetworksParser) ParseNetworks(r io.Reader) (prefixes []netip.Prefix, ignored int, err error) { + scanner := bufio.NewScanner(r) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if isComment(line) { + continue + } + if prefix, err := netip.ParsePrefix(line); err == nil { + prefixes = append(prefixes, prefix) + continue + } else if addr, err := netip.ParseAddr(line); err == nil && !addr.IsUnspecified() && addr.IsValid() { + prefixes = append(prefixes, addrToPrefix(addr)) + continue + } + ignored++ + } + if err = scanner.Err(); err != nil { + return + } + return sliceutil.Unique(prefixes), ignored, nil +} diff --git a/dataset/parser/list_test.go b/dataset/parser/list_test.go new file mode 100644 index 0000000..9cca296 --- /dev/null +++ b/dataset/parser/list_test.go @@ -0,0 +1,63 @@ +package parser + +import ( + "net/netip" + "reflect" + "sort" + "strings" + "testing" +) + +func TestParseDomains(t *testing.T) { + test := `# This is a comment +facebook.com +tiktok.com +bogus ignored +youtube.com` + want := []string{"facebook.com", "tiktok.com", "youtube.com"} + + parsed, ignored, err := ParseDomains(strings.NewReader(test)) + if err != nil { + t.Fatal(err) + return + } + + sort.Strings(parsed) + if !reflect.DeepEqual(parsed, want) { + t.Errorf("expected ParseDomains(domains) to return %v, got %v", want, parsed) + } + if ignored != 1 { + t.Errorf("expected 1 ignored, got %d", ignored) + } +} + +func TestParseNetworks(t *testing.T) { + test := `# This is a comment +127.0.0.1 +10.42.66.0/24 +bogus ignored +::ffff:abcd:ef +fe80:0:0::0/8` + want := []netip.Prefix{ + netip.MustParsePrefix("10.42.66.0/24"), + netip.MustParsePrefix("127.0.0.1/32"), + netip.MustParsePrefix("171.205.0.239/32"), + netip.MustParsePrefix("fe80::/8"), + } + + parsed, ignored, err := ParseNetworks(strings.NewReader(test)) + if err != nil { + t.Fatal(err) + return + } + + sort.SliceStable(parsed, func(i, j int) bool { + return parsed[i].Addr().Less(parsed[j].Addr()) + }) + if !reflect.DeepEqual(parsed, want) { + t.Errorf("expected ParseNetworks(prefixes) to return %v, got %v", want, parsed) + } + if ignored != 1 { + t.Errorf("expected 1 ignored, got %d", ignored) + } +} diff --git a/dataset/parser/parser.go b/dataset/parser/parser.go index e398f27..2f67240 100644 --- a/dataset/parser/parser.go +++ b/dataset/parser/parser.go @@ -5,7 +5,7 @@ import ( "bytes" "errors" "io" - "log" + "net/netip" "strings" "github.com/miekg/dns" @@ -22,12 +22,24 @@ type DomainsParser interface { ParseDomains(io.Reader) (domains []string, ignored int, err error) } -var domainsParsers []DomainsParser +type NetworksParser interface { + Parser + ParseNetworks(io.Reader) (prefixes []netip.Prefix, ignored int, err error) +} + +var ( + domainsParsers []DomainsParser + networksParsers []NetworksParser +) func RegisterDomainsParser(parser DomainsParser) { domainsParsers = append(domainsParsers, parser) } +func RegisterNetworksParser(parser NetworksParser) { + networksParsers = append(networksParsers, parser) +} + func ParseDomains(r io.Reader) (domains []string, ignored int, err error) { var ( buffer = new(bytes.Buffer) @@ -42,7 +54,7 @@ func ParseDomains(r io.Reader) (domains []string, ignored int, err error) { } for _, parser = range domainsParsers { if parser.CanHandle(line) { - log.Printf("using parser %T", parser) + // log.Printf("using parser %T", parser) return parser.ParseDomains(io.MultiReader(buffer, r)) } } @@ -51,26 +63,60 @@ func ParseDomains(r io.Reader) (domains []string, ignored int, err error) { return nil, 0, ErrNoParser } +func ParseNetworks(r io.Reader) (prefixes []netip.Prefix, ignored int, err error) { + var ( + buffer = new(bytes.Buffer) + scanner = bufio.NewScanner(io.TeeReader(r, buffer)) + line string + parser NetworksParser + ) + for scanner.Scan() { + line = strings.TrimSpace(scanner.Text()) + if isComment(line) { + continue + } + for _, parser = range networksParsers { + if parser.CanHandle(line) { + // log.Printf("using parser %T", parser) + return parser.ParseNetworks(io.MultiReader(buffer, r)) + } + } + break + } + return nil, 0, ErrNoParser +} + func isComment(line string) bool { return line == "" || line[0] == '#' || line[0] == '!' } -func isDomainName(name string) bool { - n, ok := dns.IsDomainName(name) +func isDomainName(s string) bool { + n, ok := dns.IsDomainName(s) return n >= 2 && ok } -func unique(strings []string) []string { - if strings == nil { - return nil - } - v := make(map[string]struct{}) - for _, s := range strings { - v[s] = struct{}{} - } - o := make([]string, 0, len(v)) - for k := range v { - o = append(o, k) - } - return o +func isIP(s string) bool { + _, err := netip.ParseAddr(s) + return err == nil +} + +func isPrefix(s string) bool { + _, err := netip.ParsePrefix(s) + return err == nil +} + +func addrToPrefix(addr netip.Addr) netip.Prefix { + switch { + case addr.Is4(): + prefix, _ := addr.Prefix(32) + return prefix + case addr.Is4In6(): + prefix, _ := addr.Unmap().Prefix(32) + return prefix + case addr.Is6(): + prefix, _ := addr.Prefix(128) + return prefix + default: + return netip.Prefix{} + } } diff --git a/dataset/parser/parser_test.go b/dataset/parser/parser_test.go index fd469e2..0bfe2c2 100644 --- a/dataset/parser/parser_test.go +++ b/dataset/parser/parser_test.go @@ -1,31 +1 @@ package parser - -import ( - "reflect" - "sort" - "testing" -) - -func TestUnique(t *testing.T) { - tests := []struct { - Name string - Test []string - Want []string - }{ - {"nil", nil, nil}, - {"single", []string{"test"}, []string{"test"}}, - {"duplicate", []string{"test", "test"}, []string{"test"}}, - {"multiple", []string{"a", "a", "b", "b", "b", "c"}, []string{"a", "b", "c"}}, - } - for _, test := range tests { - t.Run(test.Name, func(it *testing.T) { - v := unique(test.Test) - if v != nil { - sort.Strings(v) - } - if !reflect.DeepEqual(v, test.Want) { - it.Errorf("expected unique(%v) to return %v, got %v", test.Test, test.Want, v) - } - }) - } -} diff --git a/internal/sliceutil/unique.go b/internal/sliceutil/unique.go new file mode 100644 index 0000000..f533fbc --- /dev/null +++ b/internal/sliceutil/unique.go @@ -0,0 +1,16 @@ +package sliceutil + +func Unique[T comparable](values []T) []T { + if values == nil { + return nil + } + v := make(map[T]struct{}) + for _, s := range values { + v[s] = struct{}{} + } + o := make([]T, 0, len(v)) + for k := range v { + o = append(o, k) + } + return o +} diff --git a/internal/sliceutil/unique_test.go b/internal/sliceutil/unique_test.go new file mode 100644 index 0000000..6bd862b --- /dev/null +++ b/internal/sliceutil/unique_test.go @@ -0,0 +1,31 @@ +package sliceutil + +import ( + "reflect" + "sort" + "testing" +) + +func TestUnique(t *testing.T) { + tests := []struct { + Name string + Test []string + Want []string + }{ + {"nil", nil, nil}, + {"single", []string{"test"}, []string{"test"}}, + {"duplicate", []string{"test", "test"}, []string{"test"}}, + {"multiple", []string{"a", "a", "b", "b", "b", "c"}, []string{"a", "b", "c"}}, + } + for _, test := range tests { + t.Run(test.Name, func(it *testing.T) { + v := Unique(test.Test) + if v != nil { + sort.Strings(v) + } + if !reflect.DeepEqual(v, test.Want) { + it.Errorf("expected unique(%v) to return %v, got %v", test.Test, test.Want, v) + } + }) + } +}