Checkpoint
This commit is contained in:
53
dataset/parser/adblock.go
Normal file
53
dataset/parser/adblock.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func init() {
|
||||
RegisterDomainsParser(adblockDomainsParser{})
|
||||
}
|
||||
|
||||
// adblockDomainsParser extracts domains from AdBlock-style filter lists.
type adblockDomainsParser struct{}

// CanHandle reports whether line looks like it belongs to an AdBlock filter
// list: an "[Adblock ..." header (matched case-insensitively), an "@@"
// exception rule, a "||" domain-anchor rule, or a rule starting with "*".
//
// Prefix checks are used throughout (rather than indexing line[0]) so that
// an empty line yields false instead of panicking.
func (adblockDomainsParser) CanHandle(line string) bool {
	return strings.HasPrefix(strings.ToLower(line), `[adblock`) ||
		strings.HasPrefix(line, "@@") || // exception rule
		strings.HasPrefix(line, "||") || // domain anchor
		strings.HasPrefix(line, "*") // leading wildcard
}
|
||||
|
||||
func (adblockDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if isComment(line) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Common AdBlock patterns:
|
||||
// ||domain.com^
|
||||
// |http://domain.com|
|
||||
// domain.com/path
|
||||
// *domain.com*
|
||||
switch {
|
||||
case strings.HasPrefix(line, `||`): // domain anchor
|
||||
if i := strings.IndexByte(line, '^'); i != -1 {
|
||||
domains = append(domains, line[2:i])
|
||||
continue
|
||||
}
|
||||
case strings.HasPrefix(line, `|`) && strings.HasSuffix(line, `|`):
|
||||
domains = append(domains, line[1:len(line)-2])
|
||||
continue
|
||||
case strings.HasPrefix(line, `[`):
|
||||
continue
|
||||
}
|
||||
ignored++
|
||||
}
|
||||
if err = scanner.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
return unique(domains), ignored, nil
|
||||
}
|
41
dataset/parser/adblock_test.go
Normal file
41
dataset/parser/adblock_test.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestAdBlockParser(t *testing.T) {
|
||||
test := `[Adblock Plus 2.0]
|
||||
! Title: AdRules DNS List
|
||||
! Homepage: https://github.com/Cats-Team/AdRules
|
||||
! Powerd by Cats-Team
|
||||
! Expires: 1 (update frequency)
|
||||
! Description: The DNS Filters
|
||||
! Total count: 145270
|
||||
! Update: 2025-10-07 02:05:08(GMT+8)
|
||||
/^.+stat\.kugou\.com/
|
||||
/^admarvel\./
|
||||
||*-ad-sign.byteimg.com^
|
||||
||*-ad.a.yximgs.com^
|
||||
||*-applog.fqnovel.com^
|
||||
||*-datareceiver.aki-game.net^
|
||||
||*.exaapi.com^`
|
||||
want := []string{"*-ad-sign.byteimg.com", "*-ad.a.yximgs.com", "*-applog.fqnovel.com", "*-datareceiver.aki-game.net", "*.exaapi.com"}
|
||||
|
||||
parsed, ignored, err := ParseDomains(strings.NewReader(test))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
sort.Strings(parsed)
|
||||
if !reflect.DeepEqual(parsed, want) {
|
||||
t.Errorf("expected ParseDomains(domains) to return %v, got %v", want, parsed)
|
||||
}
|
||||
if ignored != 2 {
|
||||
t.Errorf("expected 2 ignored, got %d", ignored)
|
||||
}
|
||||
}
|
139
dataset/parser/dns.go
Normal file
139
dataset/parser/dns.go
Normal file
@@ -0,0 +1,139 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
)
|
||||
|
||||
func init() {
|
||||
RegisterDomainsParser(dnsmasqDomainsParser{})
|
||||
RegisterDomainsParser(mosDNSDomainsParser{})
|
||||
RegisterDomainsParser(smartDNSDomainsParser{})
|
||||
RegisterDomainsParser(unboundDomainsParser{})
|
||||
}
|
||||
|
||||
// dnsmasqDomainsParser extracts domains from dnsmasq "address=/.../..." lines.
type dnsmasqDomainsParser struct{}

// CanHandle reports whether line begins with the dnsmasq "address=/" directive.
func (dnsmasqDomainsParser) CanHandle(line string) bool {
	const prefix = "address=/"
	if len(line) < len(prefix) {
		return false
	}
	return line[:len(prefix)] == prefix
}
|
||||
|
||||
func (dnsmasqDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if isComment(line) {
|
||||
continue
|
||||
}
|
||||
switch {
|
||||
case strings.HasPrefix(line, "address=/"):
|
||||
part := strings.FieldsFunc(line, func(r rune) bool { return r == '/' })
|
||||
if len(part) >= 3 && isDomainName(part[1]) {
|
||||
domains = append(domains, part[1])
|
||||
continue
|
||||
}
|
||||
}
|
||||
ignored++
|
||||
}
|
||||
if err = scanner.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
return unique(domains), ignored, nil
|
||||
}
|
||||
|
||||
type mosDNSDomainsParser struct{}
|
||||
|
||||
func (mosDNSDomainsParser) CanHandle(line string) bool {
|
||||
if strings.HasPrefix(line, "domain:") {
|
||||
return isDomainName(line[7:])
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (mosDNSDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if isComment(line) {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(line, "domain:") {
|
||||
domains = append(domains, line[7:])
|
||||
continue
|
||||
}
|
||||
ignored++
|
||||
}
|
||||
if err = scanner.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
return unique(domains), ignored, nil
|
||||
}
|
||||
|
||||
// smartDNSDomainsParser extracts domains from "address /<name>/..." lines.
type smartDNSDomainsParser struct{}

// CanHandle reports whether line begins with the "address /" directive.
func (smartDNSDomainsParser) CanHandle(line string) bool {
	const prefix = "address /"
	return strings.Index(line, prefix) == 0
}
|
||||
|
||||
func (smartDNSDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if isComment(line) {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(line, "address /") {
|
||||
if i := strings.IndexByte(line[9:], '/'); i > -1 {
|
||||
domains = append(domains, line[9:i+9])
|
||||
continue
|
||||
}
|
||||
}
|
||||
ignored++
|
||||
}
|
||||
if err = scanner.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
return unique(domains), ignored, nil
|
||||
}
|
||||
|
||||
// unboundDomainsParser extracts domains from "local-data:" and "local-zone:"
// configuration lines.
type unboundDomainsParser struct{}

// CanHandle reports whether line begins with "local-data:" or "local-zone:".
func (unboundDomainsParser) CanHandle(line string) bool {
	for _, directive := range [...]string{"local-data:", "local-zone:"} {
		if strings.HasPrefix(line, directive) {
			return true
		}
	}
	return false
}
|
||||
|
||||
func (unboundDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if isComment(line) {
|
||||
continue
|
||||
}
|
||||
switch {
|
||||
case strings.HasPrefix(line, "local-data:"):
|
||||
record := strings.Trim(strings.TrimSpace(line[11:]), `"`)
|
||||
if rr, err := dns.NewRR(record); err == nil {
|
||||
switch rr.Header().Rrtype {
|
||||
case dns.TypeA, dns.TypeAAAA, dns.TypeCNAME:
|
||||
domains = append(domains, strings.Trim(rr.Header().Name, `.`))
|
||||
continue
|
||||
}
|
||||
}
|
||||
case strings.HasPrefix(line, "local-zone:") && strings.HasSuffix(line, " reject"):
|
||||
line = strings.Trim(strings.TrimSpace(line[11:]), `"`)
|
||||
if i := strings.IndexByte(line, '"'); i > -1 {
|
||||
domains = append(domains, line[:i])
|
||||
continue
|
||||
}
|
||||
}
|
||||
ignored++
|
||||
}
|
||||
if err = scanner.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
return unique(domains), ignored, nil
|
||||
}
|
106
dataset/parser/dns_test.go
Normal file
106
dataset/parser/dns_test.go
Normal file
@@ -0,0 +1,106 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDNSMasqParser(t *testing.T) {
|
||||
tests := []struct {
|
||||
Name string
|
||||
Test string
|
||||
Want []string
|
||||
WantIgnored int
|
||||
}{
|
||||
{
|
||||
"data",
|
||||
`
|
||||
local-data: "junk1.doubleclick.net A 127.0.0.1"
|
||||
local-data: "junk2.doubleclick.net A 127.0.0.1"
|
||||
local-data: "junk2.doubleclick.net CNAME doubleclick.net."
|
||||
local-data: "junk6.doubleclick.net AAAA ::1"
|
||||
local-data: "doubleclick.net A 127.0.0.1"
|
||||
local-data: "ad.junk1.doubleclick.net A 127.0.0.1"
|
||||
local-data: "adjunk.google.com A 127.0.0.1"`,
|
||||
[]string{"ad.junk1.doubleclick.net", "adjunk.google.com", "doubleclick.net", "junk1.doubleclick.net", "junk2.doubleclick.net", "junk6.doubleclick.net"},
|
||||
0,
|
||||
},
|
||||
{
|
||||
"zone",
|
||||
`
|
||||
local-zone: "doubleclick.net" reject
|
||||
local-zone: "adjunk.google.com" reject`,
|
||||
[]string{"adjunk.google.com", "doubleclick.net"},
|
||||
0,
|
||||
},
|
||||
{
|
||||
"address",
|
||||
`
|
||||
address=/ziyu.net/0.0.0.0
|
||||
address=/zlp6s.pw/0.0.0.0
|
||||
address=/zm232.com/0.0.0.0
|
||||
`,
|
||||
[]string{"ziyu.net", "zlp6s.pw", "zm232.com"},
|
||||
0,
|
||||
},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.Name, func(it *testing.T) {
|
||||
parsed, ignored, err := ParseDomains(strings.NewReader(test.Test))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
sort.Strings(parsed)
|
||||
if !reflect.DeepEqual(parsed, test.Want) {
|
||||
t.Errorf("expected ParseDomains(dnsmasq) to return\n\t%v, got\n\t%v", test.Want, parsed)
|
||||
}
|
||||
if ignored != test.WantIgnored {
|
||||
t.Errorf("expected %d ignored, got %d", test.WantIgnored, ignored)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestMOSDNSParser(t *testing.T) {
|
||||
test := `domain:0019x.com
|
||||
domain:002777.xyz
|
||||
domain:003store.com
|
||||
domain:00404850.xyz`
|
||||
want := []string{"0019x.com", "002777.xyz", "003store.com", "00404850.xyz"}
|
||||
|
||||
parsed, _, err := ParseDomains(strings.NewReader(test))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
sort.Strings(parsed)
|
||||
if !reflect.DeepEqual(parsed, want) {
|
||||
t.Errorf("expected ParseDomains(domains) to return %v, got %v", want, parsed)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSmartDNSParser(t *testing.T) {
|
||||
test := `# Title:AdRules SmartDNS List
|
||||
# Update: 2025-10-07 02:05:08(GMT+8)
|
||||
address /0.myikas.com/#
|
||||
address /0.net.easyjet.com/#
|
||||
address /0.nextyourcontent.com/#
|
||||
address /0019x.com/#`
|
||||
want := []string{"0.myikas.com", "0.net.easyjet.com", "0.nextyourcontent.com", "0019x.com"}
|
||||
|
||||
parsed, _, err := ParseDomains(strings.NewReader(test))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
sort.Strings(parsed)
|
||||
if !reflect.DeepEqual(parsed, want) {
|
||||
t.Errorf("expected ParseDomains(domains) to return %v, got %v", want, parsed)
|
||||
}
|
||||
}
|
40
dataset/parser/domains.go
Normal file
40
dataset/parser/domains.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"net"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func init() {
|
||||
domainsParsers = append(domainsParsers, domainsParser{})
|
||||
}
|
||||
|
||||
type domainsParser struct{}
|
||||
|
||||
func (domainsParser) CanHandle(line string) bool {
|
||||
return isDomainName(line) &&
|
||||
!strings.ContainsRune(line, ' ') &&
|
||||
!strings.ContainsRune(line, ':') &&
|
||||
net.ParseIP(line) == nil
|
||||
}
|
||||
|
||||
func (domainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if isComment(line) {
|
||||
continue
|
||||
}
|
||||
if isDomainName(line) {
|
||||
domains = append(domains, line)
|
||||
continue
|
||||
}
|
||||
ignored++
|
||||
}
|
||||
if err = scanner.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
return unique(domains), ignored, nil
|
||||
}
|
31
dataset/parser/domains_test.go
Normal file
31
dataset/parser/domains_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseDomains(t *testing.T) {
|
||||
test := `# This is a comment
|
||||
facebook.com
|
||||
tiktok.com
|
||||
bogus ignored
|
||||
youtube.com`
|
||||
want := []string{"facebook.com", "tiktok.com", "youtube.com"}
|
||||
|
||||
parsed, ignored, err := ParseDomains(strings.NewReader(test))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
sort.Strings(parsed)
|
||||
if !reflect.DeepEqual(parsed, want) {
|
||||
t.Errorf("expected ParseDomains(domains) to return %v, got %v", want, parsed)
|
||||
}
|
||||
if ignored != 1 {
|
||||
t.Errorf("expected 1 ignored, got %d", ignored)
|
||||
}
|
||||
}
|
41
dataset/parser/hosts.go
Normal file
41
dataset/parser/hosts.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"net"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func init() {
|
||||
RegisterDomainsParser(hostsParser{})
|
||||
}
|
||||
|
||||
// hostsParser handles hosts-file style input: an IP address followed by one
// or more names.
type hostsParser struct{}

// CanHandle reports whether line has at least two whitespace-separated
// fields and the first one parses as an IP address.
func (hostsParser) CanHandle(line string) bool {
	fields := strings.Fields(line)
	if len(fields) < 2 {
		return false
	}
	return net.ParseIP(fields[0]) != nil
}
|
||||
|
||||
func (hostsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if isComment(line) {
|
||||
continue
|
||||
}
|
||||
|
||||
part := strings.Fields(line)
|
||||
if len(part) >= 2 && net.ParseIP(part[0]) != nil {
|
||||
domains = append(domains, part[1:]...)
|
||||
continue
|
||||
}
|
||||
|
||||
ignored++
|
||||
}
|
||||
if err = scanner.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
return unique(domains), ignored, nil
|
||||
}
|
38
dataset/parser/hosts_test.go
Normal file
38
dataset/parser/hosts_test.go
Normal file
@@ -0,0 +1,38 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseHosts(t *testing.T) {
|
||||
test := `##
|
||||
# Host Database
|
||||
#
|
||||
# localhost is used to configure the loopback interface
|
||||
# when the system is booting. Do not change this entry.
|
||||
##
|
||||
127.0.0.1 localhost dragon dragon.local dragon.maze.network
|
||||
255.255.255.255 broadcasthost
|
||||
::1 localhost
|
||||
ff00::1 multicast
|
||||
1.2.3.4
|
||||
`
|
||||
want := []string{"broadcasthost", "dragon", "dragon.local", "dragon.maze.network", "localhost", "multicast"}
|
||||
|
||||
parsed, ignored, err := ParseDomains(strings.NewReader(test))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
return
|
||||
}
|
||||
|
||||
sort.Strings(parsed)
|
||||
if !reflect.DeepEqual(parsed, want) {
|
||||
t.Errorf("expected ParseDomains(hosts) to return %v, got %v", want, parsed)
|
||||
}
|
||||
if ignored != 1 {
|
||||
t.Errorf("expected 1 ignored, got %d", ignored)
|
||||
}
|
||||
}
|
76
dataset/parser/parser.go
Normal file
76
dataset/parser/parser.go
Normal file
@@ -0,0 +1,76 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"errors"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
)
|
||||
|
||||
// ErrNoParser is returned by ParseDomains when no registered parser accepts
// the input.
var ErrNoParser = errors.New("no suitable parser could be found")
|
||||
|
||||
// Parser is the detection half of a list parser.
type Parser interface {
	// CanHandle reports whether the parser recognises line. ParseDomains
	// calls it with the first non-comment line of the input, already
	// trimmed of surrounding white space.
	CanHandle(line string) bool
}
|
||||
|
||||
type DomainsParser interface {
|
||||
Parser
|
||||
ParseDomains(io.Reader) (domains []string, ignored int, err error)
|
||||
}
|
||||
|
||||
// domainsParsers holds the registered parsers in registration order;
// ParseDomains tries them in that order.
var domainsParsers []DomainsParser
|
||||
|
||||
func RegisterDomainsParser(parser DomainsParser) {
|
||||
domainsParsers = append(domainsParsers, parser)
|
||||
}
|
||||
|
||||
func ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
|
||||
var (
|
||||
buffer = new(bytes.Buffer)
|
||||
scanner = bufio.NewScanner(io.TeeReader(r, buffer))
|
||||
line string
|
||||
parser DomainsParser
|
||||
)
|
||||
for scanner.Scan() {
|
||||
line = strings.TrimSpace(scanner.Text())
|
||||
if isComment(line) {
|
||||
continue
|
||||
}
|
||||
for _, parser = range domainsParsers {
|
||||
if parser.CanHandle(line) {
|
||||
log.Printf("using parser %T", parser)
|
||||
return parser.ParseDomains(io.MultiReader(buffer, r))
|
||||
}
|
||||
}
|
||||
break
|
||||
}
|
||||
return nil, 0, ErrNoParser
|
||||
}
|
||||
|
||||
// isComment reports whether line should be skipped by the parsers: it is
// empty or its first byte is '#' or '!'.
func isComment(line string) bool {
	if len(line) == 0 {
		return true
	}
	switch line[0] {
	case '#', '!':
		return true
	}
	return false
}
|
||||
|
||||
func isDomainName(name string) bool {
|
||||
n, ok := dns.IsDomainName(name)
|
||||
return n >= 2 && ok
|
||||
}
|
||||
|
||||
// unique returns the distinct elements of values, keeping the first
// occurrence of each in its original position so the result is
// deterministic. A nil input yields nil; an empty non-nil input yields an
// empty non-nil slice. The input slice is not modified.
//
// The parameter is named values rather than strings so that it does not
// shadow the strings package.
func unique(values []string) []string {
	if values == nil {
		return nil
	}
	seen := make(map[string]struct{}, len(values))
	out := make([]string, 0, len(values))
	for _, s := range values {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		out = append(out, s)
	}
	return out
}
|
31
dataset/parser/parser_test.go
Normal file
31
dataset/parser/parser_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"sort"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestUnique(t *testing.T) {
|
||||
tests := []struct {
|
||||
Name string
|
||||
Test []string
|
||||
Want []string
|
||||
}{
|
||||
{"nil", nil, nil},
|
||||
{"single", []string{"test"}, []string{"test"}},
|
||||
{"duplicate", []string{"test", "test"}, []string{"test"}},
|
||||
{"multiple", []string{"a", "a", "b", "b", "b", "c"}, []string{"a", "b", "c"}},
|
||||
}
|
||||
for _, test := range tests {
|
||||
t.Run(test.Name, func(it *testing.T) {
|
||||
v := unique(test.Test)
|
||||
if v != nil {
|
||||
sort.Strings(v)
|
||||
}
|
||||
if !reflect.DeepEqual(v, test.Want) {
|
||||
it.Errorf("expected unique(%v) to return %v, got %v", test.Test, test.Want, v)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user