Checkpoint

This commit is contained in:
2025-10-06 22:25:23 +02:00
parent a23259cfdc
commit a254b306f2
48 changed files with 3327 additions and 212 deletions

76
dataset/parser/parser.go Normal file
View File

@@ -0,0 +1,76 @@
package parser
import (
"bufio"
"bytes"
"errors"
"io"
"log"
"strings"
"github.com/miekg/dns"
)
// ErrNoParser is the error ParseDomains returns when it cannot select a
// parser for the input.
var ErrNoParser = errors.New("no suitable parser could be found")
// Parser is the base interface embedded by DomainsParser. It lets a parser
// declare whether it recognizes a sample line of input.
type Parser interface {
// CanHandle reports whether this parser accepts the given line.
// ParseDomains calls it with the first non-blank, non-comment line of the
// input, after trimming surrounding whitespace.
CanHandle(line string) bool
}
// DomainsParser is a Parser that can also extract domain names from a stream.
type DomainsParser interface {
Parser
// ParseDomains reads from the given reader and returns the parsed domains,
// a count of ignored entries, and any error encountered.
// NOTE(review): what counts as "ignored" is up to each implementation and
// is not visible here — confirm against the concrete parsers.
ParseDomains(io.Reader) (domains []string, ignored int, err error)
}
// domainsParsers holds the parsers added through RegisterDomainsParser, in
// registration order. ParseDomains tries them in that same order.
var domainsParsers []DomainsParser
// RegisterDomainsParser appends parser to the package-level list consulted by
// ParseDomains. Parsers are tried in registration order, so an earlier
// registration wins when more than one parser can handle the same input.
//
// The list is modified without any locking.
// NOTE(review): presumably this is only called during program start-up
// (e.g. from init functions) — confirm before calling it concurrently.
func RegisterDomainsParser(parser DomainsParser) {
domainsParsers = append(domainsParsers, parser)
}
func ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
var (
buffer = new(bytes.Buffer)
scanner = bufio.NewScanner(io.TeeReader(r, buffer))
line string
parser DomainsParser
)
for scanner.Scan() {
line = strings.TrimSpace(scanner.Text())
if isComment(line) {
continue
}
for _, parser = range domainsParsers {
if parser.CanHandle(line) {
log.Printf("using parser %T", parser)
return parser.ParseDomains(io.MultiReader(buffer, r))
}
}
break
}
return nil, 0, ErrNoParser
}
// isComment reports whether line should be skipped during format detection:
// it is either empty or its first byte is '#' or '!'.
func isComment(line string) bool {
	if len(line) == 0 {
		return true
	}
	switch line[0] {
	case '#', '!':
		return true
	default:
		return false
	}
}
// isDomainName reports whether name passes dns.IsDomainName validation and the
// count it returns is at least two. Per the miekg/dns documentation that count
// is the number of labels, so single-label names are rejected.
func isDomainName(name string) bool {
	labels, valid := dns.IsDomainName(name)
	if !valid {
		return false
	}
	return labels >= 2
}
// unique returns the distinct elements of strings, keeping the first
// occurrence of each value in its original relative order so the result is
// deterministic from call to call.
//
// A nil input yields nil; an empty non-nil input yields an empty non-nil
// slice. The input slice is not modified.
func unique(strings []string) []string {
	if strings == nil {
		return nil
	}
	seen := make(map[string]struct{}, len(strings))
	out := make([]string, 0, len(strings))
	for _, s := range strings {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		out = append(out, s)
	}
	return out
}