Files
styx/dataset/parser/adblock.go
2025-10-06 23:11:50 +02:00

56 lines
1.2 KiB
Go

package parser
import (
"bufio"
"io"
"strings"
"git.maze.io/maze/styx/internal/sliceutil"
)
func init() {
RegisterDomainsParser(adblockDomainsParser{})
}
// adblockDomainsParser recognises and parses AdBlock-style filter lists.
// It carries no state, so the zero value is ready to use.
type adblockDomainsParser struct{}

// CanHandle reports whether line looks like it belongs to an AdBlock filter
// list. A line qualifies when it is one of:
//
//   - a list header starting with "[adblock" (matched case-insensitively),
//   - an exception rule starting with "@@",
//   - a domain-anchor rule starting with "||",
//   - a rule whose first byte is '*'.
//
// The line is inspected exactly as given; no whitespace is trimmed. An empty
// line is never handled and yields false.
func (adblockDomainsParser) CanHandle(line string) bool {
	// Guard first: the byte index below would panic on an empty string.
	if line == "" {
		return false
	}
	return strings.HasPrefix(strings.ToLower(line), `[adblock`) ||
		strings.HasPrefix(line, "@@") || // exception rule
		strings.HasPrefix(line, "||") || // domain anchor
		line[0] == '*' // leading wildcard; safe because line is non-empty
}
// ParseDomains reads an AdBlock-style filter list from r, one rule per line,
// and returns the values it could extract from the rules.
//
// Every line is whitespace-trimmed first, and lines for which isComment
// reports true are skipped without being counted. The remaining lines are
// handled by shape:
//
//	||domain.com^          yields the text between "||" and the first "^"
//	|http://domain.com|    yields the text between the two "|" delimiters
//	[Adblock Plus 2.0]     header line, skipped without being counted
//
// Any other line, and any rule of the first two shapes whose extracted value
// would be empty (for example "||^" or a lone "|"), is counted in ignored.
//
// On success the collected values are passed through sliceutil.Unique before
// being returned. If the scanner fails, its error is returned together with
// whatever had been collected up to that point.
func (adblockDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		line := strings.TrimSpace(scanner.Text())
		if isComment(line) {
			continue
		}

		switch {
		case strings.HasPrefix(line, `||`): // domain anchor
			// Require at least one byte between "||" and "^" so an empty
			// domain is never emitted; rules without "^" fall through to
			// the ignored counter below.
			if i := strings.IndexByte(line, '^'); i > 2 {
				domains = append(domains, line[2:i])
				continue
			}
		case len(line) > 2 && strings.HasPrefix(line, `|`) && strings.HasSuffix(line, `|`):
			// Strip exactly one delimiter from each end. The length guard
			// keeps a lone "|" from producing an out-of-range slice.
			// NOTE(review): the value is kept verbatim and may still carry a
			// scheme such as "http://" - confirm whether callers expect a
			// bare host name here.
			domains = append(domains, line[1:len(line)-1])
			continue
		case strings.HasPrefix(line, `[`): // list header, e.g. "[Adblock Plus 2.0]"
			continue
		}

		ignored++
	}
	if err = scanner.Err(); err != nil {
		return domains, ignored, err
	}
	return sliceutil.Unique(domains), ignored, nil
}