Checkpoint
This commit is contained in:
53
dataset/parser/adblock.go
Normal file
53
dataset/parser/adblock.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func init() {
|
||||
RegisterDomainsParser(adblockDomainsParser{})
|
||||
}
|
||||
|
||||
// adblockDomainsParser recognizes and parses AdBlock-style filter lists.
// It has no fields, so the zero value is ready to use.
type adblockDomainsParser struct{}

// CanHandle reports whether line looks like part of an AdBlock filter list.
// It returns true when line starts with any of:
//
//	[adblock   list header, matched case-insensitively
//	@@         exception rule
//	||         domain-anchor rule
//	*          leading-wildcard rule
//
// An empty line matches none of these and yields false.
func (adblockDomainsParser) CanHandle(line string) bool {
	return strings.HasPrefix(strings.ToLower(line), "[adblock") || // header, any case
		strings.HasPrefix(line, "@@") || // exception rule
		strings.HasPrefix(line, "||") || // domain anchor
		// HasPrefix instead of line[0]: indexing would panic on an empty line.
		strings.HasPrefix(line, "*")
}
|
||||
|
||||
func (adblockDomainsParser) ParseDomains(r io.Reader) (domains []string, ignored int, err error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
for scanner.Scan() {
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
if isComment(line) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Common AdBlock patterns:
|
||||
// ||domain.com^
|
||||
// |http://domain.com|
|
||||
// domain.com/path
|
||||
// *domain.com*
|
||||
switch {
|
||||
case strings.HasPrefix(line, `||`): // domain anchor
|
||||
if i := strings.IndexByte(line, '^'); i != -1 {
|
||||
domains = append(domains, line[2:i])
|
||||
continue
|
||||
}
|
||||
case strings.HasPrefix(line, `|`) && strings.HasSuffix(line, `|`):
|
||||
domains = append(domains, line[1:len(line)-2])
|
||||
continue
|
||||
case strings.HasPrefix(line, `[`):
|
||||
continue
|
||||
}
|
||||
ignored++
|
||||
}
|
||||
if err = scanner.Err(); err != nil {
|
||||
return
|
||||
}
|
||||
return unique(domains), ignored, nil
|
||||
}
|
Reference in New Issue
Block a user