Checkpoint
This commit is contained in:
99
dataset/domain.go
Normal file
99
dataset/domain.go
Normal file
@@ -0,0 +1,99 @@
|
||||
package dataset
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/miekg/dns"
|
||||
)
|
||||
|
||||
type (
	// DomainTree is a set of domain names stored as a trie of DNS labels.
	// Names are inserted label by label starting from the rightmost label,
	// so a lookup walks from the TLD towards the host name.
	DomainTree struct {
		root *domainTreeNode
	}

	// domainTreeNode is a single label position inside a DomainTree.
	domainTreeNode struct {
		// leaf maps the next label (one step to the left) to its node.
		// It may be nil for nodes that have no children yet.
		leaf map[string]*domainTreeNode
		// isEnd is true when the labels on the path from the root to this
		// node spell out a name that was added to the tree.
		isEnd bool
	}
)
|
||||
|
||||
func NewDomainList(domains ...string) *DomainTree {
|
||||
tree := &DomainTree{
|
||||
root: &domainTreeNode{leaf: make(map[string]*domainTreeNode)},
|
||||
}
|
||||
for _, domain := range domains {
|
||||
tree.Add(domain)
|
||||
}
|
||||
return tree
|
||||
}
|
||||
|
||||
func (tree *DomainTree) Add(domain string) {
|
||||
domain = normalizeDomain(domain)
|
||||
if domain == "" {
|
||||
return
|
||||
}
|
||||
|
||||
labels := dns.SplitDomainName(domain)
|
||||
if len(labels) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
node := tree.root
|
||||
for i := len(labels) - 1; i >= 0; i-- {
|
||||
label := labels[i]
|
||||
if label == "" {
|
||||
continue
|
||||
}
|
||||
if node.leaf == nil {
|
||||
node.leaf = make(map[string]*domainTreeNode)
|
||||
}
|
||||
if node.leaf[label] == nil {
|
||||
node.leaf[label] = &domainTreeNode{}
|
||||
}
|
||||
node = node.leaf[label]
|
||||
}
|
||||
node.isEnd = true
|
||||
}
|
||||
|
||||
func (tree *DomainTree) Contains(domain string) bool {
|
||||
domain = normalizeDomain(domain)
|
||||
if domain == "" {
|
||||
return false
|
||||
}
|
||||
|
||||
labels := dns.SplitDomainName(domain)
|
||||
if len(labels) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
node := tree.root
|
||||
for i := len(labels) - 1; i >= 0; i-- {
|
||||
if node.isEnd {
|
||||
return true
|
||||
}
|
||||
|
||||
if node.leaf == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
label := labels[i]
|
||||
if node = node.leaf[label]; node == nil {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return node.isEnd
|
||||
}
|
||||
|
||||
func normalizeDomain(domain string) string {
|
||||
domain = strings.ToLower(strings.TrimSpace(domain))
|
||||
if domain == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Remove trailing dot if present, dns.Fqdn will add it back properly
|
||||
domain = strings.TrimSuffix(domain, ".")
|
||||
|
||||
if domain == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
return dns.Fqdn(domain)
|
||||
}
|
5
dataset/domain_data.go
Normal file
5
dataset/domain_data.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package dataset
|
||||
|
||||
var Domains = map[string]*DomainTree{
|
||||
"example": NewDomainList("example.org", "example.net", "example.com"),
|
||||
}
|
276
dataset/domain_test.go
Normal file
276
dataset/domain_test.go
Normal file
@@ -0,0 +1,276 @@
|
||||
package dataset
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDomainList(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
domains []string
|
||||
hostname string
|
||||
expected bool
|
||||
}{
|
||||
// Basic exact matches
|
||||
{
|
||||
name: "exact match",
|
||||
domains: []string{"example.com"},
|
||||
hostname: "example.com",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "exact match with subdomain in list",
|
||||
domains: []string{"api.example.com"},
|
||||
hostname: "api.example.com",
|
||||
expected: true,
|
||||
},
|
||||
|
||||
// Suffix matching - if domain is in list, all subdomains should match
|
||||
{
|
||||
name: "subdomain matches parent domain",
|
||||
domains: []string{"example.com"},
|
||||
hostname: "sub.example.com",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "multiple subdomain levels match",
|
||||
domains: []string{"example.com"},
|
||||
hostname: "deep.nested.sub.example.com",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "subdomain matches intermediate domain",
|
||||
domains: []string{"api.example.com", "example.com"},
|
||||
hostname: "sub.api.example.com",
|
||||
expected: true,
|
||||
},
|
||||
|
||||
// Multi-level TLDs
|
||||
{
|
||||
name: "co.uk domain exact match",
|
||||
domains: []string{"domain.co.uk"},
|
||||
hostname: "domain.co.uk",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "subdomain of co.uk domain",
|
||||
domains: []string{"domain.co.uk"},
|
||||
hostname: "sub.domain.co.uk",
|
||||
expected: true,
|
||||
},
|
||||
|
||||
// Case sensitivity
|
||||
{
|
||||
name: "case insensitive match",
|
||||
domains: []string{"Example.COM"},
|
||||
hostname: "example.com",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "case insensitive hostname",
|
||||
domains: []string{"example.com"},
|
||||
hostname: "EXAMPLE.COM",
|
||||
expected: true,
|
||||
},
|
||||
|
||||
// Trailing dots
|
||||
{
|
||||
name: "domain with trailing dot",
|
||||
domains: []string{"example.com."},
|
||||
hostname: "example.com",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "hostname with trailing dot",
|
||||
domains: []string{"example.com"},
|
||||
hostname: "example.com.",
|
||||
expected: true,
|
||||
},
|
||||
|
||||
// Non-matches
|
||||
{
|
||||
name: "different TLD",
|
||||
domains: []string{"example.com"},
|
||||
hostname: "example.org",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "different domain",
|
||||
domains: []string{"example.com"},
|
||||
hostname: "test.com",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "partial match but not suffix",
|
||||
domains: []string{"example.com"},
|
||||
hostname: "com",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "empty hostname",
|
||||
domains: []string{"example.com"},
|
||||
hostname: "",
|
||||
expected: false,
|
||||
},
|
||||
|
||||
// Multiple domains in list
|
||||
{
|
||||
name: "matches first domain in list",
|
||||
domains: []string{"test.org", "example.com"},
|
||||
hostname: "example.com",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "matches second domain in list",
|
||||
domains: []string{"test.org", "example.com"},
|
||||
hostname: "test.org",
|
||||
expected: true,
|
||||
},
|
||||
{
|
||||
name: "subdomain matches any domain in list",
|
||||
domains: []string{"test.org", "example.com"},
|
||||
hostname: "sub.example.com",
|
||||
expected: true,
|
||||
},
|
||||
|
||||
// Edge cases
|
||||
{
|
||||
name: "empty domain list",
|
||||
domains: []string{},
|
||||
hostname: "example.com",
|
||||
expected: false,
|
||||
},
|
||||
{
|
||||
name: "invalid domain in list",
|
||||
domains: []string{""},
|
||||
hostname: "example.com",
|
||||
expected: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
list := NewDomainList(tt.domains...)
|
||||
result := list.Contains(tt.hostname)
|
||||
|
||||
if result != tt.expected {
|
||||
t.Errorf("Contains(%q) = %v, expected %v (domains: %v)",
|
||||
tt.hostname, result, tt.expected, tt.domains)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDomainList_Performance(t *testing.T) {
|
||||
// Test with a large number of domains to ensure performance
|
||||
domains := make([]string, 1000)
|
||||
for i := 0; i < 1000; i++ {
|
||||
domains[i] = string(rune('a'+(i%26))) + ".com"
|
||||
}
|
||||
domains = append(domains, "example.com") // Add our test domain
|
||||
|
||||
list := NewDomainList(domains...)
|
||||
|
||||
// These should be fast even with many domains
|
||||
if !list.Contains("example.com") {
|
||||
t.Error("Should match exact domain")
|
||||
}
|
||||
if !list.Contains("sub.example.com") {
|
||||
t.Error("Should match subdomain")
|
||||
}
|
||||
if list.Contains("notfound.com") {
|
||||
t.Error("Should not match unrelated domain")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDomainList_ComplexDomains(t *testing.T) {
|
||||
domains := []string{
|
||||
"very.long.domain.name.with.many.labels.com",
|
||||
"example.co.uk",
|
||||
"sub.domain.example.com",
|
||||
"a.b.c.d.e.f.com",
|
||||
}
|
||||
|
||||
list := NewDomainList(domains...)
|
||||
|
||||
tests := []struct {
|
||||
hostname string
|
||||
expected bool
|
||||
}{
|
||||
{"very.long.domain.name.with.many.labels.com", true},
|
||||
{"sub.very.long.domain.name.with.many.labels.com", true},
|
||||
{"example.co.uk", true},
|
||||
{"www.example.co.uk", true},
|
||||
{"sub.domain.example.com", true},
|
||||
{"another.sub.domain.example.com", true},
|
||||
{"a.b.c.d.e.f.com", true},
|
||||
{"x.a.b.c.d.e.f.com", true},
|
||||
{"not.matching.com", false},
|
||||
{"com", false},
|
||||
{"uk", false},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.hostname, func(t *testing.T) {
|
||||
result := list.Contains(tt.hostname)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Contains(%q) = %v, expected %v", tt.hostname, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDomainList_SpecialCases(t *testing.T) {
|
||||
t.Run("domain with asterisk treated literally", func(t *testing.T) {
|
||||
list := NewDomainList("*.example.com")
|
||||
|
||||
// The asterisk should be treated as a literal label, not a wildcard
|
||||
if !list.Contains("*.example.com") {
|
||||
t.Error("Asterisk should be treated literally, not as wildcard")
|
||||
}
|
||||
if list.Contains("test.example.com") {
|
||||
t.Error("Should not match subdomain with literal asterisk domain")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("domains with hyphens and numbers", func(t *testing.T) {
|
||||
list := NewDomainList("test-123.example.com", "123abc.org")
|
||||
|
||||
if !list.Contains("test-123.example.com") {
|
||||
t.Error("Should match domain with hyphens and numbers")
|
||||
}
|
||||
if !list.Contains("sub.test-123.example.com") {
|
||||
t.Error("Should match subdomain of hyphenated domain")
|
||||
}
|
||||
if !list.Contains("123abc.org") {
|
||||
t.Error("Should match domain starting with numbers")
|
||||
}
|
||||
if !list.Contains("www.123abc.org") {
|
||||
t.Error("Should match subdomain of numeric domain")
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
func BenchmarkDomainList(b *testing.B) {
|
||||
// Benchmark with realistic domain list
|
||||
domains := []string{
|
||||
"google.com",
|
||||
"github.com",
|
||||
"example.org",
|
||||
"sub.domain.com",
|
||||
"api.service.co.uk",
|
||||
"very.long.domain.name.example.com",
|
||||
}
|
||||
|
||||
list := NewDomainList(domains...)
|
||||
|
||||
b.ResetTimer()
|
||||
for b.Loop() {
|
||||
// Mix of matches and non-matches
|
||||
list.Contains("sub.example.org")
|
||||
list.Contains("api.github.com")
|
||||
list.Contains("nonexistent.com")
|
||||
list.Contains("deep.nested.sub.domain.com")
|
||||
list.Contains("service.co.uk")
|
||||
}
|
||||
}
|
52
dataset/network.go
Normal file
52
dataset/network.go
Normal file
@@ -0,0 +1,52 @@
|
||||
package dataset
|
||||
|
||||
import (
|
||||
"net"
|
||||
|
||||
"github.com/yl2chen/cidranger"
|
||||
)
|
||||
|
||||
type NetworkTree struct {
|
||||
ranger cidranger.Ranger
|
||||
}
|
||||
|
||||
func MustNetworkTree(networks ...string) *NetworkTree {
|
||||
tree, err := NewNetworkTree(networks...)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return tree
|
||||
}
|
||||
|
||||
func NewNetworkTree(networks ...string) (*NetworkTree, error) {
|
||||
tree := &NetworkTree{
|
||||
ranger: cidranger.NewPCTrieRanger(),
|
||||
}
|
||||
for _, cidr := range networks {
|
||||
if err := tree.AddCIDR(cidr); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return tree, nil
|
||||
}
|
||||
|
||||
func (tree *NetworkTree) Add(ipnet *net.IPNet) {
|
||||
if ipnet == nil {
|
||||
return
|
||||
}
|
||||
tree.ranger.Insert(cidranger.NewBasicRangerEntry(*ipnet))
|
||||
}
|
||||
|
||||
func (tree *NetworkTree) AddCIDR(cidr string) error {
|
||||
_, ipnet, err := net.ParseCIDR(cidr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
tree.ranger.Insert(cidranger.NewBasicRangerEntry(*ipnet))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tree *NetworkTree) Contains(ip net.IP) bool {
|
||||
contains, _ := tree.ranger.Contains(ip)
|
||||
return contains
|
||||
}
|
71
dataset/network_data.go
Normal file
71
dataset/network_data.go
Normal file
@@ -0,0 +1,71 @@
|
||||
package dataset
|
||||
|
||||
// Bogon prefix lists: address ranges that are reserved or special-purpose.
var (
	// bogonsIPv4 lists the IPv4 bogon prefixes.
	bogonsIPv4 = []string{
		"0.0.0.0/8",          // "This" network
		"10.0.0.0/8",         // RFC1918 Private-use networks
		"100.64.0.0/10",      // Carrier-grade NAT
		"127.0.0.0/8",        // Loopback
		"169.254.0.0/16",     // Link local
		"172.16.0.0/12",      // RFC1918 Private-use networks
		"192.0.0.0/24",       // IETF protocol assignments
		"192.0.2.0/24",       // TEST-NET-1
		"192.168.0.0/16",     // RFC1918 Private-use networks
		"198.18.0.0/15",      // Network interconnect device benchmark testing
		"198.51.100.0/24",    // TEST-NET-2
		"203.0.113.0/24",     // TEST-NET-3
		"224.0.0.0/4",        // Multicast
		"240.0.0.0/4",        // Reserved for future use
		"255.255.255.255/32", // Limited broadcast
	}

	// bogonsIPv6 lists the IPv6 bogon prefixes, including the 6to4 and Teredo
	// ranges that correspond to the IPv4 bogons above.
	bogonsIPv6 = []string{
		"::/128",                // Node-scope unicast unspecified address
		"::1/128",               // Node-scope unicast loopback address
		"::ffff:0:0/96",         // IPv4-mapped addresses
		"::/96",                 // IPv4-compatible addresses
		"100::/64",              // Remotely triggered black hole addresses
		"2001:10::/28",          // Overlay routable cryptographic hash identifiers (ORCHID)
		"2001:db8::/32",         // Documentation prefix
		"3fff::/20",             // Documentation prefix
		"fc00::/7",              // Unique local addresses (ULA)
		"fe80::/10",             // Link-local unicast
		"fec0::/10",             // Site-local unicast (deprecated)
		"ff00::/8",              // Multicast (Note: ff0e::/16 is global scope and may appear on the global internet.)
		"2002::/24",             // 6to4 bogon (0.0.0.0/8)
		"2002:a00::/24",         // 6to4 bogon (10.0.0.0/8)
		"2002:7f00::/24",        // 6to4 bogon (127.0.0.0/8)
		"2002:a9fe::/32",        // 6to4 bogon (169.254.0.0/16)
		"2002:ac10::/28",        // 6to4 bogon (172.16.0.0/12)
		"2002:c000::/40",        // 6to4 bogon (192.0.0.0/24)
		"2002:c000:200::/40",    // 6to4 bogon (192.0.2.0/24)
		"2002:c0a8::/32",        // 6to4 bogon (192.168.0.0/16)
		"2002:c612::/31",        // 6to4 bogon (198.18.0.0/15)
		"2002:c633:6400::/40",   // 6to4 bogon (198.51.100.0/24)
		"2002:cb00:7100::/40",   // 6to4 bogon (203.0.113.0/24)
		"2002:e000::/20",        // 6to4 bogon (224.0.0.0/4)
		"2002:f000::/20",        // 6to4 bogon (240.0.0.0/4)
		"2002:ffff:ffff::/48",   // 6to4 bogon (255.255.255.255/32)
		"2001::/40",             // Teredo bogon (0.0.0.0/8)
		"2001:0:a00::/40",       // Teredo bogon (10.0.0.0/8)
		"2001:0:7f00::/40",      // Teredo bogon (127.0.0.0/8)
		"2001:0:a9fe::/48",      // Teredo bogon (169.254.0.0/16)
		"2001:0:ac10::/44",      // Teredo bogon (172.16.0.0/12)
		"2001:0:c000::/56",      // Teredo bogon (192.0.0.0/24)
		"2001:0:c000:200::/56",  // Teredo bogon (192.0.2.0/24)
		"2001:0:c0a8::/48",      // Teredo bogon (192.168.0.0/16)
		"2001:0:c612::/47",      // Teredo bogon (198.18.0.0/15)
		"2001:0:c633:6400::/56", // Teredo bogon (198.51.100.0/24)
		"2001:0:cb00:7100::/56", // Teredo bogon (203.0.113.0/24)
		"2001:0:e000::/36",      // Teredo bogon (224.0.0.0/4)
		"2001:0:f000::/36",      // Teredo bogon (240.0.0.0/4)
		"2001:0:ffff:ffff::/64", // Teredo bogon (255.255.255.255/32)
	}

	// bogons is the IPv4 list followed by the IPv6 list. It is built into a
	// freshly allocated slice so it never shares a backing array with
	// bogonsIPv4.
	bogons = append(
		append(make([]string, 0, len(bogonsIPv4)+len(bogonsIPv6)), bogonsIPv4...),
		bogonsIPv6...,
	)
)
|
||||
|
||||
// Networks contains predefined network lists.
|
||||
var Networks = map[string]*NetworkTree{
|
||||
"bogons": MustNetworkTree(bogons...),
|
||||
"boeong4": MustNetworkTree(bogonsIPv4...),
|
||||
"bogons6": MustNetworkTree(bogonsIPv6...),
|
||||
}
|
Reference in New Issue
Block a user