Checkpoint

This commit is contained in:
2025-10-01 15:37:55 +02:00
parent 4a60059ff2
commit 03352e3312
31 changed files with 2611 additions and 384 deletions

276
dataset/domain_test.go Normal file
View File

@@ -0,0 +1,276 @@
package dataset
import (
"testing"
)
// TestDomainList is a table-driven test for DomainList.Contains. Each case
// builds a fresh list from the given domains and checks whether the hostname
// is reported as contained. The table covers exact matches, subdomain
// (suffix) matches, case and trailing-dot variations, label-boundary
// non-matches, and empty inputs.
func TestDomainList(t *testing.T) {
	tests := []struct {
		name     string   // subtest name
		domains  []string // domains the list is built from
		hostname string   // hostname passed to Contains
		expected bool     // expected result of Contains
	}{
		// Basic exact matches
		{
			name:     "exact match",
			domains:  []string{"example.com"},
			hostname: "example.com",
			expected: true,
		},
		{
			name:     "exact match with subdomain in list",
			domains:  []string{"api.example.com"},
			hostname: "api.example.com",
			expected: true,
		},
		// Suffix matching - if domain is in list, all subdomains should match
		{
			name:     "subdomain matches parent domain",
			domains:  []string{"example.com"},
			hostname: "sub.example.com",
			expected: true,
		},
		{
			name:     "multiple subdomain levels match",
			domains:  []string{"example.com"},
			hostname: "deep.nested.sub.example.com",
			expected: true,
		},
		{
			name:     "subdomain matches intermediate domain",
			domains:  []string{"api.example.com", "example.com"},
			hostname: "sub.api.example.com",
			expected: true,
		},
		// Multi-level TLDs
		{
			name:     "co.uk domain exact match",
			domains:  []string{"domain.co.uk"},
			hostname: "domain.co.uk",
			expected: true,
		},
		{
			name:     "subdomain of co.uk domain",
			domains:  []string{"domain.co.uk"},
			hostname: "sub.domain.co.uk",
			expected: true,
		},
		// Case sensitivity
		{
			name:     "case insensitive match",
			domains:  []string{"Example.COM"},
			hostname: "example.com",
			expected: true,
		},
		{
			name:     "case insensitive hostname",
			domains:  []string{"example.com"},
			hostname: "EXAMPLE.COM",
			expected: true,
		},
		// Trailing dots
		{
			name:     "domain with trailing dot",
			domains:  []string{"example.com."},
			hostname: "example.com",
			expected: true,
		},
		{
			name:     "hostname with trailing dot",
			domains:  []string{"example.com"},
			hostname: "example.com.",
			expected: true,
		},
		// Non-matches
		{
			name:     "different TLD",
			domains:  []string{"example.com"},
			hostname: "example.org",
			expected: false,
		},
		{
			name:     "different domain",
			domains:  []string{"example.com"},
			hostname: "test.com",
			expected: false,
		},
		{
			// "com" is a parent of the listed domain, not a subdomain of it.
			name:     "bare TLD does not match listed domain",
			domains:  []string{"example.com"},
			hostname: "com",
			expected: false,
		},
		{
			// Matching must respect label boundaries: the hostname ends with
			// the text "example.com" but is a different registrable name.
			name:     "string suffix without label boundary",
			domains:  []string{"example.com"},
			hostname: "notexample.com",
			expected: false,
		},
		{
			// The listed domain appears as leading labels only, so the
			// hostname actually belongs to evil.org.
			name:     "listed domain as prefix of another domain",
			domains:  []string{"example.com"},
			hostname: "example.com.evil.org",
			expected: false,
		},
		{
			name:     "empty hostname",
			domains:  []string{"example.com"},
			hostname: "",
			expected: false,
		},
		// Multiple domains in list
		{
			name:     "matches first domain in list",
			domains:  []string{"test.org", "example.com"},
			hostname: "test.org",
			expected: true,
		},
		{
			name:     "matches second domain in list",
			domains:  []string{"test.org", "example.com"},
			hostname: "example.com",
			expected: true,
		},
		{
			name:     "subdomain matches any domain in list",
			domains:  []string{"test.org", "example.com"},
			hostname: "sub.example.com",
			expected: true,
		},
		// Edge cases
		{
			name:     "empty domain list",
			domains:  []string{},
			hostname: "example.com",
			expected: false,
		},
		{
			name:     "invalid domain in list",
			domains:  []string{""},
			hostname: "example.com",
			expected: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			list := NewDomainList(tt.domains...)
			result := list.Contains(tt.hostname)
			if result != tt.expected {
				t.Errorf("Contains(%q) = %v, expected %v (domains: %v)",
					tt.hostname, result, tt.expected, tt.domains)
			}
		})
	}
}
func TestDomainList_Performance(t *testing.T) {
// Test with a large number of domains to ensure performance
domains := make([]string, 1000)
for i := 0; i < 1000; i++ {
domains[i] = string(rune('a'+(i%26))) + ".com"
}
domains = append(domains, "example.com") // Add our test domain
list := NewDomainList(domains...)
// These should be fast even with many domains
if !list.Contains("example.com") {
t.Error("Should match exact domain")
}
if !list.Contains("sub.example.com") {
t.Error("Should match subdomain")
}
if list.Contains("notfound.com") {
t.Error("Should not match unrelated domain")
}
}
// TestDomainList_ComplexDomains checks Contains against one shared list made
// of long, multi-label domains. Each hostname runs as its own subtest, named
// after the hostname.
func TestDomainList_ComplexDomains(t *testing.T) {
	list := NewDomainList(
		"very.long.domain.name.with.many.labels.com",
		"example.co.uk",
		"sub.domain.example.com",
		"a.b.c.d.e.f.com",
	)

	type lookup struct {
		hostname string
		expected bool
	}
	cases := []lookup{
		// Each listed domain, followed by one of its subdomains.
		{hostname: "very.long.domain.name.with.many.labels.com", expected: true},
		{hostname: "sub.very.long.domain.name.with.many.labels.com", expected: true},
		{hostname: "example.co.uk", expected: true},
		{hostname: "www.example.co.uk", expected: true},
		{hostname: "sub.domain.example.com", expected: true},
		{hostname: "another.sub.domain.example.com", expected: true},
		{hostname: "a.b.c.d.e.f.com", expected: true},
		{hostname: "x.a.b.c.d.e.f.com", expected: true},
		// An unlisted domain and bare TLDs.
		{hostname: "not.matching.com", expected: false},
		{hostname: "com", expected: false},
		{hostname: "uk", expected: false},
	}

	for _, tc := range cases {
		t.Run(tc.hostname, func(t *testing.T) {
			if got := list.Contains(tc.hostname); got != tc.expected {
				t.Errorf("Contains(%q) = %v, expected %v", tc.hostname, got, tc.expected)
			}
		})
	}
}
// TestDomainList_SpecialCases covers two kinds of unusual entries: a domain
// whose first label is an asterisk, and domains whose labels contain hyphens
// and digits.
func TestDomainList_SpecialCases(t *testing.T) {
	t.Run("domain with asterisk treated literally", func(t *testing.T) {
		starList := NewDomainList("*.example.com")

		// "*" is expected to behave as an ordinary label, not as a wildcard:
		// only the literal name matches, other subdomains do not.
		if literal := starList.Contains("*.example.com"); !literal {
			t.Error("Asterisk should be treated literally, not as wildcard")
		}
		if expanded := starList.Contains("test.example.com"); expanded {
			t.Error("Should not match subdomain with literal asterisk domain")
		}
	})

	t.Run("domains with hyphens and numbers", func(t *testing.T) {
		list := NewDomainList("test-123.example.com", "123abc.org")

		// Every hostname below must be contained; the paired text is the
		// failure message reported when it is not.
		mustContain := []struct {
			hostname string
			failure  string
		}{
			{"test-123.example.com", "Should match domain with hyphens and numbers"},
			{"sub.test-123.example.com", "Should match subdomain of hyphenated domain"},
			{"123abc.org", "Should match domain starting with numbers"},
			{"www.123abc.org", "Should match subdomain of numeric domain"},
		}
		for _, want := range mustContain {
			if !list.Contains(want.hostname) {
				t.Error(want.failure)
			}
		}
	})
}
// BenchmarkDomainList measures Contains on a small, realistic list. Each
// iteration performs five lookups with different relationships to the list,
// so the reported time per op covers all five calls.
func BenchmarkDomainList(b *testing.B) {
	// Benchmark with realistic domain list
	domains := []string{
		"google.com",
		"github.com",
		"example.org",
		"sub.domain.com",
		"api.service.co.uk",
		"very.long.domain.name.example.com",
	}
	list := NewDomainList(domains...)

	b.ReportAllocs()
	// No explicit b.ResetTimer() is needed: b.Loop resets the timer on its
	// first call, so the list construction above is already excluded.
	for b.Loop() {
		// Mix of matches and non-matches
		list.Contains("sub.example.org")            // subdomain of a listed domain
		list.Contains("api.github.com")             // subdomain of a listed domain
		list.Contains("nonexistent.com")            // not in the list
		list.Contains("deep.nested.sub.domain.com") // several labels below a listed domain
		list.Contains("service.co.uk")              // parent of the listed api.service.co.uk
	}
}