70 lines
1.7 KiB
Go

package main
import (
"log"
"regexp"
"sort"
"time"
)
// cleanBadDomains returns a de-duplicated, filtered and sorted copy of domains.
//
// Steps, in order:
//  1. drop duplicate entries, keeping the first occurrence of each;
//  2. drop entries longer than 250 characters (counted in runes);
//  3. drop entries that match ANY regular expression in
//     config.Config.AllowLists; patterns that fail to compile are logged
//     and skipped, the remaining patterns are still applied;
//  4. sort the survivors with sort.Strings.
//
// The caller's slice is not modified and the returned slice is never nil.
// Each step logs how many entries it removed. timeTrack is deferred with the
// start time and the function name (presumably to report elapsed time; it is
// defined outside this block).
func cleanBadDomains(domains []string) []string {
	defer timeTrack(time.Now(), "cleanBadDomains")

	// remove duplicates
	total := len(domains)
	seen := make(map[string]struct{}, len(domains))
	list := make([]string, 0, len(domains))
	for _, item := range domains {
		if _, dup := seen[item]; dup {
			continue
		}
		seen[item] = struct{}{}
		list = append(list, item)
	}
	domains = list
	log.Printf("[INFO] Duplicate items removed: %d\n", total-len(domains))

	// remove hosts that are too long
	total = len(domains)
	list = make([]string, 0, len(domains))
	for _, blocklistItem := range domains {
		if len([]rune(blocklistItem)) > 250 {
			continue
		}
		list = append(list, blocklistItem)
	}
	domains = list
	log.Printf("[INFO] Hosts with too many characters removed: %d\n", total-len(domains))

	// remove allow-listed matches
	total = len(domains)

	// Compile every allow-list pattern exactly once. An invalid pattern is
	// reported and skipped; it must not stop the remaining patterns from
	// being loaded.
	var allowed []*regexp.Regexp
	for _, allowedItem := range config.Config.AllowLists {
		re, err := regexp.Compile(allowedItem)
		if err != nil {
			log.Printf("[ERROR] Allow list item (%s) is not valid regex: %v\n", allowedItem, err)
			continue
		}
		allowed = append(allowed, re)
	}

	list = make([]string, 0, len(domains))
	for k, v := range domains {
		// Convert before dividing so the percentage is not truncated to 0 or 1.
		progress := 100 * float64(k+1) / float64(len(domains))
		log.Printf("[DEBUG] Processing %d of %d (%0.2f%%)\n", k+1, len(domains), progress)

		// A domain is removed if any pattern matches, and kept exactly once
		// otherwise (including when there are no valid patterns at all).
		matched := false
		for _, re := range allowed {
			if re.MatchString(v) {
				matched = true
				break
			}
		}
		if matched {
			log.Printf("[DEBUG] Removing allowed matching item: %s\n", v)
			continue
		}
		list = append(list, v)
	}
	domains = list
	log.Printf("[INFO] Allowed hosts removed: %d\n", total-len(domains))

	log.Printf("[INFO] Total domains in list at end: %d.\n", len(domains))
	sort.Strings(domains)
	return domains
}