2023-12-09 14:21:28 -06:00

68 lines
1.7 KiB
Go

package main
import (
"regexp"
"sort"
)
// cleanBadDomains returns a sorted copy of domains with unwanted entries
// removed. Filtering happens in three passes, each reporting how many hosts
// it dropped through cfg.Log:
//
//  1. exact duplicates (the first occurrence is kept),
//  2. hosts longer than 240 characters (counted in runes, not bytes),
//  3. hosts matching any regular expression in cfg.ConfigFile.AllowLists.
//
// Allow-list patterns that fail to compile are logged as errors and skipped.
// The input slice is not modified; the result is always a non-nil slice.
func cleanBadDomains(domains []string) []string {
	// Longest host, in runes, that is kept in the blocklist.
	const maxHostLength = 240

	// remove duplicates
	total := len(domains)
	seen := make(map[string]struct{}, len(domains))
	unique := make([]string, 0, len(domains))
	for _, host := range domains {
		if _, dup := seen[host]; dup {
			continue
		}
		seen[host] = struct{}{}
		unique = append(unique, host)
	}
	domains = unique
	cfg.Log.Info("hosts removed from blocklist", "reason", "duplicate", "hosts", total-len(domains))

	// remove hosts that are too long
	total = len(domains)
	short := make([]string, 0, len(domains))
	for _, host := range domains {
		if len([]rune(host)) > maxHostLength {
			continue
		}
		short = append(short, host)
	}
	domains = short
	cfg.Log.Info("hosts removed from blocklist", "reason", "too many characters", "hosts", total-len(domains))

	// remove allow-listed matches
	total = len(domains)

	// Compile every allow-list pattern exactly once, dropping the ones that
	// are not valid regular expressions. Compiling inside the per-host loop
	// below would repeat this work for every (host, pattern) pair.
	allowed := make([]*regexp.Regexp, 0, len(cfg.ConfigFile.AllowLists))
	for _, allowedItem := range cfg.ConfigFile.AllowLists {
		re, err := regexp.Compile(allowedItem)
		if err != nil {
			cfg.Log.Error("unable to parse allow list item", "error", err, "regex", allowedItem)
			continue
		}
		allowed = append(allowed, re)
	}

	kept := make([]string, 0, len(domains))
	for _, host := range domains {
		addEntry := true
		// No early exit: every pattern that matches the host is logged.
		for _, re := range allowed {
			if re.MatchString(host) {
				// re.String() is the original pattern text from the config.
				cfg.Log.Debug("hosts removed from blocklist", "reason", "allowed host", "match string", re.String(), "host", host)
				addEntry = false
			}
		}
		if addEntry {
			kept = append(kept, host)
		}
	}
	domains = kept
	cfg.Log.Info("hosts removed from blocklist", "hosts", total-len(domains))

	cfg.Log.Info("total domains in list", "hosts", len(domains))
	sort.Strings(domains)
	return domains
}