136 lines
2.9 KiB
Go

package main
import (
"log"
"sort"
"time"
"pihole-blocklist/v2/internal/httpclient"
)
// main is the program entry point. It runs initialize, downloads the
// configured blocklists, stores the cleaned domain list in
// config.NamedConfig.BadDomains and then calls buildBindResponsePolicyFile.
func main() {
	initialize()

	// Fetch everything from the remote lists first.
	raw := getListData()

	// De-duplicate and filter the result before it is stored in the config.
	cleaned := cleanBadDomains(raw)
	config.NamedConfig.BadDomains = cleaned

	buildBindResponsePolicyFile()
}
func getListData() []string {
var badDomains []string
listSimple := make(chan []string)
listComplex := make(chan []string)
log.Printf("[INFO] Downloading blocklists\n")
// Get Simple Blocklists
go func() {
data := getData(config.URLBlocklistsSimple)
domains := parseSimple(data)
listSimple <- domains
}()
// Get Host File Blocklists
go func() {
data := getData(config.URLBlocklistHostFiles)
domains := parseComplex(data)
listComplex <- domains
}()
// Wait for all downloads to finish
var (
simple, complex []string
simpleFinished, complexFinished bool
)
for {
select {
case simple = <-listSimple:
simpleFinished = true
log.Printf("[INFO] All simple lists have been retrieved.\n")
case complex = <-listComplex:
log.Printf("[INFO] All complex lists have been retrieved.\n")
complexFinished = true
default:
time.Sleep(time.Millisecond * 100)
}
if simpleFinished && complexFinished {
badDomains = append(badDomains, simple...)
badDomains = append(badDomains, complex...)
log.Printf("[INFO] Number of domains detected: %d\n", len(badDomains))
break
}
}
return badDomains
}
// getData downloads every URL in urls and returns the response payloads
// concatenated in the order the URLs were given.
//
// A URL that cannot be fetched is logged and skipped, so a single unreachable
// list does not abort the run and nothing returned alongside an error ends up
// in the result. If urls is empty, or every download fails, the result is nil.
func getData(urls []string) []byte {
	var listData []byte
	for _, u := range urls {
		log.Printf("[TRACE] Downloading URL: %s\n", u)
		c := httpclient.DefaultClient()
		data, err := c.Get(u)
		if err != nil {
			log.Printf("[ERROR] Unable to get remote content from URL (%s): %v", u, err)
			// The payload accompanying a non-nil error is not meaningful;
			// do not mix it into the combined list data.
			continue
		}
		listData = append(listData, data...)

		// Keep payloads separated: without a trailing newline the last entry
		// of this list would fuse with the first line of the next one.
		// NOTE(review): assumes parseSimple/parseComplex treat the data as
		// line-oriented text — confirm against the parsers.
		if n := len(listData); n > 0 && listData[n-1] != '\n' {
			listData = append(listData, '\n')
		}
	}
	return listData
}
// cleanBadDomains filters the raw blocklist entries and returns the surviving
// domains in sorted order.
//
// An entry is dropped, in this order of precedence, when it
//   - has already been seen earlier in domains (duplicate),
//   - is longer than 255 characters (counted in runes), or
//   - matches any pattern in config.DomainAllowlist.
//
// The number of entries dropped for each reason and the final list size are
// logged. The returned slice is never nil.
func cleanBadDomains(domains []string) []string {
	var duplicates, tooLong, allowed int

	seen := make(map[string]bool)
	kept := []string{}

nextDomain:
	for _, domain := range domains {
		// Duplicates: only the first occurrence is considered any further.
		if seen[domain] {
			duplicates++
			continue
		}
		seen[domain] = true

		// Hosts that are too long.
		if len([]rune(domain)) > 255 {
			tooLong++
			continue
		}

		// Allow-listed hosts: the first matching pattern is enough.
		for _, pattern := range config.DomainAllowlist {
			if pattern.MatchString(domain) {
				allowed++
				continue nextDomain
			}
		}

		kept = append(kept, domain)
	}

	log.Printf("[INFO] Duplicate items removed: %d\n", duplicates)
	log.Printf("[INFO] Hosts with too many characters removed: %d\n", tooLong)
	log.Printf("[INFO] Allowed hosts removed: %d\n", allowed)
	log.Printf("[INFO] Total domains in list at end: %d.\n", len(kept))

	sort.Strings(kept)
	return kept
}