Skip to content

Instantly share code, notes, and snippets.

@tiberiosantos
Last active December 29, 2021 05:29
Show Gist options
  • Save tiberiosantos/3e4ab9f5dbfb31123fd0e80bff2cfcff to your computer and use it in GitHub Desktop.
Save tiberiosantos/3e4ab9f5dbfb31123fd0e80bff2cfcff to your computer and use it in GitHub Desktop.
Script to download and build a huge ADS blocklist for Pi-hole
#!/bin/bash
#
# Script to download and build a huge ADS blocklist for Pi-hole.
#
# Copyright (c) 2021 Tiberio A. Santos
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Source lists to download, one URL per line.
# The here-document is quoted so that characters such as "&" and "?" in the
# URLs are taken literally; command substitution strips the final newline, so
# the value is the URLs joined by single newlines.
adblocker_domains=$(
  cat <<'EOF'
https://blocklistproject.github.io/Lists/alt-version/abuse-nl.txt
https://blocklistproject.github.io/Lists/alt-version/ads-nl.txt
https://blocklistproject.github.io/Lists/alt-version/crypto-nl.txt
https://blocklistproject.github.io/Lists/alt-version/fraud-nl.txt
https://blocklistproject.github.io/Lists/alt-version/malware-nl.txt
https://blocklistproject.github.io/Lists/alt-version/phishing-nl.txt
https://blocklistproject.github.io/Lists/alt-version/ransomware-nl.txt
https://blocklistproject.github.io/Lists/alt-version/redirect-nl.txt
https://blocklistproject.github.io/Lists/alt-version/scam-nl.txt
https://blocklistproject.github.io/Lists/alt-version/tracking-nl.txt
https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-blocklist.txt
https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-malware.txt
https://justdomains.github.io/blocklists/lists/adguarddns-justdomains.txt
https://justdomains.github.io/blocklists/lists/easylist-justdomains.txt
https://justdomains.github.io/blocklists/lists/easyprivacy-justdomains.txt
https://justdomains.github.io/blocklists/lists/nocoin-justdomains.txt
https://dbl.oisd.nl/
https://pgl.yoyo.org/as/serverlist.php?hostformat=nohtml&showintro=0&mimetype=plaintext
https://raw.githubusercontent.com/AdAway/adaway.github.io/master/hosts.txt
https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts
https://raw.githubusercontent.com/crazy-max/WindowsSpyBlocker/master/data/hosts/spy.txt
https://raw.githubusercontent.com/kboghdady/youTube_ads_4_pi-hole/master/youtubelist.txt
https://raw.githubusercontent.com/notracking/hosts-blocklists/master/hostnames.txt
https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt
https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt
https://someonewhocares.org/hosts/hosts
https://sysctl.org/cameleon/hosts
https://urlhaus.abuse.ch/downloads/hostfile/
https://winhelp2002.mvps.org/hosts.txt
https://www.hostsfile.org/Downloads/hosts.txt
EOF
)
# Domains that must never end up in the blocklist (whitespace separated).
excludes='s.youtube.com'

# Build one sed delete rule per excluded domain. Dots are escaped and the
# pattern is anchored on both ends so that only the exact domain is removed
# (an unescaped, unanchored "s.youtube.com" would also delete names such as
# "sxyoutube.com" or "s.youtube.com.example.net"). An empty list yields no
# rule at all instead of a rule that deletes every line.
exclude_rules=''
# shellcheck disable=SC2086 # word splitting of the list is intended
for excluded in $excludes; do
  exclude_rules+="/^${excluded//./\\.}\$/d;"
done

# sed -r (GNU sed) program that reduces hosts-file / plain-list lines to bare
# domain names. The commands run in this order for every input line:
cleaner='s/\r//g;'                    # strip carriage returns (CRLF lists)
cleaner+=' s/.*/\L&/;'                # fold the whole line to lower case
cleaner+=' s/\s*#.*//g;'              # drop comments and the blanks before them
cleaner+=' s/\s+$//;'                 # trim trailing blanks so the field strip
                                      # below cannot erase the domain itself
cleaner+=' /(\/).*$/d;'               # discard lines that contain a slash
cleaner+=' s/^.*\s+//g;'              # keep only the last whitespace-separated field
cleaner+=' /([^\.]+\.)+[^\.]{2,}/!d;' # discard lines without a dotted name
cleaner+="${exclude_rules}"           # finally remove the excluded domains
# Shell snippet that downloads a single list; it is run with
# `bash -c "$downloader" _ <url>`, so the URL arrives as "$@".
#   agent - browser-like User-Agent string sent with both wget requests.
#   slug  - the URL with every non-alphanumeric character replaced by "-",
#           runs of "-" squeezed to one and upper case folded to lower case;
#           it names the per-URL download directory.
# `wget --spider` first checks that the URL can be fetched; only when that
# succeeds is the file downloaded (-N enables wget's timestamping) into
# /tmp/adblocker/<slug>.
downloader='agent="Mozilla/5.0 (X11; Linux x86_64; rv:85.0) Gecko/20100101 Firefox/85.0";
slug=$(echo "$@" | sed -e "s/[^[:alnum:]]/-/g" | tr -s "-" | tr A-Z a-z);
wget -q --user-agent "$agent" --spider "$@" &&
wget -q -N --user-agent "$agent" "$@" -P /tmp/adblocker/"$slug"'
# ASCII-art title shown at start-up. It is kept as plain data and handed to
# printf through %s, so only the colour codes live in the format string.
banner=' ___ ____ __ __
/ | ____/ / /_ / /___ _____/ /_____ _____
/ /| |/ __ / __ \/ / __ \/ ___/ //_/ _ \/ ___/
/ ___ / /_/ / /_/ / / /_/ / /__/ ,< / __/ /
/_/ |_\__,_/_.___/_/\____/\___/_/|_|\___/_/
__ __ __ __
/ / / /___ ____/ /___ _/ /____ _____
/ / / / __ \/ __ / __ `/ __/ _ \/ ___/
/ /_/ / /_/ / /_/ / /_/ / /_/ __/ /
\____/ .___/\__,_/\__,_/\__/\___/_/
/_/'
# Red banner, colour reset, then one blank line.
printf '\e[31m%s\e[0m\n\n' "$banner"
# Step 1: download every list in parallel (four jobs per CPU core).
# The URLs are fed to xargs one per line and split on newlines only, so no
# URL reaches wget with a stray trailing newline attached. Each URL is passed
# as a positional argument to the snippet stored in $downloader.
# Downloads are best effort: a list that fails to download is simply skipped.
printf "[\e[33m*\e[0m] Downloading domain lists in parallel ... "
printf '%s\n' "$adblocker_domains" |
  xargs -d '\n' -n 1 -P "$(($(nproc) * 4))" bash -c "$downloader" _
printf "\e[32mOK.\e[0m\n"

# Step 2: normalise all downloaded files with the $cleaner sed program and
# write the sorted, de-duplicated result to ./hosts.txt.
# sed reads the files itself instead of a `cat` pipe: GNU sed is expected to
# supply a missing final newline between files, so the last entry of one list
# is not glued to the first entry of the next.
printf "[\e[33m*\e[0m] Building domain list ... "
sed -r "$cleaner" /tmp/adblocker/*/* | sort -u -o hosts.txt
printf "\e[32mOK.\e[0m\n"

# Step 3: report how many domains ended up in hosts.txt. The count is passed
# as an argument rather than being interpolated into the format string.
printf "The builded hosts.txt has \e[1;33m%s\e[0m domains.\n" "$(wc -l <hosts.txt)"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment