Skip to content

Instantly share code, notes, and snippets.

@curipha
Last active May 25, 2022 12:16
Show Gist options
  • Save curipha/26fd99381cf5c407b8fd1a5250557a4a to your computer and use it in GitHub Desktop.
Save curipha/26fd99381cf5c407b8fd1a5250557a4a to your computer and use it in GitHub Desktop.
Build a merged, de-duplicated ad-server domain list
#!/usr/bin/env bash
#
# Build a merged ad/tracker host list.
#
# Downloads every list in SOURCELIST, merges them into ${ADHOSTS}, and drops
# entries that are subdomains of another entry in the merged list.
# A pre-existing ${ADHOSTS} is kept as ${ADHOSTS}.old.
set -o errexit
set -o nounset
set -o pipefail

# Upstream lists to merge. The first URL embeds the current year and month
# (date +%Y%m), so it changes every month.
SOURCELIST=(
  "https://280blocker.net/files/280blocker_domain_$(date +%Y%m).txt"
  'https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt'
  'https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt'
  'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=nohtml&showintro=0&mimetype=plaintext'
)

# Output file, written in the current directory.
ADHOSTS=adhosts.txt

# Scratch file, created next to the output file (mktemp -p .).
TEMPFILE=$(mktemp -p .)

# Remove the scratch file on every exit path, including aborts caused by
# errexit/pipefail. rm -f stays quiet when the file has already been moved.
trap 'rm -f -- "${TEMPFILE}"' EXIT

# Keep the previous result around as a backup.
if [[ -f "${ADHOSTS}" ]]; then
  mv -- "${ADHOSTS}" "${ADHOSTS}.old"
fi
# normalize_hosts: filter one downloaded list from stdin to stdout.
#   - strips carriage returns and UTF-8 byte-order marks
#   - removes '#' comments together with the whitespace before them
#   - drops lines that are empty or whitespace-only
#   - lower-cases everything
normalize_hosts() {
  tr -d '\r' \
    | sed -e 's/\xef\xbb\xbf//g' -e 's/[[:space:]]*#.*$//' -e '/^[[:space:]]*$/d' \
    | tr '[:upper:]' '[:lower:]'
}

echo ':: Download Ad host lists'
for uri in "${SOURCELIST[@]}"; do
  echo "${uri}"
  # --fail: an HTTP error (e.g. 404) must not have its error page merged into
  # the list as if it were host names. --show-error keeps curl's diagnostic
  # visible despite --silent. -w "\n" guarantees a final newline so that
  # consecutive lists do not run together.
  if ! curl --fail --silent --show-error -w "\n" "${uri}" \
    | normalize_hosts >> "${TEMPFILE}"; then
    echo "error: failed to download ${uri}" >&2
    exit 1
  fi
done

# Merge: sorted, exact duplicates removed.
sort "${TEMPFILE}" | uniq > "${ADHOSTS}"
# prune_subdomains FILE
# Prints the lines of FILE, minus every line that is a subdomain of another
# line in FILE (for example "a.b.com" is dropped when "b.com" is listed).
#
# Matching is literal and label-aligned: leading labels are stripped one at a
# time and each remaining suffix is looked up in the set of all lines. No
# line is ever interpreted as a regular expression, so dots in a domain match
# only dots. This is a single awk pass instead of one grep/sed run per line.
prune_subdomains() {
  awk '
    # First read of the file: remember every entry.
    NR == FNR { seen[$0] = 1; next }
    # Second read: keep a line only if none of its parent domains is listed.
    {
      rest = $0
      while ((dot = index(rest, ".")) > 0) {
        rest = substr(rest, dot + 1)
        if (rest in seen) next
      }
      print
    }
  ' "$1" "$1"
}

echo ':: Delete duplicated domains'
TOTAL=$(wc -l < "${ADHOSTS}")
prune_subdomains "${ADHOSTS}" > "${TEMPFILE}"
echo "${TOTAL} / ${TOTAL}"
mv "${TEMPFILE}" "${ADHOSTS}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment