Skip to content

Instantly share code, notes, and snippets.

@sehrgut
Last active June 5, 2021 01:06
Show Gist options
  • Save sehrgut/13c461034a8f6e7c8f51 to your computer and use it in GitHub Desktop.
Save sehrgut/13c461034a8f6e7c8f51 to your computer and use it in GitHub Desktop.
blocklistman.sh
#!/bin/bash
#
# blocklistman.sh
# Licensed under the GPL v3
#
# Downloads a list of peer blocklists in P2P gzipped format and combines them.
# It only eliminates exact duplicate IP ranges (ignoring range name), and doesn't
# handle overlapping ranges specially.
#
# Usage: Run it from a cron job and redirect stdout to a file your application knows about.
#
# todo: output file
# todo: configurable URL file
# todo: command-line datadir
## Configuration
# Root directory for the script's persistent state; the download cache
# lives directly beneath it.
DATADIR="$HOME/.blocklistman"
CACHEDIR="$DATADIR/cache"

## Internal Defaults
# Program identity: printed by --version and appended to the User-Agent.
SELF='blocklistman'
VERSION="v0.2a"
# User-Agent header sent with every download request.
UA="Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 $SELF/$VERSION"

# Option defaults; the command-line parser further down may override them.
NETWORK='true'
FLUSHCACHE='false'
COMPRESSION='gz'
CURLFLAG_SILENT="-s"
#######################################
# Print the command-line help text.
# Globals:   SELF (read) - program name shown in the synopsis
# Arguments: none
# Outputs:   usage text on stderr; stdout is left untouched because it
#            carries the merged blocklist
#######################################
print_usage() {
  # The synopsis and option list mirror every flag the option parser
  # accepts, including the -u/--usage aliases and the value taken by -F.
  cat >&2 <<EOF
Usage: $SELF [-hvVf] [-F format] [--no-network]
fetch and merge ip blocklists to stdout
  -h|--help|-u|--usage: print this usage statement
  -V|--verbose: enable verbose logging to stderr
  -v|--version: print the version string
  -f|--flush: flush cache before attempting each retrieval
  -F|--format FORMAT: output format (gz, text) default: gz
  --no-network: use cached files if they exist, but do not attempt retrieval
EOF
}
# Filter stdin to stdout, discarding every line that starts with '#'
# and every empty line.
strip_comments() {
  grep -v -e '^#' -e '^$'
}
#######################################
# Download one gzipped blocklist into the cache (current directory) and
# print its decompressed contents.
# Globals:   FLUSHCACHE, NETWORK, CURLFLAG_SILENT, UA (read)
# Arguments: $1 - URL of the gzipped list
# Outputs:   decompressed list on stdout; curl/gzip diagnostics on stderr
# Returns:   0 if a cached copy existed after the attempt and decompressed
#            cleanly, non-zero otherwise
#######################################
fetch() {
  local url="$1"
  local hash fn

  # Cache key: MD5 of the URL as fed by the here-string. BSD/macOS ship
  # `md5`, GNU systems ship `md5sum`; keeping only the first word makes
  # both yield the bare digest, so the file name is always "<digest>.gz".
  if command -v md5 >/dev/null 2>&1; then
    hash=$(md5 <<< "$url") || return
  else
    hash=$(md5sum <<< "$url") || return
  fi
  hash=${hash%% *}
  [[ -n "$hash" ]] || return 1
  fn="${hash}.gz"

  # -f: flushing a cache entry that does not exist yet is not an error.
  if [[ "$FLUSHCACHE" == true ]]; then
    rm -f -- "$fn"
  fi

  if [[ "$NETWORK" == true ]]; then
    # The original passed a bare `-c`, which takes a filename and therefore
    # swallowed `-A`, turning the User-Agent string into an extra URL.
    # `-b ''` switches on curl's cookie engine (presumably what `-c` was
    # meant to do for the redirects followed by -L) without needing a file.
    # --fail keeps HTTP error responses from being written to the cache file.
    local -a curl_args=(-L -b '' -A "$UA" --fail --retry 10 -o "$fn")
    if [[ -n "$CURLFLAG_SILENT" ]]; then
      curl_args+=("$CURLFLAG_SILENT")
    fi
    # Conditional GET only makes sense when there is a cached copy whose
    # modification time curl can compare against.
    if [[ -f "$fn" ]]; then
      curl_args+=(-z "$fn")
    fi
    curl "${curl_args[@]}" "$url"
  fi

  # `gzip -dc` is the portable spelling of gzcat/zcat.
  [[ -f "$fn" ]] && gzip -dc -- "$fn"
}
# Merge P2P-format lists arriving on stdin: drop comment and empty lines,
# keep only the run matched by [0-9\.-]+ at the end of each line (the range,
# without its name), then emit the unique ranges in sorted order.
p2p_merge() {
  strip_comments \
    | grep -o -E '[0-9\.-]+$' \
    | sort -u
}
# Encode the merged list from stdin onto stdout: gzip-compress it when
# COMPRESSION is "gz"; any other value passes the text through unchanged.
format_output() {
  if [[ "$COMPRESSION" == 'gz' ]]; then
    gzip
  else
    cat
  fi
}
#######################################
# Parse the command-line options into the global settings.
# Globals:   CURLFLAG_SILENT, FLUSHCACHE, COMPRESSION, NETWORK (written)
#            SELF, VERSION (read)
# Arguments: the script's command-line arguments
# Outputs:   usage, version and diagnostics on stderr
# Exits:     0 after --help/--version, 1 on an invalid command line
#######################################
parse_args() {
  # Loop on the argument count rather than on "$1" being non-empty, so an
  # empty argument is reported instead of silently ending option parsing.
  while (( $# > 0 )); do
    case "$1" in
      -u|--usage|-h|--help)
        print_usage
        exit 0
        ;;
      -v|--version)
        printf '%s %s\n' "$SELF" "$VERSION" >&2
        exit 0
        ;;
      -V|--verbose)
        # Dropping curl's -s lets its progress output reach stderr.
        CURLFLAG_SILENT=''
        ;;
      -f|--flush)
        FLUSHCACHE=true
        ;;
      -F|--format)
        if (( $# < 2 )); then
          printf '%s: option "%s" requires an argument\n' "$SELF" "$1" >&2
          print_usage
          exit 1
        fi
        # todo: check for valid value
        COMPRESSION="$2"
        shift
        ;;
      --no-network)
        NETWORK=false
        ;;
      *)
        printf '%s: illegal option "%s"\n' "$SELF" "$1" >&2
        print_usage
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
# Create the cache directory if needed and work from inside it: fetch reads,
# writes and deletes its cache files relative to the current directory.
# Abort if either step fails, so cache files are never written to (or removed
# from) whatever directory the script happened to be started in.
mkdir -p -- "$CACHEDIR" || exit 1
cd -- "$CACHEDIR" || exit 1
# Main pipeline. The here-document below is the list of sources: '#' lines
# label each list and are removed by strip_comments, every remaining line is
# a URL. Each URL is fetched in turn, and the concatenated lists are merged
# and encoded onto stdout. Progress is reported on stderr only.
(
  # -r keeps any backslash in a URL literal; without IFS= read still trims
  # surrounding whitespace, as before.
  strip_comments | while read -r url; do
    printf 'Fetching "%s"...' "$url" >&2
    # Report the outcome fetch actually returned instead of always "[Done]".
    if fetch "$url"; then
      printf '[Done]\n' >&2
    else
      printf '[Failed]\n' >&2
    fi
  done | p2p_merge | format_output
) <<'EOF'
#Bluetack: level1
http://list.iblocklist.com/?list=ydxerpxkpcfqjaybcssw&fileformat=p2p&archiveformat=gz
#Bluetack: level2
http://list.iblocklist.com/?list=gyisgnzbhppbvsphucsw&fileformat=p2p&archiveformat=gz
#Bluetack: level3
http://list.iblocklist.com/?list=uwnukjqktoggdknzrhgh&fileformat=p2p&archiveformat=gz
#TBG: Primary Threats
http://list.iblocklist.com/?list=ijfqtofzixtwayqovmxn&fileformat=p2p&archiveformat=gz
#TBG: General Corporate Ranges
http://list.iblocklist.com/?list=ecqbsykllnadihkdirsh&fileformat=p2p&archiveformat=gz
#TBG: Business ISPs
http://list.iblocklist.com/?list=jcjfaxgyyshvdbceroxf&fileformat=p2p&archiveformat=gz
#Bluetack: edu
http://list.iblocklist.com/?list=imlmncgrkbnacgcwfjvh&fileformat=p2p&archiveformat=gz
#Bluetack: bogon
http://list.iblocklist.com/?list=gihxqmhyunbxhbmgqrla&fileformat=p2p&archiveformat=gz
#Bluetack: ads
http://list.iblocklist.com/?list=dgxtneitpuvgqqcpfulq&fileformat=p2p&archiveformat=gz
#Bluetack: spyware
http://list.iblocklist.com/?list=llvtlsjyoyiczbkjsxpf&fileformat=p2p&archiveformat=gz
#Bluetack: proxy
http://list.iblocklist.com/?list=xoebmbyexwuiogmbyprb&fileformat=p2p&archiveformat=gz
#Bluetack: badpeers
http://list.iblocklist.com/?list=cwworuawihqvocglcoss&fileformat=p2p&archiveformat=gz
#Bluetack: Microsoft
http://list.iblocklist.com/?list=xshktygkujudfnjfioro&fileformat=p2p&archiveformat=gz
#TBG: Educational Institutions
http://list.iblocklist.com/?list=lljggjrpmefcwqknpalp&fileformat=p2p&archiveformat=gz
#TBG: Search Engines
http://list.iblocklist.com/?list=pfefqteoxlfzopecdtyw&fileformat=p2p&archiveformat=gz
#TBG: Hijacked
http://list.iblocklist.com/?list=tbnuqfclfkemqivekikv&fileformat=p2p&archiveformat=gz
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment