Last active
June 5, 2021 01:06
-
-
Save sehrgut/13c461034a8f6e7c8f51 to your computer and use it in GitHub Desktop.
blocklistman.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# blocklistman.sh
# Licensed under the GPL v3
#
# Downloads a list of peer blocklists in P2P gzipped format and combines them.
# It only eliminates exact duplicate IP ranges (ignoring range name), and doesn't
# handle overlapping ranges specially.
#
# Usage: Put it in a cron job and pipe stdout to a file your application knows about.
#
# todo: output file
# todo: configurable URL file
# todo: command-line datadir
## Configuration
# Root directory for everything this script keeps on disk.
DATADIR="${HOME}/.blocklistman"

## Internal Defaults
# Program name and version: shown in the usage text and the version banner,
# and appended to the User-Agent string below.
SELF=blocklistman
VERSION='v0.2a'
# User-Agent handed to curl: a desktop-browser string followed by
# "<name>/<version>" of this script.
UA="Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36 ${SELF}/${VERSION}"
# Flag passed to curl to keep it quiet; cleared by -V/--verbose.
CURLFLAG_SILENT='-s'
# "true" removes a cached file before retrieving it again (-f/--flush).
FLUSHCACHE=false
# Output format selector (-F/--format): "gz" gzips the result, anything
# else is emitted as plain text.
COMPRESSION=gz
# "false" disables all retrieval and serves from the cache only (--no-network).
NETWORK=true
# Directory holding the downloaded, still-compressed lists.
CACHEDIR="${DATADIR}/cache"
#######################################
# Print the command-line help text.
# Globals:   SELF (read) - program name shown in the synopsis
# Arguments: none
# Outputs:   the usage statement on stderr; nothing on stdout, which is
#            reserved for the merged blocklist
#######################################
function print_usage () {
  # Unquoted delimiter on purpose: $SELF must be expanded inside the text.
  cat >&2 <<EOF
Usage: $SELF [-hvVf] [-F gz|text] [--no-network]
fetch and merge ip blocklists to stdout
-h|--help: print this usage statement
-V|--verbose: enable verbose logging to stderr
-v|--version: print the version string
-f|--flush: flush cache before attempting each retrieval
-F|--format: output format (gz, text) default: gz
--no-network: use cached files if they exist, but do not attempt retrieval
EOF
}
#######################################
# Filter stdin to stdout, dropping lines that begin with '#' and lines
# that are completely empty. Everything else passes through untouched.
# Returns: grep's status (non-zero when no line survives the filter).
#######################################
function strip_comments () {
  grep -v -e '^#' -e '^$'
}
#######################################
# Retrieve one gzipped blocklist through a cache in the current directory
# and write its decompressed contents to stdout.
# The cache file is named "<md5 of the URL plus newline>.gz".
# Globals:   FLUSHCACHE (read) - "true" deletes the cached copy first
#            NETWORK (read) - "true" allows curl to (re)download
#            CURLFLAG_SILENT, UA (read) - passed on to curl
# Arguments: $1 - URL of the gzipped list
# Outputs:   decompressed list on stdout
# Returns:   non-zero when no cached copy exists afterwards, when it
#            cannot be decompressed, or when no md5 tool is available
#######################################
function fetch () {
  local url="$1"
  local digest fn

  # md5sum is the GNU tool, md5 the BSD/macOS one; both yield the same hash.
  # Declaration and assignment are separate so a failing tool is noticed.
  if command -v md5sum >/dev/null 2>&1; then
    digest=$(md5sum <<< "$url") || return
  else
    digest=$(md5 <<< "$url") || return
  fi
  # md5sum prints "<hash>  -"; keep only the hash so both tools agree.
  digest=${digest%% *}
  [ -n "$digest" ] || return 1
  fn="${digest}.gz"

  ## original fetcher
  #curl -sL -A "$UA" "$url" | gunzip
  ## wget output rename doesn't work in concert with timestamping
  #wget --content-disposition \
  # --timestamping \
  # --user-agent="$UA" \
  # "$url"

  if [ "$FLUSHCACHE" = true ]; then
    # -f: flushing something that was never cached is not an error.
    rm -f -- "$fn"
  fi

  if [ "$NETWORK" = true ]; then
    # -c needs a file name; without one it swallowed "-A" and turned the
    # User-Agent into a URL. /dev/null keeps the cookie engine active for
    # the redirects followed by -L without persisting anything.
    local -a curl_args=(-L -c /dev/null -A "$UA" --retry 10)
    if [ -n "$CURLFLAG_SILENT" ]; then
      curl_args+=("$CURLFLAG_SILENT")
    fi
    # Only ask for "newer than the cached copy" when there is one.
    if [ -f "$fn" ]; then
      curl_args+=(-z "$fn")
    fi
    curl "${curl_args[@]}" -o "$fn" "$url"
  fi

  # gzip -dc is the portable spelling of gzcat.
  [ -f "$fn" ] && gzip -dc "$fn"
}
#######################################
# Reduce P2P-format text on stdin to the unique set of trailing IP ranges.
# Comment and empty lines are dropped via strip_comments, then only the
# run of digits, dots and dashes that ends each line is kept, which
# discards whatever precedes it on the line.
# Outputs: one range per line on stdout, sorted, without duplicates
#######################################
function p2p_merge () {
  # Inside a bracket expression a backslash is a literal character, so the
  # dot is listed bare. LC_ALL=C keeps ordering and duplicate detection
  # byte-wise and therefore identical on every machine.
  strip_comments \
    | grep -oE '[0-9.-]+$' \
    | LC_ALL=C sort -u
}
#######################################
# Copy stdin to stdout in the selected output format.
# Globals: COMPRESSION (read) - "gz" compresses with gzip; every other
#          value passes the data through unchanged
#######################################
function format_output () {
  case "$COMPRESSION" in
    gz)
      gzip
      ;;
    *)
      cat
      ;;
  esac
}
#######################################
# Apply the command-line options to the global settings.
# Globals:   CURLFLAG_SILENT, FLUSHCACHE, COMPRESSION, NETWORK (written)
#            SELF, VERSION (read)
# Arguments: the script's command-line arguments
# Outputs:   usage / version / error text on stderr
# Exits:     0 after help or version; 1 on an unknown option or when
#            -F/--format has no value
#######################################
function parse_args () {
  # Loop on the argument count, not on "$1" being non-empty, so an empty
  # argument cannot silently hide the options that follow it.
  while [ "$#" -gt 0 ]; do
    case "$1" in
      -u|--usage|-h|--help)
        print_usage
        exit 0
        ;;
      -v|--version)
        # stderr on purpose: stdout carries the merged blocklist.
        echo "$SELF $VERSION" >&2
        exit 0
        ;;
      -V|--verbose)
        CURLFLAG_SILENT=''
        ;;
      -f|--flush)
        FLUSHCACHE=true
        ;;
      -F|--format)
        if [ "$#" -lt 2 ]; then
          printf 'blocklistman: option "%s" requires an argument\n' "$1" >&2
          print_usage
          exit 1
        fi
        # todo: check for valid value
        COMPRESSION="$2"
        shift
        ;;
      --no-network)
        NETWORK=false
        ;;
      '')
        # Empty arguments are skipped rather than reported as illegal.
        ;;
      *)
        printf 'blocklistman: illegal option "%s"\n' "$1" >&2
        print_usage
        exit 1
        ;;
    esac
    shift
  done
}

parse_args "$@"
# Work from inside the cache directory: fetch names its cache files
# relative to the current directory. Stop if the directory cannot be
# created or entered, so nothing is written somewhere unexpected.
mkdir -p "$CACHEDIR" || exit 1
cd "$CACHEDIR" || exit 1

# The URL list below is fed to the subshell on stdin. Each non-comment
# line is fetched and decompressed; the combined stream is then reduced
# to unique ranges and written to stdout in the selected format.
# Progress messages go to stderr so they never mix with the data.
(
  strip_comments | while read -r url; do
    printf 'Fetching "%s"...' "$url" >&2
    if fetch "$url"; then
      printf '[Done]\n' >&2
    else
      printf '[Failed]\n' >&2
    fi
  done | p2p_merge | format_output
) <<'EOF'
#Bluetack: level1
http://list.iblocklist.com/?list=ydxerpxkpcfqjaybcssw&fileformat=p2p&archiveformat=gz
#Bluetack: level2
http://list.iblocklist.com/?list=gyisgnzbhppbvsphucsw&fileformat=p2p&archiveformat=gz
#Bluetack: level3
http://list.iblocklist.com/?list=uwnukjqktoggdknzrhgh&fileformat=p2p&archiveformat=gz
#TBG: Primary Threats
http://list.iblocklist.com/?list=ijfqtofzixtwayqovmxn&fileformat=p2p&archiveformat=gz
#TBG: General Corporate Ranges
http://list.iblocklist.com/?list=ecqbsykllnadihkdirsh&fileformat=p2p&archiveformat=gz
#TBG: Business ISPs
http://list.iblocklist.com/?list=jcjfaxgyyshvdbceroxf&fileformat=p2p&archiveformat=gz
#Bluetack: edu
http://list.iblocklist.com/?list=imlmncgrkbnacgcwfjvh&fileformat=p2p&archiveformat=gz
#Bluetack: bogon
http://list.iblocklist.com/?list=gihxqmhyunbxhbmgqrla&fileformat=p2p&archiveformat=gz
#Bluetack: ads
http://list.iblocklist.com/?list=dgxtneitpuvgqqcpfulq&fileformat=p2p&archiveformat=gz
#Bluetack: spyware
http://list.iblocklist.com/?list=llvtlsjyoyiczbkjsxpf&fileformat=p2p&archiveformat=gz
#Bluetack: proxy
http://list.iblocklist.com/?list=xoebmbyexwuiogmbyprb&fileformat=p2p&archiveformat=gz
#Bluetack: badpeers
http://list.iblocklist.com/?list=cwworuawihqvocglcoss&fileformat=p2p&archiveformat=gz
#Bluetack: Microsoft
http://list.iblocklist.com/?list=xshktygkujudfnjfioro&fileformat=p2p&archiveformat=gz
#TBG: Educational Institutions
http://list.iblocklist.com/?list=lljggjrpmefcwqknpalp&fileformat=p2p&archiveformat=gz
#TBG: Search Engines
http://list.iblocklist.com/?list=pfefqteoxlfzopecdtyw&fileformat=p2p&archiveformat=gz
#TBG: Hijacked
http://list.iblocklist.com/?list=tbnuqfclfkemqivekikv&fileformat=p2p&archiveformat=gz
EOF
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment