Skip to content

Instantly share code, notes, and snippets.

@ngadmini
Last active June 5, 2023 08:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ngadmini/550a8fdb02cdf3b7834c47a0f347ee3f to your computer and use it in GitHub Desktop.
Save ngadmini/550a8fdb02cdf3b7834c47a0f347ee3f to your computer and use it in GitHub Desktop.
intended to reduce porn-domains as listed in https://trustpositif.kominfo.go.id/assets/db/domains_isp
#!/usr/bin/env bash
# TAGS
# untrust.sh v3.0
# https://gist.githubusercontent.com/ngadmini/49161c90fa82c4e0a2bf25d2a6709077/raw
# AUTHOR
# ngadimin@warnet-ersa.net
# https://github.com/ngadmini
# TL;DR
# intended to reduce adult domains, explicit adult-gambling domains, upperCase, non-ASCII, invalid TLDs
# and domains ending with a port number in the blacklisted domains listed at: "$uri_1"
# output :
# - 'db_trust+adult_taken.txt' is adult domains taken from "$uri_1"
# - 'db_trust+adult_reduc.txt' is domains left over from "$uri_1" after reduction
# usage :
# ~ curl -sL https://gist.githubusercontent.com/ngadmini/49161c90fa82c4e0a2bf25d2a6709077/raw | bash
#
# Abort on the first unhandled command failure.
set -e
# Byte-wise, locale-independent behaviour for sort/uniq/grep/sed.
export LC_ALL=C
PATH=/usr/local/bin:/usr/bin:/bin:${PATH}
# Directory holding this script; resolves to the current directory when the
# script is fed to bash on stdin (BASH_SOURCE is empty in that case).
_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
#_lct=$(date +%a\ %Y-%m-%d\ %T\ %Z)
#_sed="after reducing from: adult domains, invalid TLDs, explicite adult_gambling domains, UpperCase and non-ASCII"
uri_1="https://trustpositif.kominfo.go.id/assets/db/domains_isp"
uri_2="https://gist.githubusercontent.com/ngadmini/e93677cfb451583641df87739738c15c/raw"
#uri_3="https://gist.github.com/ngadmini/49161c90fa82c4e0a2bf25d2a6709077"

# Temp-file paths start empty so the cleanup handler is safe to run even if
# one of the mktemp calls below fails.
adult=''
trust=''
regex=''

#######################################
# Remove the temporary work files created by this script.
# Globals: adult, trust, regex (read)
# Returns: 0
#######################################
f_cleanup() {
  local f
  for f in "${adult}" "${trust}" "${regex}"; do
    if [[ -n "${f}" ]]; then
      rm -f -- "${f}"
    fi
  done
  return 0
}
# Runs on every exit path, including aborts triggered by `set -e`, so the
# temp files never pile up in ${_DIR}.
trap f_cleanup EXIT

adult=$(mktemp -p "${_DIR}")
trust=$(mktemp -p "${_DIR}")
regex=$(mktemp -p "${_DIR}")
# Output files are written relative to the current working directory.
out_1="db_trust+adult_taken.txt"
out_2="db_trust+adult_reduc.txt"
#######################################
# Split the downloaded blacklist into adult domains and the remainder.
# Globals:
#   trust  (read)  - blacklist file, one domain per line
#   adult  (read)  - control list of known adult domains, one per line
#   ar_reg (read)  - element 1 is the extended regex that flags adult domains
#   uri_1  (read)  - source URL; only its basename is shown in the messages
#   out_1  (write) - adult domains taken out of the blacklist (sorted, unique)
#   out_2  (write) - blacklist entries left after the reduction (sorted, unique)
# Outputs: progress messages on stdout
# Returns: non-zero if a tool fails; "no match" from grep is not a failure
#######################################
f_reduce() {
  printf "[x] separating adult domains from '%s' ...\n" "${uri_1##*/}"
  # Deduplicate each list on its own before looking for repeated lines, so a
  # domain that merely appears twice in the blacklist is not mistaken for one
  # that is present in both lists.
  { sort -u "${trust}"; sort -u "${adult}"; } | sort | uniq -d > "${out_1}"
  # grep exits 1 when nothing matches; under `set -e` that would abort the
  # whole script, so only statuses above 1 (real errors) are treated as fatal.
  grep -E -e "${ar_reg[1]}" "${trust}" >> "${out_1}" || [[ $? -eq 1 ]]
  sort -u "${out_1}" -o "${out_1}"
  # Disabled header/TLD injection for out_1 (depends on the commented-out
  # _lct and uri_3 variables):
  #   sed -i "1i # adult domains taken from\: \'${uri_1}'" "${out_1}"
  #   sed -i "2i # generated at ${_lct} by: \'${uri_3}\'\n#\n" "${out_1}"
  #   sed -i "5i adult\ncam\nlgbt\nporn\nsex\nsexy\ntube\nwebcam\nxxx" "${out_1}"

  printf "[x] reducing adult domains at '%s' ...\n" "${uri_1##*/}"
  # First pass loads out_1 into a lookup table; second pass prints every
  # blacklist line whose first field is not in it. The FILENAME guard keeps
  # the first-pass rule from firing on the blacklist when out_1 is empty.
  awk 'FILENAME == ARGV[1] && FNR==NR{a[$1];next} !($1 in a)' "${out_1}" "${trust}" > "${out_2}"
  sort -u "${out_2}" -o "${out_2}"
  # Disabled header/TLD injection for out_2 (depends on the commented-out
  # _lct, _sed and uri_3 variables):
  #   sed -i "1i # domains list taken from\: ${uri_1}" "${out_2}"
  #   sed -i "2i # ${_sed}" "${out_2}"
  #   sed -i "3i # generated at ${_lct} by: ${uri_3}\n#\n" "${out_2}"
  #   sed -i "6i bet\ncasino\nlotto\npoker" "${out_2}"
}
#######################################
# Report a failed download.
# Arguments:
#   $1 - exit status of the download command
#   $2 - URL that was requested
# Outputs: on failure, an error message on stderr
# Returns: 0 when $1 is zero, 1 otherwise (which aborts the script when the
#          caller runs under `set -e`)
#######################################
f_response() {
  local -r status=$1
  local -r url=$2
  if [[ ${status} -ne 0 ]]; then
    # Diagnostics belong on stderr so they are not mixed into captured output.
    printf "[x] failed. could not download file from:\n%s\n" "${url}" >&2
    return 1
  fi
}
# main script
# Only clear the screen when stdout is a terminal: `clear` exits non-zero
# without a usable terminal, which would abort the script under `set -e`.
if [[ -t 1 ]]; then
  clear || true
fi
printf "\n[x] starting %s ...\n" "${0##*/}"
# The regex file holds three lines, loaded below into ar_reg:
#   [0] sed script: normalise every entry and drop unwanted ones
#   [1] grep -E pattern: flags adult-looking domains (used by f_reduce)
#   [2] sed script: drops entries with malformed/invalid TLDs
# The delimiter is quoted so the shell never expands anything inside the
# patterns.
# NOTE(review): in line [1] the ')' right after 'experience' closes the group
# early, leaving the final alternative as 'zoo)' -- looks like a typo; confirm
# before changing, since fixing it alters which domains are matched.
cat > "${regex}" << 'eof'
s/[A-Z]/\L&/g;s/\-\./\./g;s/\([[:space:]]$\|[[:punct:]]$\|\s#\s[a-z]\{2,\}$\|^\(www\.\)\{1,\}\|[\/:][a0-z9]\{2,\}.*\)//g;/\(^$\|[^\o0-\o177]\|^.\{64,\}\)/d;/^.\{4\}./!d;/\.\(adult\|bet\|cam\|casino\|comm\|goog\|lgbt\|lotto\|poker\|porn\|sex\|sexy\|tp\|tube\|webcam\|xxx\)$/d
(adult|agency|amat(eu|i|u)r|an(al|im)|asshole|ayamkampus|babe|bbw|bdsm|big(tit|ass)|bi(kini|nal|rahi|spak)|blo(nde|wjob)|bo(kep|ob|b[^e])|bond(a|age)|breast|bugil|bukkake|bus(en|ty)|cam|[^lqrtu8-]cantik[^q]|ce(leb|nterfold|wek)|cli(t|max)|co(ck|itus|lmek|splay)|creampie|cr[ou]t|cu(m|nil|nni)|daunmuda|de(licate|prav|wasa)|dildo|ebony|erection|eroti|esek|escort|experience)|facial|[^ahl]f[ae](t+)[iy]|faggot|felicia|fem(me|inin)|fe(t+)ish|finger|film|free|fuck|ga(dis|irah|ngbang|y)|gender|gigolo[^t]|girl|granny|hard(core|er)|hasrat|henta[iy]|hooter|horn[iy]|huge|janda|jav[^aiv]|jembut|jizz|jorok|kamasutra|kontol|latina|lendir|le(sb|z)|licking|lingerie|lolit|lonte[^qr]|lust|^manga|mature|me(mek|sum)|milf|model|movie|[^aghirlsnt]malam|muncrat|nak(al|ed)|na[pf]su|naughty|nenen[^cq]|nge(mut|ntot|we)|mulus|nikmatan|nonton|nud(e|is)|rgasm|obscene|^oral|orgy|pant(at|i)|payudara|pe(dophil|juh|lacur|muas|nis)|per(awan|ek|verse|empuan)|photo|[^e]pics[^$]|pi(ctu|jat|ssing)|playmate|porn|pov|pro(found|st)|puss[iy]|^puting|([^e]ranjang|^ranjang|ranjang\.)|remaja|romant|^rough|sange|salop|seks|[sz]ex|semok|shemale|show|s[lm]ut|sodom|spread|squirt|stocking|su(ck|su[^kpqn])|swing|syur|tante(^p|p[aeirs])|tante|te(en|tek)|threesome|ti(ght|tit|ts)|toge[^nlr]|to(ket|ngue)|topp?les|tub(uh|e)|vid[ei]o|vi(ntage|rgin)|voyeur|wanita|whore|wom[ae]n|^x[^n][^\-]{2}|\.x\.|[x]{2,4}|young|zoo)
/\.[a-z]\{2,\}\(-[a-z]\{1,\}\)\{1,\}$/d;/\.\(ai[djn]\|asia\|asp\|biz\|blog\|bola\|cafe\|c[cn]\|click\|club\|cocomp\|com[^pm\.]\|comp[^a\.]\|dao\|dok\|ee\|esc\|fhi\|funn\|holo\|i[ad]\|info\|la[muy8]\|li\(nk\|ve\)\|ly\|me[il]\|mobii\|net[^w\.]\|onll\|org\|poker\|pro[jw]\|pw\|site\|to[jp]\|tv\|vip\|wink\|xyz\)[a0-z9]\{1,\}$/d;/\.\(beten\|clu\|co\(apps\|mi\|n\)\|cpp\|digita\|ef\|file\|html\|[a-z]\{2,\}id\|inf\|[it]p\|lecom\|net\(m\|t\|wap\|world\)\|nfo\|nxyz\|o\(ke\|nlin\|r\|tg\)\|php\|pw-\|rf\|s\|service\|sid\|so\(h\|lution\|m\)\|usm\|vidio\|we\|xn\|xn--\(com-7m0aaa\|ebac0d6ih94e\|rbafm4qg0919b\|tckwedesktop\)\|xtgecom\|x+\(yz\)?\|xy\|xyv\|yu\)$/d
eof
# processing
mapfile -t ar_reg < "${regex}"
printf "[x] grabbing %s ...\n" "${uri_1##*/}"
curl --retry 5 --retry-all-errors -f -s "${uri_1}" | sed -e "${ar_reg[0]}" | sed -e "${ar_reg[2]}" > "${trust}"
# $? would be the status of the last sed; PIPESTATUS[0] is curl's own status,
# which is what decides whether the download worked.
resp_1=${PIPESTATUS[0]}
f_response "${resp_1}" "${uri_1}"
printf "[x] grabbing 'porn.txt'. use it's as a control to identifying adult domains ...\n"
curl --retry 5 --retry-all-errors -f -s "${uri_2}" | sed -e "${ar_reg[0]}" > "${adult}"
resp_2=${PIPESTATUS[0]}
f_response "${resp_2}" "${uri_2}"
f_reduce
# finishing
printf "[x] removing temp files ...\n"
# Remove exactly the files this script created; a `tmp.*` glob would act on
# the current directory, which need not be where the temp files live.
rm -f -- "${adult}" "${trust}" "${regex}"
unset -v ar_reg
printf "[x] done, bye!\n\n"
exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment