Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Parallel Proxy Scraper & Checker Tool /
#!/bin/bash
# HR Proxy Scraper Script
# Rebuilt by Kyxrec0n
# Demo video : youtube.com/watch?v=iXCeR_XsP6o
# USAGE: ./proxy-scraper.sh <ARGUMENT> <OPTIONS>
# ARGUMENTS:
# Proxy Checker - Single Proxy Check
# -s <IP>:<PORT>
#
# Proxy Checker - List Scan:
# -x </path/to/list/of/proxies/to/check>
#
# Sites List Scraper Session:
# -L /path/to/list/of/proxy/pages/to/scrape
#
# Pre-Built Scraper Sessions:
# -P <OPTION>
# 1 = Samair.ru Free Proxy List
# 2 = Atomintersoft.com Free Proxy Lists
# 3 = Multiproxy.org Free Proxy List
# 4 = Proxz.com Free Proxy Lists
# 5 = Aliveproxy.com Free Proxy Lists
# 6 = Xroxy.com Free Proxy Lists
# 7 = Proxylists.net Free Proxy Lists
# 8 = Proxynova.com Free Proxy Lists
# 9 = Elite-proxies.blogspot.com Daily Proxy Lists
# 10 = Dailyproxylists.com Daily Proxy List
# 11 = Proxy-ip-list.com Daily Proxy Lists
# 12 = Nntime.com Free Proxy Lists
# 13 = HideMyAss.com Free Proxy Lists
# 14 = Freeproxylists.com Elite Proxy Lists
# 15 = Proxys.com.ar Free Proxy Lists
#
# EX: ./proxy-scraper.sh -L scraper-samair.lst
# EX: ./proxy-scraper.sh -P 1
# EX: ./proxy-scraper.sh -x /home/HR/scraped_proxynova_proxies.lst
# EX: ./proxy-scraper.sh -s 127.0.0.1:8080
#
# ****List should contain one link per line, each link should point to page to be scraped for proxies****
#
# Pre-requisite: parallel (not the one from moreutils package :p)
# To-Install:
# ftp://ftp.gnu.org/gnu/parallel/<latest-copy>
# Tested with => ftp://ftp.gnu.org/gnu/parallel/parallel-20120622.tar.bz2
# tar -jxvf parallel-<latest-copy>.tar.bz2
# cd parallel-<latest-copy>/
# ./configure && make
# sudo make install
#
# Let the magic begin....
# ---- Global configuration -------------------------------------------------
# Directory in which the scratch files are created.
JUNK=/tmp
# The second command-line word is copied into four differently named
# variables; each worker function reads the one matching its option
# (-x => LIST, -P => PREOPT, -s => SPROXY, -L => SITE_LIST).
LIST="$2"
PREOPT="$2"
SPROXY="$2"
SITE_LIST="$2"
# Two scratch files created by mktemp under $JUNK. STOR generally receives
# downloaded page data, STOR2 the extracted IP:PORT pairs.
STOR=$(mktemp -p "$JUNK" -t fooooobarproxyscraper.tmp.XXX)
STOR2=$(mktemp -p "$JUNK" -t fooooobarproxyscraper2.tmp.XXX)
# User-agent strings: 1=Opera, 2=FireFox, 3=Chrome, 4=IE
uagent1="Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00"
uagent2="Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1"
uagent3="Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0 Safari/536.6"
uagent4="Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0"
# Proxy-judge endpoint; the proxy under test is appended to this URL.
JUDGE="http://www.samair.ru/proxy-service/proxyjudge.php?send="
# Page lists for the pre-built sessions, one URL per line. Each value is a
# plain string (not an array) that begins with a newline.
# Used by pre-built session 2.
ATOMLINKS="
http://atomintersoft.com/proxy_list_United-States_us";
# Used by pre-built session 5.
ALIVELINKS="
http://aliveproxy.com/high-anonymity-proxy-list/
http://aliveproxy.com/anonymous-proxy-list/";
# Used by pre-built session 6.
XROXYLINKS="
http://www.xroxy.com/proxylist.php?port=&type=Anonymous&ssl=&country=US&latency=&reliability=&sort=reliability&desc=true&pnum=1#table
http://www.xroxy.com/proxylist.php?port=&type=Anonymous&ssl=&country=US&latency=&reliability=&sort=reliability&desc=true&pnum=2#table
http://www.xroxy.com/proxylist.php?port=&type=Anonymous&ssl=&country=US&latency=&reliability=&sort=reliability&desc=true&pnum=3#table";
# Used by pre-built session 11.
PROXYIPLISTLINKS="
http://proxy-ip-list.com/free-usa-proxy-ip.html";
# Interrupt handling (CTRL+C).
# bashtrap: announce the shutdown, delete both scratch files and terminate
# the whole script.
# Globals read: STOR, STOR2
# Exits with status 130 (128 + SIGINT) so that a calling process can tell an
# interrupted run apart from a successful one.
trap bashtrap INT
bashtrap(){
  echo
  echo
  echo 'CTRL+C has been detected!.....shutting down now' | grep --color '.....shutting down now'
  # -f keeps rm quiet when a scratch file has already been removed.
  rm -f -- "$STOR" "$STOR2" 2> /dev/null
  # Leave the entire script, not just the handler.
  exit 130
}
#End bashtrap()
# usage_info: print the banner, the option summary, the pre-built session
# menu and a few example invocations, then exit the script.
# The grep patterns containing an empty alternative ("||") match every line,
# so they only colourise the named fragments without filtering anything.
function usage_info(){
  local num=0 site sample

  printf '\n'
  echo "Hood3dRob1n's Proxy Scraper Script" | grep --color -E 'Hood3dRob1n||s Proxy Scraper Script'
  printf '\n'
  echo "USAGE: $0 <ARGUMENT> <OPTIONS>" | grep --color 'USAGE'

  echo "Proxy Checker - Single IP:" | grep --color -E 'Proxy Checker||Single IP'
  echo " -s <IP>:<PORT>"
  printf '\n'

  echo "Proxy Checker - List Scan:" | grep --color -E 'Proxy Checker||List Scan'
  echo " -x </path/to/list/of/proxies/to/check>"
  printf '\n'

  echo "Custom Sites List Scraper Session:" | grep --color -E 'Custom Sites List Scraper Session'
  echo " -L /path/to/list/of/proxy/pages/to/scrape"
  printf '\n'

  echo "Pre-Built Scraper Sessions:" | grep --color -E 'Pre||Built Scraper Sessions'
  echo " -P <OPTION>"
  # Menu entries in option order; the option number is derived from position.
  for site in \
    "Samair.ru Free Proxy List" \
    "Atomintersoft.com Free Proxy Lists" \
    "Multiproxy.org Free Proxy List" \
    "Proxz.com Free Proxy Lists" \
    "Aliveproxy.com Free Proxy Lists" \
    "Xroxy.com Free Proxy Lists" \
    "Proxylists.net Free Proxy Lists" \
    "Proxynova.com Free Proxy Lists" \
    "Elite-proxies.blogspot.com Daily Proxy Lists" \
    "Dailyproxylists.com Daily Proxy Lists" \
    "Proxy-ip-list.com Daily Proxy Lists" \
    "Nntime.com Daily Proxy Lists" \
    "HideMyAss.com Daily Proxy Lists" \
    "Freeproxylists.com Elite Proxy Lists" \
    "Proxys.com.ar Free Proxy Lists"
  do
    num=$((num + 1))
    echo " $num => $site" | grep --color "$num"
  done
  printf '\n'

  # Example command lines.
  for sample in \
    "-L scraper-samair.lst" \
    "-P 1" \
    "-x /home/HR/scraped_proxynova_proxies.lst" \
    "-s 127.0.0.1:8080"
  do
    echo "EX: $0 $sample" | grep --color 'EX'
  done
  printf '\n'

  echo "****List should contain one link per line, each link should point to page to be scraped for proxies****"
  exit
}
#End of usage_info()
# type_check: report one judged proxy and record it in the matching list file.
# Globals read:
#   METH   - 1 means a single-proxy check (report $SPROXY); anything else
#            reports $IP
#   RESP   - judge verdict: transparent | anonymous | high-anonymous
#   CCODE  - country code printed next to the verdict
# Globals written: FINOUT (the proxy string being reported)
# Files appended (current directory): <verdict>-proxies.lst
# Any other verdict prints "[BAD] <proxy>" and records nothing.
function type_check(){
  local label

  if [ "$METH" == 1 ]; then
    FINOUT="$SPROXY"
  else
    FINOUT="$IP"
  fi

  # Map the verdict to its display label; unknown verdicts are rejected here.
  case "$RESP" in
    transparent)    label='TRANSPARENT' ;;
    anonymous)      label='ANON' ;;
    high-anonymous) label='HIGH-ANON' ;;
    *)
      echo "[BAD] $FINOUT"
      return
      ;;
  esac

  echo "[ $label ] $FINOUT" | grep --color -E "\[ $label \]"
  # The empty alternative in the pattern matches every line, so grep only
  # colourises and never filters.
  echo " [ COUNTRY: $CCODE ]" | grep --color -E "\[ COUNTRY||\]"
  # Quoted printf: the proxy string is written verbatim, with no word
  # splitting or glob expansion.
  printf '%s\n' "$FINOUT" >> "${RESP}-proxies.lst" 2> /dev/null
}
# single_scan: send one proxy ($SPROXY) to the judge and report the verdict.
# Globals read:    JUDGE, SPROXY, STOR, uagent3
# Globals written: TARGET, RESP, CCODE, METH (set to 1 for type_check)
# The judge reply is saved to $STOR; its 2nd and 3rd '|'-separated fields are
# taken as the verdict and the country code.
function single_scan(){
  TARGET="$JUDGE$SPROXY"
  echo
  echo "Checking single proxy address...." | grep --color 'Checking single proxy address'
  # $TARGET is quoted so a proxy argument containing spaces or glob
  # characters stays a single URL and cannot add extra curl arguments.
  curl --url "$TARGET" --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxyjudge" -A "$uagent3" -o "$STOR" 2> /dev/null
  RESP=$(cut -d'|' -f2 "$STOR")
  CCODE=$(cut -d'|' -f3 "$STOR")
  METH=1
  type_check
}
#End single_scan()
# list_scan: check every proxy listed in the file $LIST (one per line).
# Globals read:    LIST, JUDGE
# Globals written: METH (set to 2 for type_check), RESULTS
# The list is fed to GNU parallel, which runs up to 20 curl requests against
# the judge at a time (-k keeps output in input order). Each reply line is
# split on '|' into proxy, verdict and country code and handed to type_check.
# If the list file is unreadable an error is printed and usage_info is called.
function list_scan(){
  echo
  if [ ! -r "$LIST" ]; then
    echo
    echo "Unable to read provided list file! Please check permissions or path and re-try...." | grep --color -E 'Unable to read provided list file||Please check permissions or path and re||try'
    echo
    usage_info
  fi
  echo "Preparing to check proxies from proxy list...." | grep --color 'Preparing to check proxies from proxy list'
  METH=2
  # "$JUDGE{}" is quoted because the judge URL contains '?', which the shell
  # would otherwise treat as a glob character.
  RESULTS=$(parallel -k -j 20 curl "$JUDGE{}" --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' < "$LIST" 2> /dev/null)
  # Without this guard the loop below would run once on an empty line and
  # report a proxy that was never in the list.
  if [ -z "$RESULTS" ]; then
    echo "No responses received from the proxy judge." >&2
    return 0
  fi
  printf '%s\n' "$RESULTS" | while read -r res
  do
    # Blank reply lines carry no proxy; skip them.
    [ -n "$res" ] || continue
    IP=$(printf '%s\n' "$res" | cut -d'|' -f1)
    RESP=$(printf '%s\n' "$res" | cut -d'|' -f2)
    CCODE=$(printf '%s\n' "$res" | cut -d'|' -f3)
    type_check
  done
}
#End list_scan()
# scraper: download every page listed in the file $SITE_LIST (one URL per
# line) and harvest anything that looks like IP:PORT.
# Globals read:    SITE_LIST, STOR, STOR2
# Globals written: NUMPROX (matches found, duplicates included),
#                  NUMPROXTOTAL (unique proxies)
# Output file:     scraped_proxies.lst (current directory, sorted, unique)
# If the list file is unreadable an error is printed and usage_info is called,
# mirroring list_scan.
function scraper(){
  # Basic-regex shape of an IP:PORT pair.
  local proxy_re="[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}"

  echo
  if [ ! -r "$SITE_LIST" ]; then
    echo
    echo "Unable to read provided list file! Please check permissions or path and re-try...." | grep --color -E 'Unable to read provided list file||Please check permissions or path and re||try'
    echo
    usage_info
  fi
  echo "Starting proxy scraping, please hang tight - this might take a minute or two...." | grep --color -E 'Starting proxy scraping||please hang tight||this might take a minute or two'
  # The list is redirected in (quoted) so paths containing spaces work.
  parallel -k -j 0 -X curl {} --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' < "$SITE_LIST" >> "$STOR" 2> /dev/null
  grep -o "$proxy_re" "$STOR" >> "$STOR2" 2> /dev/null
  NUMPROX=$(wc -l < "$STOR2")
  sort -u "$STOR2" > scraped_proxies.lst 2> /dev/null
  NUMPROXTOTAL=$(wc -l < scraped_proxies.lst)
  echo
  echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
  # Report the unique count so the number agrees with the list printed below
  # (and with what the pre-built sessions report).
  echo "Proxies Found: $NUMPROXTOTAL" | grep --color 'Proxies Found'
  cat scraped_proxies.lst
  echo
}
# pre_built_scraper: run one of the hard-coded scraping recipes selected by
# $PREOPT (1-15).
# Every recipe downloads pages with curl (most fan out through GNU parallel),
# extracts IP:PORT pairs, writes them to scraped_<site>_proxies.lst in the
# current directory, then prints the number of lines in that file followed by
# the file itself.
# Globals read: PREOPT, STOR, STOR2, ATOMLINKS, ALIVELINKS, XROXYLINKS,
#               PROXYIPLISTLINKS, uagent1
# Globals written: NUMPROX, plus recipe-specific scratch variables.
# Any other $PREOPT value prints an error and calls usage_info.
# NOTE(review): the sed/grep patterns below are tied to each site's HTML as it
# was when the recipe was written - confirm against the live markup.
function pre_built_scraper(){
case $PREOPT in
1)
echo
echo "Starting Samair.ru pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting Samair||ru pre||built proxy scraper||hang tight this might take a few'
# Pages 1-9 use a zero-padded name (proxy-0N.htm), pages 10-75 do not,
# hence the two separate seq runs.
seq 1 9 | parallel -k -j 0 -X curl http://www.samair.ru/proxy/proxy-0{}.htm --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' >> "$STOR" 2> /dev/null
seq 10 75 | parallel -k -j 0 -X curl http://www.samair.ru/proxy/proxy-{}.htm --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' >> "$STOR" 2> /dev/null
# Pull out everything shaped like IP:PORT, then sort and de-duplicate.
cat "$STOR" | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" >> "$STOR2"
cat "$STOR2" | sort | uniq > scraped_samair_proxies.lst
NUMPROX=$(wc -l scraped_samair_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_samair_proxies.lst
echo
;;
2)
echo "Starting Scraper for atomintersoft.com, hang tight this might take a minute..." | grep --color -E 'Starting Scraper for atomintersoft||com||hang tight this might take a minute'
# ATOMLINKS is a plain string, so "${ATOMLINKS[@]}" expands to that single
# string; parallel receives it on stdin.
# NOTE(review): the string starts with a newline, so the first input line is
# empty - confirm how parallel handles it.
echo "${ATOMLINKS[@]}" | parallel -k -j 0 --xapply curl {} --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://atomintersoft.com/" -A 'proxb0t' >> "$STOR" 2> /dev/null
cat "$STOR" | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" >> "$STOR2"
cat "$STOR2" | sort | uniq > scraped_atomintersoft_proxies.lst
NUMPROX=$(wc -l scraped_atomintersoft_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_atomintersoft_proxies.lst
echo
;;
3)
echo "Starting Scraper for multiproxy.org, hang tight this might take a minute..." | grep --color -E 'Starting Scraper for multiproxy||org||hang tight this might take a minute'
# Single page fetched directly with curl; note '>' truncates $STOR here
# whereas most recipes append.
curl --url http://multiproxy.org/cgi-bin/search-proxy.pl --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A "$uagent1" > "$STOR" 2> /dev/null
cat "$STOR" | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" >> "$STOR2"
cat "$STOR2" | sort | uniq > scraped_multiproxy_proxies.lst
NUMPROX=$(wc -l scraped_multiproxy_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_multiproxy_proxies.lst
echo
;;
4)
echo "Starting proxz.com pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting proxz||com pre||built proxy scraper||hang tight this might take a few'
# One pipeline: fetch pages 0-6, keep the lines carrying eval(unescape('...')),
# strip the surrounding markup, rewrite %HH escapes as \xHH and let printf
# decode them, turn the cell separator into ':' and extract IP:PORT.
# NOTE(review): the page text is used as the printf FORMAT string (and is
# word-split because the substitution is unquoted) - any stray '%' in the
# data would be interpreted by printf.
seq 0 6 | parallel -k -j 0 curl http://www.proxz.com/proxy_list_high_anonymous_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<\/td><\/tr><script type='text\/javascript'>eval(unescape('//" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><tr><td colspan=\"3\">//" | printf $(cat - | sed 's/\\/\\\\/g;s/\(%\)\([0-9a-fA-F][0-9a-fA-F]\)/\\x\2/g') | sed 's/<\/td><td>/:/g' | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" | sort | uniq > scraped_proxz_proxies.lst
NUMPROX=$(wc -l scraped_proxz_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_proxz_proxies.lst
echo
;;
5)
echo "Starting Scraper for aliveproxy.com, hang tight this might take a minute..." | grep --color -E 'Starting Scraper for aliveproxy||com||hang tight this might take a minute'
# Same shape as recipe 2, driven by the ALIVELINKS string.
echo "${ALIVELINKS[@]}" | parallel -k -j 0 --xapply curl {} --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://aliveproxy.com/" -A 'proxb0t' >> "$STOR" 2> /dev/null
cat "$STOR" | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" >> "$STOR2"
cat "$STOR2" | sort | uniq > scraped_aliveproxy_proxies.lst
NUMPROX=$(wc -l scraped_aliveproxy_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_aliveproxy_proxies.lst
echo
;;
6)
echo "Starting Scraper for xroxy.com, hang tight this might take a minute..." | grep --color -E 'Starting Scraper for xroxy||com||hang tight this might take a minute'
echo "${XROXYLINKS[@]}" | parallel -k -j 0 curl {} --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.xroxy.com/proxylist.php" -A 'proxb0t' >> "$STOR" 2> /dev/null
# The page carries the address and port as "...&port=NNNN"; rewriting
# "&port=" to ':' makes them match the IP:PORT pattern.
cat "$STOR" | sed 's/&port=/:/g' | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" >> "$STOR2"
cat "$STOR2" | sort | uniq > scraped_xroxy_proxies.lst
NUMPROX=$(wc -l scraped_xroxy_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_xroxy_proxies.lst
echo
;;
7)
echo "Starting proxylists.net pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting proxylists||net pre||built proxy scraper||hang tight this might take a few'
# One fetch per country prefix (us, br, cn, cz, nl, de, in, ru, eg use
# numbered pages with hard-coded page counts; gb, ca, au, fr, hk fetch only
# page 0). Each pipeline keeps the eval(unescape('...')) rows, strips the
# markup and leaves "<escaped-ip>:<port>" lines in $STOR.
seq 1 10 | parallel -k -j 0 curl http://www.proxylists.net/us_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
seq 1 24 | parallel -k -j 0 curl http://www.proxylists.net/br_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
seq 1 35 | parallel -k -j 0 curl http://www.proxylists.net/cn_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
seq 1 2 | parallel -k -j 0 curl http://www.proxylists.net/cz_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
seq 1 2 | parallel -k -j 0 curl http://www.proxylists.net/nl_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
seq 1 2 | parallel -k -j 0 curl http://www.proxylists.net/de_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
seq 1 3 | parallel -k -j 0 curl http://www.proxylists.net/in_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
seq 1 4 | parallel -k -j 0 curl http://www.proxylists.net/ru_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
seq 1 5 | parallel -k -j 0 curl http://www.proxylists.net/eg_{}.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
curl http://www.proxylists.net/gb_0.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
curl http://www.proxylists.net/ca_0.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
curl http://www.proxylists.net/au_0.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
curl http://www.proxylists.net/fr_0.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
curl http://www.proxylists.net/hk_0.html --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep "javascript'>eval(unescape('" | sed "s/<tr><td><script type='text\/javascript'>eval(unescape('//g" | sed "s/'));<\/script><noscript>Please enable javascript<\/noscript><\/td><td>/:/g" | sed "s/<\/td><\/tr>//g" >> "$STOR"
# Decode each collected line: %HH becomes \xHH and printf expands it; the
# trailing '");' left over from the script wrapper is removed.
# NOTE(review): as in recipe 4, the line itself is the printf format string.
cat "$STOR" | while read line;
do
printf $(echo $line | sed 's/\\/\\\\/g;s/\(%\)\([0-9a-fA-F][0-9a-fA-F]\)/\\x\2/g') | sed "s/\");//g" >> "$STOR2" 2> /dev/null;
done;
cat "$STOR2" | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" | sort | uniq > scraped_proxylists_proxies.lst
NUMPROX=$(wc -l scraped_proxylists_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_proxylists_proxies.lst
echo
;;
8)
echo "Starting proxynova.com pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting proxynova||com pre||built proxy scraper||hang tight this might take a few'
# Fetch pages 1-10, strip all HTML tags, keep each IP line plus the 8 lines
# after it, and clean up so that $STOR is expected to hold alternating
# IP / port lines. '>' truncates $STOR.
seq 1 10 | parallel -k -j 0 curl http://www.proxynova.com/proxy-server-list/?p={} --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.proxynova.com/proxy-server-list/" -A 'proxb0t' 2> /dev/null | sed -e :a -e 's/<[^>]*>//g;/</N;//ba' -e 's/ //g' | grep -A 8 "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}" | awk -F'--' '{ print $1 }' | sed -e 's/.$//' -e '/^$/d' > "$STOR"
LINE_COUNT=0
IP=""
PORT=""
# Pair consecutive lines: odd line = IP, even line = port. Each pair is
# echoed to the terminal and appended to $STOR2. The loop runs in a pipeline
# subshell, so only the file output survives it.
cat "$STOR" | while read line
do
LINE_COUNT=$((LINE_COUNT +1));
if [ "$LINE_COUNT" == 1 ]; then
IP="$line"
fi
if [ "$LINE_COUNT" == 2 ]; then
PORT="$line"
echo "$IP:$PORT"
echo "$IP:$PORT" >> "$STOR2"
LINE_COUNT=0
fi
done
# uniq without a preceding sort: only adjacent duplicates are removed.
cat "$STOR2" | uniq > scraped_proxynova_proxies.lst
NUMPROX=$(wc -l scraped_proxynova_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_proxynova_proxies.lst
echo
;;
9)
echo
echo "Starting elite-proxies.blogspot.com pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting elite||proxies||blogspot||com pre||built proxy scraper||hang tight this might take a few'
# Single page; IP:PORT pairs are extracted straight from the HTML without
# touching the scratch files.
curl http://elite-proxies.blogspot.com/ --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" | sort | uniq > scraped_elite-proxies-blog_proxies.lst
NUMPROX=$(wc -l scraped_elite-proxies-blog_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_elite-proxies-blog_proxies.lst
echo
;;
10)
echo
echo "Starting dailyproxylists.com pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting dailyproxylists||com pre||built proxy scraper||hang tight this might take a few'
# The list is wrapped in document.write(unescape("...")); strip the wrapper,
# decode the %HH escapes through printf, turn the cell separator into ':' and
# extract IP:PORT. Same printf-as-format caveat as recipe 4.
curl http://www.dailyproxylists.com/index.php/proxy-lists --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' | grep 'document.write(unescape(\"' | sed -e 's/<div id=\"phoca\-top\"><SCRIPT LANGUAGE=\"JavaScript\">document\.write(unescape(\"//g' -e 's/\"))<\/SCRIPT><\/div>//g' -e 's/^[ \t]*//;s/[ \t]*$//' | printf $(cat - | sed 's/\\/\\\\/g;s/\(%\)\([0-9a-fA-F][0-9a-fA-F]\)/\\x\2/g') | sed 's/<\/td><td class=\"cell0\">/:/g' | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" | sort | uniq > scraped_dailyproxylists_proxies.lst
NUMPROX=$(wc -l scraped_dailyproxylists_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_dailyproxylists_proxies.lst
echo
;;
11)
echo "Starting Scraper for proxy-ip-list.com, hang tight this might take a minute..." | grep --color -E 'Starting Scraper for proxy||ip||list||com||hang tight this might take a minute'
# Here $STOR receives the already-extracted IP:PORT pairs rather than raw HTML.
echo "${PROXYIPLISTLINKS[@]}" | parallel -k -j 0 curl {} --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" >> "$STOR" 2> /dev/null
cat "$STOR" | sort | uniq > scraped_proxy-ip-list_proxies.lst
NUMPROX=$(wc -l scraped_proxy-ip-list_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_proxy-ip-list_proxies.lst
echo
;;
12)
echo "Starting nntime.com pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting nntime||com pre||built proxy scraper||hang tight this might take a few'
# Pages 01-09 (zero-padded) and 10-12 are fetched by two otherwise identical
# pipelines. After the markup is stripped, awk emits
# "<ip>:<count>:<digits>" and the loop takes the last <count> characters of
# <digits> as the port, appending "IP:PORT" to $STOR.
# NOTE(review): this appears to undo the site's JavaScript port obfuscation;
# confirm against the current page source.
seq 1 9 | parallel -k -j 0 curl http://nntime.com/proxy-list-0{}.htm --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep 'onclick=\"choice()\"' | sed -e 's/<tr class=\"[a-z]\{1,4\}\"><td><input type=\"checkbox\" name=\"c[0-9]\{1,3\}\" id=\"row[0-9]\{1,2\}\" value=//g' -e 's/onclick="choice()\" \/><\/td><td>//g' -e 's/<script type=\"text\/javascript\">document.write(\":"/:/g' -e 's/)<\/script><\/td>//g' -e 's/\"//g' | cut -d'.' -f4-8 | sed 's/+//g' | sed 's/:/ /g' | awk -F" " '{ print ""$2":"length($3)":"$1 }' | while read line; do IP=$(echo $line | awk -F":" '{ print $1 }' | sed 's/:[a-z]\{1,6\}//g'); MINUS=$(echo $line | awk -F":" '{ print $2 }' | sed 's/:[a-z]\{1,6\}//g'); FUNK=$(echo $line | awk -F":" '{ print $3 }' | sed 's/:[a-z]\{1,6\}//g'); LENGTH=$(echo $line | awk -F":" '{ print length($3) }'); PORT=${FUNK:(LENGTH - MINUS):LENGTH}; echo "$IP:$PORT" >> "$STOR"; done;
seq 10 12 | parallel -k -j 0 curl http://nntime.com/proxy-list-{}.htm --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.google.com/q?=proxylists" -A 'proxb0t' 2> /dev/null | grep 'onclick=\"choice()\"' | sed -e 's/<tr class=\"[a-z]\{1,4\}\"><td><input type=\"checkbox\" name=\"c[0-9]\{1,3\}\" id=\"row[0-9]\{1,2\}\" value=//g' -e 's/onclick="choice()\" \/><\/td><td>//g' -e 's/<script type=\"text\/javascript\">document.write(\":"/:/g' -e 's/)<\/script><\/td>//g' -e 's/\"//g' | cut -d'.' -f4-8 | sed 's/+//g' | sed 's/:/ /g' | awk -F" " '{ print ""$2":"length($3)":"$1 }' | while read line; do IP=$(echo $line | awk -F":" '{ print $1 }' | sed 's/:[a-z]\{1,6\}//g'); MINUS=$(echo $line | awk -F":" '{ print $2 }' | sed 's/:[a-z]\{1,6\}//g'); FUNK=$(echo $line | awk -F":" '{ print $3 }' | sed 's/:[a-z]\{1,6\}//g'); LENGTH=$(echo $line | awk -F":" '{ print length($3) }'); PORT=${FUNK:(LENGTH - MINUS):LENGTH}; echo "$IP:$PORT" >> "$STOR"; done;
cat "$STOR" | sort | uniq > scraped_nntime_proxies.lst
NUMPROX=$(wc -l scraped_nntime_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_nntime_proxies.lst
echo
;;
13)
echo "Starting HideMyAss.com pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting HideMyAss||com pre||built proxy scraper||hang tight this might take a few'
# Fetch list pages 2-50 (page 1 is not requested) with a hard-coded
# PHPSESSID cookie, keep each hidden-div row plus the 2 lines after it, and
# replace the span/div markup with '~' separators.
# NOTE(review): the fixed session cookie is presumably stale - verify.
seq 2 50 | parallel -k -j 0 curl http://www.hidemyass.com/proxy-list/{} -b "PHPSESSID=f0997g34g7qee5speh0bian143" --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.hidemyass.com/proxy-list/" 2> /dev/null | grep -A2 '<div style=\"display:none\">' | sed -e 's/<\/style>//g' -e 's/\-\-//g' -e 's/ <td>//g' -e '/^$/d' | sed -e 's/_/-/g' -e 's/<span class="[a-zA-Z\-]\{1,4\}">/~/' -e 's/<div style=\"display:none\">/~/g' -e 's/<span class=\"[a-zA-Z0-9\-]\{1,4\}\">/~/g' -e 's/<span class=\"\" style=\"\">/~/g' -e 's/<span style=\"display: inline\">/~/g' -e 's/<span style=\"display:none\">/~/g' -e 's/<\/div>//g' -e 's/<\/span>//g' -e 's/<\/td>//g' -e 's/<span>//g' -e 's/^~//g' >> "$STOR"
LINE_COUNT=0
IP1=""
IP2=""
IP3=""
IP4=""
PORT=""
# Lines are consumed in pairs: the first yields the four octets (split on
# '.' and '~'), the second the port.
cat "$STOR" | while read line
do
LINE_COUNT=$((LINE_COUNT +1));
if [ "$LINE_COUNT" == 1 ]; then
IP1=$(echo $line | awk -F"~" ' { print $1 }')
IP2=$(echo $line | awk -F"." ' { print $2 }' | sed -e 's/^~//g' | awk -F"~" ' { print $1 } ')
IP3=$(echo $line | awk -F"." ' { print $3 }' | sed -e 's/^~//g' | awk -F"~" '{ print $1 }')
IP4=$(echo $line | awk -F"." ' { print $4 }' | sed -e 's/^~//g')
fi
if [ "$LINE_COUNT" == 2 ]; then
# Only accept the first word as a port when it is at most 5 characters long;
# otherwise PORT keeps whatever value it had from the previous pair.
PCHK=$(echo $line | awk '{print length($1)}')
if [ $PCHK -le 5 ]; then
PORT=$(echo $line | awk '{print $1}')
fi
echo "$IP1.$IP2.$IP3.$IP4:$PORT" >> "$STOR2"
LINE_COUNT=0
fi
done
cat "$STOR2" | sort | uniq > scraped_HMA_proxies.lst
NUMPROX=$(wc -l scraped_HMA_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_HMA_proxies.lst
echo
;;
14)
echo "Starting freeproxylists.com pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting freeproxylists||com pre||built proxy scraper||hang tight this might take a few'
# Step 1: collect the 'elite/...' link targets from the index page into
# $STOR ('>' truncates it).
curl http://www.freeproxylists.com/elite.php --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.freeproxylists.com/" -A 'proxb0t' | grep 'elite/' | sed -e 's/<a href=//g' -e 's/>detailed list #[0-9]\{1,2\}<\/a> ([0-9].[0-9A-Z]\{1,5\})//g' -e 's/>elite #[0-9]\{1,2\}<\/a> ([0-9].[0-9A-Z]\{1,5\})//g' -e "s/'//g" > "$STOR"
BASE="http://www.freeproxylists.com/load_"
# Step 2: each link, with '/' rewritten to '_', is appended to $BASE and
# fetched; the reply has its &lt;/&gt; entities decoded before IP:PORT pairs
# are extracted.
cat "$STOR" | sed 's/\//_/g' | while read line
do
LINK="$BASE$line"
curl $LINK --header "Content-Type: text/xml" --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.freeproxylists.com/elite.php" -A 'proxb0t' | sed -e 's/&lt;/</g' -e 's/&gt;/>/g' -e 's/<\/td><td>/:/g' | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" >> "$STOR2"
done
cat "$STOR2" | sort | uniq > scraped_freeproxylists_proxies.lst
NUMPROX=$(wc -l scraped_freeproxylists_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_freeproxylists_proxies.lst
echo
;;
15)
echo "Starting proxys.com.ar pre-built proxy scraper, hang tight this might take a few...." | grep --color -E 'Starting proxys||com||ar pre||built proxy scraper||hang tight this might take a few'
# "[1-5]" in the quoted URL is curl's own range globbing, so one curl call
# requests pages 1 through 5. grep keeps each whois-link row together with
# the 5 lines before it.
curl "http://www.proxys.com.ar/index.php?act=list&port=&type=&country=&page=[1-5]" --retry 2 --retry-delay 3 --connect-timeout 3 --no-keepalive -s -e "http://www.proxys.com.ar/" -A 'proxb0t' 2> /dev/null | grep -B5 "<td><a href=\"index.php?act=whois&ip=" >> "$STOR"
# Pass 1: out of every 7-line group keep the first two lines (with their
# <td> wrappers removed) in $STOR2.
# NOTE(review): 7 presumably = 6 matched/context lines + grep's "--" group
# separator - confirm.
count=1
cat "$STOR" | while read line
do
if [ "$count" -le 2 ]; then
echo $line | sed -e 's/<td>//' -e 's/<\/td>//' >> "$STOR2"
fi
if [ "$count" == 7 ]; then
count=0
fi
count=$((count +1))
done
# Pass 2: join consecutive line pairs from $STOR2 as IP:PORT and append them
# to $STOR (which still holds the raw rows; the final grep filters those out).
count=1
cat "$STOR2" | while read pieces
do
if [ "$count" == 1 ]; then
IP="$pieces"
elif [ "$count" == 2 ]; then
PORT="$pieces"
count=0
echo "$IP:$PORT" >> "$STOR"
fi
count=$((count +1))
done
cat "$STOR" | grep -o "[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}\.[0-9]\{1,3\}:[0-9]\{1,5\}" | sort | uniq > scraped_proxys-ar_proxies.lst
NUMPROX=$(wc -l scraped_proxys-ar_proxies.lst | cut -d' ' -f1)
echo
echo "RESULTS FROM PROXY SCRAPING:" | grep --color 'RESULTS FROM PROXY SCRAPING'
echo "Proxies Found: $NUMPROX" | grep --color 'Proxies Found'
echo
cat scraped_proxys-ar_proxies.lst
echo
;;
*)
# Anything outside 1-15: complain and show the usage text.
echo
echo "Unrecognized option provided! Check usage and try again...." | grep --color -E 'Unrecognized option provided||Check usage and try again'
echo
usage_info
;;
esac
}
# MAIN-----------------------------------------------------
# main: parse the command line and dispatch to the matching worker.
# Arguments: the script's own arguments ("$@").
#   -L <file>     scrape the pages listed in <file>      (sets SITE_LIST)
#   -P <n>        run pre-built scraper session <n>      (sets PREOPT)
#   -s <ip:port>  check a single proxy                   (sets SPROXY)
#   -x <file>     check every proxy listed in <file>     (sets LIST)
# No argument, -h, --help, an unknown option or a missing option argument
# all end in usage_info.
main(){
  local usage_options
  # Reset getopts' cursor so parsing always starts at the first argument.
  local OPTIND=1

  # Remove the scratch files on every exit path, including the exit inside
  # usage_info, which would otherwise leave them behind.
  trap 'rm -f -- "$STOR" "$STOR2" 2> /dev/null' EXIT

  clear
  if [ -z "$1" ] || [ "$1" == '-h' ] || [ "$1" == '--help' ]; then
    usage_info
  fi

  while getopts ":s:x:L:P:" usage_options; do
    # Each worker reads its own global, so it is set from the argument of the
    # option being processed ($OPTARG). This keeps attached arguments
    # (-s1.2.3.4:80) and several options on one command line working.
    case "$usage_options" in
      L)
        SITE_LIST="$OPTARG"
        scraper
        ;;
      P)
        PREOPT="$OPTARG"
        pre_built_scraper
        ;;
      s)
        SPROXY="$OPTARG"
        single_scan
        ;;
      x)
        LIST="$OPTARG"
        list_scan
        ;;
      *)
        usage_info
        ;;
    esac
  done
}
main "$@"
#EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment