Last active
July 1, 2018 17:36
-
-
Save harish2704/26b40a40425f3bd57e613a3909f8693c to your computer and use it in GitHub Desktop.
parallel downloading with proxies in shell script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# run-parallel.sh - run <cmd> once per URL with GNU parallel, pairing every
# URL with a proxy taken from the proxy list.
#
# Usage: run-parallel.sh <URL-list> <proxy-list> <parallel-job-count> <cmd>
#   <URL-list>            file read line by line; lines without an 'h' are dropped
#   <proxy-list>          file read line by line; only lines starting with a
#                         digit are used, their first two whitespace-separated
#                         fields become "<field1>:<field2>"
#   <parallel-job-count>  passed to 'parallel -j'
#   <cmd>                 command line to run; the URL is appended as last argument
#
# Environment variables
#   SKIP_PROXY_VALIDATION  if set (non-empty), skip the proxy list validation step.
#   OUTPUT                 default 'tmux', which sends each job to a tmux session.
#                          Any other string is handed to 'parallel --results', so
#                          the output of each job is saved inside a directory
#                          tree with the given name.
#
# Files written: <URL-list>.clean and <proxy-list>.clean. The validation step
# runs ./validate-proxy.sh (resolved relative to the current directory) with
# the proxy list as argument.

set -euo pipefail

url_list_raw=${1-}
proxy_list_raw=${2-}
job_count=${3-}
cmd=${4-}

if [[ -z "$url_list_raw" || -z "$proxy_list_raw" || -z "$job_count" || -z "$cmd" ]]; then
  printf 'Usage: %s <URL-list> <proxy-list> <parallel-job-count> <cmd>\n' "${0##*/}" >&2
  exit 2
fi

for list_file in "$url_list_raw" "$proxy_list_raw"; do
  if [[ ! -r "$list_file" ]]; then
    printf '%s: cannot read list file: %s\n' "${0##*/}" "$list_file" >&2
    exit 1
  fi
done

if [[ -z "${SKIP_PROXY_VALIDATION:-}" ]]; then
  # parallel hands the command string to a shell, so the list path has to be
  # shell-quoted to survive spaces and other special characters.
  printf -v quoted_proxy_list '%q' "$proxy_list_raw"
  # A failing validation job must not abort the run; just report it.
  parallel -j 8 -a "$proxy_list_raw" "bash ./validate-proxy.sh ${quoted_proxy_list} {}" \
    || printf '%s: warning: proxy validation reported failures\n' "${0##*/}" >&2
fi

output=${OUTPUT:-tmux}

# Keep only the lines that contain an 'h'.
url_list_clean="${url_list_raw}.clean"
sed -e '/^[^h]*$/d' <"$url_list_raw" >"$url_list_clean"

# Keep only lines starting with a digit. Blank lines are skipped as well,
# otherwise they would be emitted as a bogus ":" proxy entry.
proxy_list_clean="${proxy_list_raw}.clean"
awk '/^[0-9]/ { print $1 ":" $2 }' <"$proxy_list_raw" >"$proxy_list_clean"

# {1} is the URL, {2} the proxy; --link pairs the two input sources line by line.
# ${cmd} is interpolated on purpose: it is a command line, not a single word.
job="env http_proxy=http://{2} https_proxy=http://{2} ${cmd} {1}"

if [[ "$output" == 'tmux' ]]; then
  sink=(--tmux)
else
  sink=(--results "$output")
fi

parallel --link "${sink[@]}" -j "$job_count" \
  -a "$url_list_clean" -a "$proxy_list_clean" "$job"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# validate-proxy.sh - test one proxy and disable it in the list when it fails.
#
# Usage: validate-proxy.sh <proxy-list-file> <proxy-line-text>
#   <proxy-list-file>  file that contains a list of <proxy-line-text> lines
#   <proxy-line-text>  a single line from <proxy-list-file>; its first two
#                      whitespace-separated fields are used as host and port
#
# The proxy is probed by fetching https://api.ipify.org through it. If the
# request fails, the corresponding line of <proxy-list-file> is commented out
# (prefixed with "# "). Blank and already commented lines are ignored.
# Prints "<proxy-url> Success" or "<proxy-url> Failed" on stdout.
#
# NOTE: relies on 'sed -i' without a backup-suffix argument (GNU sed form).

set -u

#######################################
# Escape basic-regex metacharacters (and the '/' delimiter) so that the
# text is matched literally inside a sed 's/.../.../' expression.
# Arguments: $1 - text to escape
# Outputs:   escaped text on stdout
#######################################
escape_bre() {
  printf '%s' "$1" | sed -e 's/[][\.*^$/]/\\&/g'
}

#######################################
# Comment out the line of the list file that holds the given proxy.
# Only a line whose first field is exactly the host and whose second field is
# exactly the port is touched, so "1.2.3.4" never disables "1.2.3.40".
# Arguments: $1 - proxy list file
#            $2 - proxy host/ip (first field of the line)
#            $3 - proxy port (second field; may be empty)
# Returns:   exit status of sed, or 1 if the list file does not exist
#######################################
comment_out_proxy() {
  local list_file=$1 ip=$2 port=$3
  local ip_re port_re pattern lock_dir attempt status
  local have_lock=0

  if [[ ! -f "$list_file" ]]; then
    printf 'validate-proxy: proxy list not found: %s\n' "$list_file" >&2
    return 1
  fi

  ip_re=$(escape_bre "$ip")
  if [[ -n "$port" ]]; then
    port_re=$(escape_bre "$port")
    pattern="^${ip_re}[[:space:]]\{1,\}${port_re}\([[:space:]].*\)\{0,1\}\$"
  else
    pattern="^${ip_re}[[:space:]]*\$"
  fi

  # Several validators may run at once against the same list, and 'sed -i'
  # rewrites the whole file, so concurrent edits can get lost. mkdir is atomic
  # and serves as a simple lock. After ~5 seconds give up waiting (the lock is
  # probably stale) and edit anyway instead of hanging.
  lock_dir="${list_file}.lock"
  for (( attempt = 0; attempt < 100; attempt++ )); do
    if mkdir -- "$lock_dir" 2>/dev/null; then
      have_lock=1
      break
    fi
    sleep 0.05
  done
  if (( ! have_lock )); then
    printf 'validate-proxy: could not lock %s, editing without lock\n' "$list_file" >&2
  fi

  sed -i -e "s/${pattern}/# &/" -- "$list_file"
  status=$?

  if (( have_lock )); then
    rmdir -- "$lock_dir"
  fi
  return "$status"
}

#######################################
# Entry point: validate one proxy line.
# Arguments: $1 - proxy list file
#            $2 - one line of that file
# Outputs:   "<proxy-url> Success" or "<proxy-url> Failed" on stdout
#######################################
main() {
  local list_file=${1-}
  local proxy_line=${2-}
  local ip='' port='' proxy_url

  # Split on whitespace without globbing; anything after the port is ignored.
  read -r ip port _ <<<"$proxy_line"

  # Ignore blank and commented lines. A blank line must never reach the sed
  # step: an empty host would match, and comment out, every line of the list.
  if [[ -z "$ip" || "$ip" == \#* ]]; then
    return 0
  fi

  proxy_url="http://${ip}:${port}"

  if http_proxy="$proxy_url" https_proxy="$proxy_url" \
    curl --connect-timeout 5 -s 'https://api.ipify.org?format=json' >/dev/null; then
    printf '%s Success\n' "$proxy_url"
    return 0
  fi

  printf '%s Failed\n' "$proxy_url"
  # Comment this proxy out in the list file.
  comment_out_proxy "$list_file" "$ip" "$port"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment