Prime caches
#!/bin/bash
#
# Prime caches: fetch a page, extract hrefs matching a pattern, and curl
# each matched URL with a bounded number of parallel workers.
#
# Usage:   ./curl-urls.sh numprocessses http://urltograp urlpattern
# Example: ./curl-urls.sh 3 http://www.domain.com domain.com

numprocessses=$1   # max number of concurrent curl jobs
baseurl=$2         # page to download and scrape for links
urlpattern=$3      # extended regex (grep -E) filter for extracted hrefs
#######################################
# Block until the number of running background jobs drops below a limit.
# Arguments: $1 - max parallel jobs (optional, default: 3)
# Returns:   0 once a job slot is free
#######################################
forky() {
  # Default to 3 parallel workers when no limit is given.
  local num_par_procs=${1:-3}
  # Poll once a second until a slot frees up. 'jobs -r' counts only
  # *running* jobs, so finished-but-unreaped jobs don't stall the queue.
  while [[ $(jobs -r | wc -l) -ge "$num_par_procs" ]]; do
    sleep 1
  done
}
# Download the base page. NOTE: the original used '-O /tmp/baseurl.html',
# but curl's -O takes no argument (it names the file after the URL), so
# '/tmp/baseurl.html' was silently treated as a second URL. '-o FILE' is
# the correct way to choose the output path.
curl -s "$baseurl" -o /tmp/baseurl.html

# Extract href targets, keep only those matching the pattern, and
# de-duplicate ('sort -u' replaces 'sort | uniq').
sed -n 's/.*href="\([^"]*\).*/\1/p' /tmp/baseurl.html | grep -E "$urlpattern" | sort -u > /tmp/urls.txt

# below is an example - make sure to change this to your needs
# Read line-by-line instead of word-splitting `cat` output, so URLs with
# unusual characters survive intact.
while IFS= read -r url; do
  echo "$(jobs | wc -l) jobs in spool"
  echo "Grabbing $url"
  curl -s "$url" > /dev/null &
  # Throttle: wait here until a worker slot is free.
  forky "$numprocessses"
done < /tmp/urls.txt

# Barrier: wait for all remaining background fetches to finish.
wait
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment