Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Prime caches
#!/bin/bash
# Prime caches: fetch every URL linked from a base page, in parallel.
# Usage:   ./curl-urls.sh numprocessses http://urltograp urlpattern
# Example: ./curl-urls.sh 3 http://www.domain.com domain.com
numprocessses=${1:-3}   # max parallel curl jobs; 3 matches forky's own fallback
baseurl=$2              # page whose href links are harvested
urlpattern=$3           # grep -E pattern selecting which links to fetch
# forky: throttle background fan-out. Blocks (polling once per second)
# until the number of running background jobs drops below the limit.
#   $1 - maximum concurrent jobs; defaults to 3 when empty or unset
function forky() {
  local max_jobs=${1:-3}
  until [[ $(jobs | wc -l) -lt $max_jobs ]] ; do
    sleep 1
  done
}
# Fetch the base page to a known path. NOTE: the original used -O, which
# takes no argument (it names the file after the URL), so /tmp/baseurl.html
# was treated as a second URL and the page never landed where sed reads it.
# -o writes to the explicit path.
curl -s "$baseurl" -o /tmp/baseurl.html
# Extract href targets, keep only those matching the pattern, de-duplicate.
sed -n 's/.*href="\([^"]*\).*/\1/p' /tmp/baseurl.html \
  | grep -E -- "$urlpattern" | sort -u > /tmp/urls.txt
# below is an example - make sure to change this to your needs
# Read line-by-line (read -r) instead of word-splitting `cat` output,
# so URLs containing spaces or globs survive intact.
while IFS= read -r url; do
  echo "$(jobs | wc -l) jobs in spool"
  echo "Grabbing $url"
  curl -s "$url" > /dev/null &      # warm the cache; discard the body
  forky "$numprocessses"            # block while at/over the job limit
done < /tmp/urls.txt
wait   # barrier: let every in-flight curl finish before exiting
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment