Prime caches
#!/bin/bash
#
# Prime caches: fetch a page, extract hrefs matching a pattern, and curl
# each matched URL with a bounded number of parallel workers.
#
# Usage:   ./curl-urls.sh numprocessses http://urltograp urlpattern
# Example: ./curl-urls.sh 3 http://www.domain.com domain.com

numprocessses=$1   # max number of concurrent curl jobs
baseurl=$2         # page to download and scrape for links
urlpattern=$3      # extended regex (grep -E) filter for extracted hrefs
#######################################
# Block until the number of running background jobs drops below a limit.
# Arguments: $1 - max parallel jobs (optional, default: 3)
# Returns:   0 once a job slot is free
#######################################
forky() {
  # Default to 3 parallel workers when no limit is given.
  local num_par_procs=${1:-3}
  # Poll once a second until a slot frees up. 'jobs -r' counts only
  # *running* jobs, so finished-but-unreaped jobs don't stall the queue.
  while [[ $(jobs -r | wc -l) -ge "$num_par_procs" ]]; do
    sleep 1
  done
}
# Download the base page. NOTE: the original used '-O /tmp/baseurl.html',
# but curl's -O takes no argument (it names the file after the URL), so
# '/tmp/baseurl.html' was silently treated as a second URL. '-o FILE' is
# the correct way to choose the output path.
curl -s "$baseurl" -o /tmp/baseurl.html

# Extract href targets, keep only those matching the pattern, and
# de-duplicate ('sort -u' replaces 'sort | uniq').
sed -n 's/.*href="\([^"]*\).*/\1/p' /tmp/baseurl.html | grep -E "$urlpattern" | sort -u > /tmp/urls.txt

# below is an example - make sure to change this to your needs
# Read line-by-line instead of word-splitting `cat` output, so URLs with
# unusual characters survive intact.
while IFS= read -r url; do
  echo "$(jobs | wc -l) jobs in spool"
  echo "Grabbing $url"
  curl -s "$url" > /dev/null &
  # Throttle: wait here until a worker slot is free.
  forky "$numprocessses"
done < /tmp/urls.txt

# Barrier: wait for all remaining background fetches to finish.
wait
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment