@ideamonk
Created September 30, 2010 09:48
import urllib2
from BeautifulSoup import BeautifulSoup


def safename(s):
    # Keep only letters and digits so the result is a safe file/directory name.
    return "".join([x for x in s if x.isalpha() or x.isdigit()])


print "Doing..."

mothers = [
    # these pages contain links to pages to be saved
    "http://www.foobarbeep.com/urlinks/foo1.html",
    "http://www.foobarbeep.com/urlinks/foo2.html",
    "http://www.foobarbeep.com/urlinks/foo3.html",
]

for mother in mothers:
    # Write one shell script per index ("mother") page.
    scriptname = safename(mother.split('/')[-1])
    shellscript = open("scrape_%s.sh" % scriptname, "w")

    # Fetch the index page and collect every anchor it contains.
    soup = BeautifulSoup(urllib2.urlopen(mother).read())
    links = soup.findAll('a')

    for link in links:
        dirname = safename(link['href'].split('/')[-1])
        # Mirror the linked page and its requisites into its own directory.
        shellscript.write("wget --directory-prefix ./%s --no-parent --timestamping --convert-links --page-requisites --no-directories --no-host-directories -erobots=off %s\n\n" % (dirname, link['href']))
        # Rename saved .asp files to .html; run in a subshell so the cd does
        # not leak into the commands written for later links.
        shellscript.write("(cd %s && for f in `find . | grep -e '\\.asp$'`; do mv $f $f.html; done)\n" % (dirname))

    shellscript.close()

print "Done."