Skip to content

Instantly share code, notes, and snippets.

@madr
Created June 7, 2012 08:57
Show Gist options
  • Save madr/2887556 to your computer and use it in GitHub Desktop.
Save madr/2887556 to your computer and use it in GitHub Desktop.
Create local HTML files from a site behind a login using Python
'''
Example htmldump_config.py:

    url = "http://localhost:5000"
    login = "admin@adeprimo.se"
    password = "app161770"

    def pages_to_validate():
        pages = [
            # event registration
            ('event-start', '/events/start/'),
            ('event-form', '/events/create'),
            ('events', '/events'),
            ('event-edit', '/events/view/'),
        ]
        return pages
'''
import mechanize
import re
from htmldump_config import pages_to_validate, url, login, password
# Append a "%s" placeholder so a page path can be substituted into the base URL.
url += "%s"
# Output location of each dump; "%s" is replaced by the page's short name.
filepattern = "../static/htmldumps/%s.html"

print("creating fake browser env")
br = mechanize.Browser()
print("--- done")

# Log in by filling the first form on the login page. The same `br` object is
# used for the page downloads further down.
print("logging in as admin")
br.open(url % "/login")
br.select_form(nr=0)
br["email"] = login
br["password"] = password
br.submit()
print("--- now logged in as %s" % login)
def htmldump(name, doc):
    """Write one downloaded document to its local dump file.

    Args:
        name: Short page name; substituted into the module-level
            ``filepattern`` to build the output path.
        doc: The document body. Byte strings are written verbatim; text
            (unicode) is encoded as UTF-8 first.

    Raises:
        IOError/OSError: If the target file cannot be opened or written.
    """
    # Normalise to bytes so the dump is byte-for-byte what the server sent.
    if not isinstance(doc, bytes):
        doc = doc.encode("utf-8")
    # Binary mode avoids newline translation; the ``with`` block closes the
    # file, so no explicit close() is needed.
    with open(filepattern % name, "wb") as f:
        f.write(doc)
def begin_download(pages):
    """Fetch every listed page and store it as a local HTML dump.

    ``pages`` is an iterable of ``(filename, url_path)`` pairs. Each path is
    substituted into the module-level ``url`` template and opened with the
    module-level browser ``br``; the response body is passed to ``htmldump``
    under ``filename``.
    """
    for dump_name, path in pages:
        print("downloading: %s" % path)
        print(" to: %s.html" % dump_name)
        page = br.open(url % path)
        body = page.read()
        htmldump(dump_name, body)
# Get the (filename, url_path) pairs from the config module and download
# each of them.
pages = pages_to_validate()
begin_download(pages)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment