Created
June 7, 2012 08:57
-
-
Save madr/2887556 to your computer and use it in GitHub Desktop.
create local html files from a site behind login using python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Example htmldump_config.py:

url = "http://localhost:5000"
login = "admin@adeprimo.se"
password = "app161770"

def pages_to_validate():
    pages = [
        # event registration
        ('event-start', '/events/start/'),
        ('event-form', '/events/create'),
        ('events', '/events'),
        ('event-edit', '/events/view/'),
    ]
    return pages
'''
import mechanize | |
import re | |
from htmldump_config import pages_to_validate, url, login, password | |
# Turn the configured base URL into a template; "%s" is filled with a path.
url += "%s"
# Template for the output files; "%s" is filled with the page name.
filepattern = "../static/htmldumps/%s.html"

print("creating fake browser env")
br = mechanize.Browser()
print("--- done")

print("logging in as admin")
br.open(url % "/login")
# Select the first form on the login page and fill in the credentials.
br.select_form(nr=0)
br["email"] = login
br["password"] = password
br.submit()
print("--- now logged in as %s" % login)
def htmldump(name, doc):
    """Write the document *doc* to the dump file for *name*.

    The destination path is built by substituting *name* into the
    module-level ``filepattern`` template.

    name -- page name used to build the output file name.
    doc  -- page content; either raw bytes (as returned by a response's
            ``read()``) or text, which is encoded as UTF-8 before writing.
    """
    # Text input is encoded explicitly so the file can be opened in binary
    # mode regardless of what the caller passes in.
    if not isinstance(doc, bytes):
        doc = doc.encode("utf-8")
    # Binary mode stores the bytes exactly as served (no newline
    # translation); the ``with`` block closes the file, so no explicit
    # close() is needed.
    with open(filepattern % name, "wb") as f:
        f.write(doc)
def begin_download(pages):
    """Download each listed page and save its content via ``htmldump``.

    pages -- iterable of ``(filename, url_path)`` pairs; ``url_path`` is
             substituted into the module-level ``url`` template and the
             response body is stored under ``filename``.
    """
    for entry in pages:
        filename, url_path = entry
        print("downloading: %s" % url_path)
        print(" to: %s.html" % filename)
        # Fetch through the shared browser object (``br``).
        body = br.open(url % url_path).read()
        htmldump(filename, body)
# Script entry: fetch the configured page list and dump every page to disk.
pages = pages_to_validate()
begin_download(pages)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment