Skip to content

Instantly share code, notes, and snippets.

@hanksims
Created April 2, 2015 16:09
Show Gist options
  • Save hanksims/630e186e76c61316a4d8 to your computer and use it in GitHub Desktop.
Save hanksims/630e186e76c61316a4d8 to your computer and use it in GitHub Desktop.
import mechanize
import cookielib
from BeautifulSoup import BeautifulSoup
# Code-search page on leginfo.legislature.ca.gov; Browser opens this page and
# submits the form named 'expertsearchformid' found on it.
URL = 'http://leginfo.legislature.ca.gov/faces/codes.xhtml'
class Browser(object):
    """Look up one code section through the search form at ``URL``.

    Constructing an instance performs the whole lookup: it opens ``URL``,
    fills in the form named ``expertsearchformid``, submits it and parses
    the reply.

    NOTE(review): judging by the form control ids, ``section`` is sent as
    the *code* selector (``toctextcodeid``) and ``code`` is sent as the
    *section* text (``toctextsectionid``). The parameter names are kept
    as-is so existing callers are unaffected.

    Attributes set by ``__init__``:
        browser:  the configured ``mechanize.Browser`` used for the request.
        response: raw body of the page returned after submitting the form.
        soup:     ``BeautifulSoup`` parse of ``response``.
        text:     prettified markup of the ``div`` whose id is
                  ``codeLawSectionNoHead``.

    Raises:
        AttributeError: if the returned page has no
            ``div#codeLawSectionNoHead``. This is the same exception type
            the unguarded ``None.prettify()`` call used to raise, now with
            a message that says what was missing.
    """

    @staticmethod
    def _new_browser():
        """Return a mechanize.Browser configured for the lookup request."""
        br = mechanize.Browser()
        br.set_cookiejar(cookielib.LWPCookieJar())
        br.set_handle_equiv(True)
        br.set_handle_gzip(True)
        br.set_handle_redirect(True)
        br.set_handle_referer(True)
        # robots.txt handling is switched off for this request.
        br.set_handle_robots(False)
        br.set_handle_refresh(
            mechanize._http.HTTPRefreshProcessor(), max_time=1)
        # Present a regular desktop-browser User-agent string.
        br.addheaders = [('User-agent','Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1') ]
        return br

    def __init__(self, section, code):
        self.browser = self._new_browser()
        self.browser.open(URL)
        self.browser.select_form(name='expertsearchformid')
        # The first control is assigned a one-item list, the second a string.
        self.browser['expertsearchformid:toctextcodeid'] = [section]
        self.browser['expertsearchformid:toctextsectionid'] = str(code)
        self.browser.submit()
        self.response = self.browser.response().read()
        self.soup = BeautifulSoup(self.response)

        section_div = self.soup.find('div', {'id': 'codeLawSectionNoHead'})
        if section_div is None:
            # find() returns None on no match; fail with a readable message
            # instead of "'NoneType' object has no attribute 'prettify'".
            raise AttributeError(
                "no div with id 'codeLawSectionNoHead' in the response for "
                "section=%r, code=%r" % (section, code))
        self.text = section_div.prettify()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment