Skip to content

Instantly share code, notes, and snippets.

Last active December 10, 2015 13:58
Show Gist options
  • Save tzengerink/4443900 to your computer and use it in GitHub Desktop.
Save tzengerink/4443900 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Get internet pages, send post requests, inspect HTML using BeautifulSoup
or simply check the response code of a URL.

Example usage:

    # Create new Browser instance
    browser = Browser()

    # Login (or simply post data) to a website
    browser.post('http://example.com/login', {'username': 'john'})

    # Get page contents
    html = browser.get('http://example.com/')

    # Get page contents as BeautifulSoup
    soup = browser.soup('http://example.com/')

    # Get the response code for a given URL
    code = browser.code('http://example.com/')

Copyright (c) 2013 T. Zengerink
Licensed under MIT License.
"""
import cookielib, sys, urllib, urllib2
from BeautifulSoup import BeautifulSoup
class Browser(object):
    """Minimal cookie-aware HTTP client built on ``urllib2``.

    Every request goes through a single opener that has an
    ``HTTPCookieProcessor`` attached, so all requests made through one
    instance share the same cookie jar.
    """

    def __init__(self):
        self.cookie_jar = cookielib.CookieJar()
        self.cookie_proc = urllib2.HTTPCookieProcessor(self.cookie_jar)
        self.opener = urllib2.build_opener(self.cookie_proc)

    def code(self, url):
        """Get the HTTP response code for a given URL.

        url -- URL to fetch and get code from.

        Returns the status code of the response, the code carried by the
        error when the server answers with an HTTP error status, or None
        when the request failed without any HTTP response.
        """
        try:
            response = self.opener.open(url)
        except urllib2.URLError as e:
            # Only HTTPError (a URLError subclass) has ``code``; a plain
            # URLError (DNS failure, refused connection, ...) does not.
            return getattr(e, "code", None)
        try:
            return response.getcode()
        finally:
            response.close()

    def get(self, url):
        """Get the given URL and return the contents.

        url -- URL to fetch.
        """
        response = self.opener.open(url)
        try:
            return response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

    def post(self, url, data):
        """Post data to a given URL.

        url  -- URL to post to.
        data -- Form fields to send; passed through ``urllib.urlencode``.

        Returns the response object produced by the opener (not its
        contents); the caller is responsible for reading and closing it.
        """
        return self.opener.open(url, urllib.urlencode(data))

    def soup(self, url):
        """Get BeautifulSoup from the page contents.

        url -- URL to fetch.
        """
        return BeautifulSoup(self.get(url))
def main(args):
    """Fetch every URL in args and print the contents of each page.

    args -- Iterable of URLs to fetch.
    """
    browser = Browser()
    for arg in args:
        # A single parenthesised argument prints the same under the
        # Python 2 print statement and the Python 3 print function.
        print(browser.get(arg))
if __name__ == "__main__":
    # NOTE(review): the body of this guard is missing from the source;
    # passing the command-line arguments (without the script name) to
    # main() is the presumed original -- confirm.
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment