Skip to content

Instantly share code, notes, and snippets.

@tzengerink
Last active December 10, 2015 13:58
Show Gist options
  • Save tzengerink/4443900 to your computer and use it in GitHub Desktop.
Save tzengerink/4443900 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
BROWSER
-------
Fetch web pages, send POST requests, inspect HTML using BeautifulSoup,
or simply check the response code of a URL.
Example usage:
# Create new Browser instance
browser = Browser()
# Login (or simply post data) to a website
browser.post('http://url.to/login', {'username':'john',
'password':'pass123'})
# Get page contents
html = browser.get('http://url.to/password-protected-page')
# Get page contents as BeautifulSoup
soup = browser.soup('http://url.to/password-protected-page')
# Get the response code for a given URL
code = browser.code('http://url.to/password-protected-page')
Copyright (c) 2013 T. Zengerink
Licensed under MIT License.
See: https://gist.github.com/raw/3151357/6806e68cb9cc0042b265f25be9bc25dd39f75267/LICENSE.md
"""
import cookielib, sys, urllib, urllib2
from BeautifulSoup import BeautifulSoup
class Browser:
    """
    Small cookie-aware HTTP client built on urllib2.
    Every request made through one instance goes through the same opener,
    and therefore shares the same cookie jar.
    """

    def __init__(self):
        self.cookie_jar = cookielib.CookieJar()
        self.cookie_proc = urllib2.HTTPCookieProcessor(self.cookie_jar)
        self.opener = urllib2.build_opener(self.cookie_proc)

    def code(self, url):
        """
        Get the HTTP response code for a given URL.
        url -- URL to fetch and get code from.
        Returns the status code reported by the response. When the request
        fails with an HTTP error status, that status is returned instead of
        being raised. A urllib2.URLError that carries no HTTP status (for
        example when no connection can be made) is left to propagate.
        """
        try:
            # Use the shared opener so cookies stored by earlier requests
            # are sent along, just like get() and post() do.
            response = self.opener.open(url)
        except urllib2.HTTPError as error:
            # Only HTTPError has a `code`; a plain URLError does not.
            return error.code
        try:
            return response.getcode()
        finally:
            response.close()

    def get(self, url):
        """
        Get the given URL and return the contents.
        url -- URL to fetch.
        Returns the response body as returned by read().
        """
        response = self.opener.open(url)
        try:
            return response.read()
        finally:
            # Release the connection even when reading fails.
            response.close()

    def post(self, url, data):
        """
        Post data to a given URL.
        url  -- URL to post to.
        data -- Form fields to send, passed through urllib.urlencode.
        Returns the open response object; the caller is responsible for
        reading and closing it.
        """
        encoded = urllib.urlencode(data)
        return self.opener.open(url, encoded)

    def soup(self, url):
        """
        Get BeautifulSoup from the page contents.
        url -- URL to fetch.
        """
        return BeautifulSoup(self.get(url))
def main(args):
    """
    Fetch every URL in args and print the page contents.
    args -- Iterable of URLs to fetch.
    """
    client = Browser()
    for url in args:
        contents = client.get(url)
        print(contents)
# Script entry point: every command-line argument is passed to main() as a
# URL to fetch.
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment