# Skip to content
#
# Instantly share code, notes, and snippets.
#
# @dopuskh3
# Created April 30, 2010 16:27
import os
import logging
import urllib2
import cookielib
class WebDownloader(object):
    """Fetch URLs through a cookie-aware opener, persisting cookies to disk.

    Cookies are kept in an ``LWPCookieJar``. They are read from
    ``cookiefile`` when the opener is first built and written back to the
    same file after every fetch that leaves the jar non-empty.
    """

    def __init__(self):
        # In-memory cookie store shared with the opener built later.
        self.cookiemgr = cookielib.LWPCookieJar()
        # File the cookies are loaded from and saved to.
        self.cookiefile = 'cookies.dat'
        # Created lazily by the first call to fetch().
        self.cookie_opener = None

    def __try_load_cookies(self):
        """Load cookies from ``cookiefile`` if it exists.

        A missing file is silently ignored. An unreadable or malformed file
        is logged at CRITICAL level and otherwise ignored, so fetching can
        proceed with whatever cookies the jar already holds.
        """
        if not os.path.isfile(self.cookiefile):
            return
        try:
            self.cookiemgr.load(self.cookiefile)
        except (cookielib.LoadError, IOError):
            # Only cookie-file problems are tolerated here; anything else
            # (KeyboardInterrupt, programming errors) must propagate.
            logging.critical("Bad cookie file %s", self.cookiefile)
            return
        for index, cookie in enumerate(self.cookiemgr):
            logging.info("Loaded cookie %d : %s", index, cookie)

    def __store_cookies(self):
        """Log every cookie in the jar and save the jar to ``cookiefile``.

        Nothing is logged or written when the jar is empty.
        """
        if not self.cookiemgr:
            return
        for index, cookie in enumerate(self.cookiemgr):
            logging.info("Fetched cookie %d : %s", index, cookie)
        self.cookiemgr.save(self.cookiefile)

    def __build_opener(self):
        """Create the cookie-processing opener and install it globally."""
        self.cookie_opener = urllib2.build_opener(
            urllib2.HTTPCookieProcessor(self.cookiemgr))
        # Global side effect: plain urllib2.urlopen() calls made elsewhere
        # will also go through this opener from now on.
        urllib2.install_opener(self.cookie_opener)

    def fetch(self, url):
        """Return the body of ``url`` as read from the opener.

        The opener is built on first use, at which point any cookies saved
        on disk are loaded. After the body has been read the response is
        closed and the cookie jar is written back to disk.

        Errors raised while opening or reading the URL propagate to the
        caller.
        """
        if not self.cookie_opener:
            self.__build_opener()
            # The jar stays in memory for the life of this object, so the
            # file only needs to be read once.
            self.__try_load_cookies()
        request = urllib2.Request(url)
        response = self.cookie_opener.open(request)
        try:
            html = response.read()
        finally:
            # Always release the underlying connection / file handle.
            response.close()
        self.__store_cookies()
        return html
if __name__ == "__main__":
    # Demo run: enable verbose logging, then fetch two Amazon pages in
    # sequence with the same downloader so cookies carry over.
    logging.basicConfig(level=logging.DEBUG)
    downloader = WebDownloader()
    for page_url in (
        "http://www.amazon.com",
        "http://www.amazon.com/books-used-books-textbooks/b/ref=sa_menu_bo0/176-6568881-0637038?_encoding=UTF8&node=283155&pf_rd_m=ATVPDKIKX0DER&pf_rd_s=left-nav-1&pf_rd_r=1B7Z07TMEEHTDKKETJ04&pf_rd_t=101&pf_rd_p=328655101&pf_rd_i=507846",
    ):
        downloader.fetch(page_url)
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment