Skip to content

Instantly share code, notes, and snippets.

@adriaant
Last active December 18, 2015 22:48
Show Gist options
  • Save adriaant/5856786 to your computer and use it in GitHub Desktop.
Save adriaant/5856786 to your computer and use it in GitHub Desktop.
import sys
import os
import zlib
from urllib2 import (build_opener, HTTPPasswordMgrWithDefaultRealm,
HTTPBasicAuthHandler, HTTPDigestAuthHandler, HTTPError, URLError)
from httplib import BadStatusLine
class HTTPHandlerError(Exception):
    """
    Exceptions related to the use of HTTPHandler.

    Attributes:
        url: URL the failure relates to ('' when not supplied).
        code: status or error code (600 when not supplied or falsy).
        message: human-readable description ('' when not supplied).
        headers: response headers ({} when not supplied).
        data: response body ('' when not supplied).
    """

    def __init__(self, httpurl=None, httpcode=None, httpmsg=None, httpheaders=None, httpdata=None):
        Exception.__init__(self)
        self.url = httpurl or ''
        self.code = httpcode or 600
        self.message = httpmsg or ''
        self.headers = httpheaders or {}
        self.data = httpdata or ''

    def _code_text(self):
        """Render ``self.code`` as text without failing on non-numeric codes."""
        try:
            return '%d' % self.code
        except (TypeError, ValueError):
            # The constructor accepts any value as a code; fall back to its
            # plain string form instead of raising while formatting an error.
            return '%s' % (self.code,)

    def __repr__(self):
        return '<HTTPHandlerError %s, %s>' % (self._code_text(), self.message)

    def __str__(self):
        return 'HTTPHandlerError %s: %s' % (self._code_text(), self.message)

    def __unicode__(self):
        return 'HTTPHandlerError %s: %s' % (self._code_text(), self.message)
class HTTPHandler(object):
    """
    An HTTP download handler with support for basic auth
    """

    # Number of bytes requested from the response per read() call.
    READ_BLOCK_SIZE = 1024*8

    def __init__(self):
        """Create a handler whose opener has no authentication configured."""
        # http://bugs.python.org/issue9639
        # On 2.6.6 and later 2.6.x releases, replace the basic-auth 401 hook
        # (class-wide) with one that resets the retry counter after each
        # attempt.
        if sys.version_info[:2] == (2, 6) and sys.version_info[2] >= 6:
            def fixed_http_error_401(handler, req, fp, code, msg, headers):
                url = req.get_full_url()
                response = handler.http_error_auth_reqed('www-authenticate',
                                                         url, req, headers)
                handler.retried = 0
                return response
            HTTPBasicAuthHandler.http_error_401 = fixed_http_error_401
        self.http_opener = build_opener()

    def set_authentication(self, uri, login, password):
        """
        Replace the opener with one that answers basic and digest challenges.

        uri, login and password are registered with a default-realm password
        manager. The new opener's header list is replaced so that it only
        advertises support for gzip and deflate content encodings.
        """
        password_manager = HTTPPasswordMgrWithDefaultRealm()
        password_manager.add_password(realm=None, uri=uri, user=login, passwd=password)
        self.http_opener = build_opener(HTTPBasicAuthHandler(password_manager),
                                        HTTPDigestAuthHandler(password_manager))
        self.http_opener.addheaders = [('Accept-encoding', 'gzip,deflate')]

    def download(self, url, path, timeout=30):
        """
        Download url into the directory path and return the saved file name.

        The file is named after the text following the last '/' of url. Bodies
        sent with a gzip or deflate content encoding are decompressed while
        being written. Progress is reported on stdout.

        Raises HTTPHandlerError when the request fails. Errors raised while
        opening or writing the file, or while decompressing (zlib.error),
        propagate unchanged; a partially written file is left on disk.
        """
        response = self._open(url, timeout)
        try:
            # save to file
            file_name = os.path.join(path, url.split('/')[-1])
            with open(file_name, 'wb') as out:
                decoder = self._make_decoder(response.headers.get('content-encoding', ''))
                sys.stdout.write("Downloading: {0}".format(url))
                while True:
                    data = response.read(HTTPHandler.READ_BLOCK_SIZE)
                    sys.stdout.write('.')
                    if not data:
                        break
                    if decoder is not None:
                        data = decoder.decompress(data)
                    out.write(data)
                if decoder is not None:
                    # Emit whatever output the decompressor is still holding.
                    out.write(decoder.flush())
                sys.stdout.write("done\n")
        finally:
            # Always release the connection, even when reading, decoding or
            # writing fails part-way through.
            response.close()
        return file_name

    def _open(self, url, timeout):
        """Open url, translating urllib2/httplib failures to HTTPHandlerError."""
        try:
            # OpenerDirector.open() only accepts a timeout from Python 2.6 on.
            if sys.version_info[:2] >= (2, 6):
                return self.http_opener.open(url, timeout=timeout)
            return self.http_opener.open(url)
        except HTTPError as error:
            headers = dict(error.hdrs or {})
            if error.fp is None:
                raise HTTPHandlerError(error.filename, error.code, error.msg, headers)
            raise HTTPHandlerError(error.filename, error.code, error.msg, headers, error.read())
        except URLError as error:
            # Try to get the tuple arguments of URLError
            reason = error.reason
            if hasattr(reason, 'args') and isinstance(reason.args, tuple) and len(reason.args) == 2:
                raise HTTPHandlerError(httpcode=reason.args[0], httpmsg=reason.args[1])
            raise HTTPHandlerError(httpmsg='urllib2.URLError: %s' % (reason,))
        except BadStatusLine as error:
            raise HTTPHandlerError(httpmsg='httplib.BadStatusLine: %s' % (error.line,))

    @staticmethod
    def _make_decoder(content_encoding):
        """Return a streaming decompressor for content_encoding, or None."""
        encoding = content_encoding.lower()
        if 'gzip' in encoding:
            # Adding 16 to wbits makes zlib expect a gzip header and trailer.
            return zlib.decompressobj(16 + zlib.MAX_WBITS)
        if 'deflate' in encoding:
            return _DeflateDecoder()
        return None


class _DeflateDecoder(object):
    """Streaming 'deflate' decoder accepting zlib-wrapped or raw streams."""

    def __init__(self):
        self._decoder = zlib.decompressobj()
        # True until the first chunk has settled which format is in use.
        self._probing = True

    def decompress(self, data):
        """Return the decompressed form of the next chunk of input."""
        if not self._probing:
            return self._decoder.decompress(data)
        self._probing = False
        try:
            return self._decoder.decompress(data)
        except zlib.error:
            # No zlib header on the first chunk: retry it as a bare deflate
            # stream (negative wbits disables the header check).
            self._decoder = zlib.decompressobj(-zlib.MAX_WBITS)
            return self._decoder.decompress(data)

    def flush(self):
        """Return any output still buffered by the decompressor."""
        return self._decoder.flush()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment