Example of a chunked downloader using Requests
#!/usr/bin/env python | |
# encoding: utf-8 | |
"""Download one or more URLs with a running progress display""" | |
from __future__ import (absolute_import, division, print_function, | |
unicode_literals) | |
import os | |
import sys | |
from timeit import default_timer | |
if sys.version_info < (3, ): | |
from urllib import quote | |
from urlparse import urlsplit | |
else: | |
from urllib.parse import quote, urlsplit | |
import requests | |
# Refuse to run on interpreters older than 2.7.9: the script relies on the
# PEP 466 SSL improvements that the message below refers to.
if sys.version_info < (2, 7, 9):
    print(
        'This script requires Python >= 2.7.9 for the PEP-466 SSL improvements',
        file=sys.stderr,
    )
    sys.exit(123)

# Default number of bytes requested per iteration while streaming (1 MiB).
DEFAULT_CHUNK_SIZE = 1024 ** 2
def sizeof_fmt(num, suffix='B'):
    """Format a byte count as a human-readable string with binary prefixes.

    The value is divided by 1024 until its magnitude drops below 1024, then
    rendered with one decimal place, the matching IEC prefix ('Ki', 'Mi',
    ...) and ``suffix``. Values too large for 'Zi' are reported in 'Yi'.

    Adapted from https://stackoverflow.com/a/1094933/59984 as an alternative
    to humanize or Django's filesizeformat.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    scaled = num
    for prefix in prefixes:
        if abs(scaled) < 1024.0:
            break
        scaled = scaled / 1024.0
    else:
        # Ran out of prefixes without dropping below 1024: report in Yi.
        return "%.1f%s%s" % (scaled, 'Yi', suffix)
    return "%3.1f%s%s" % (scaled, prefix, suffix)
def download_url(url, filename=None, chunk_size=DEFAULT_CHUNK_SIZE):
    """Save the contents of a URL to the provided filename.

    Args:
        url: Address to fetch with an HTTP GET (redirects are followed).
        filename: Destination path. When omitted, the last component of the
            URL path is used. If that leaves an empty name (for example
            ``http://example.com/``), the percent-encoded URL is used.
        chunk_size: Number of bytes requested from the response per
            iteration while streaming the body to disk.

    Returns:
        None. Progress, HTTP errors and write errors are reported with
        ``print``. Exceptions raised by ``requests.get`` itself are not
        caught here.
    """
    if filename is None:
        filename = os.path.basename(urlsplit(url).path)

    if not filename:
        # Quoting the (empty) filename would still be empty, and open('')
        # fails; derive a filesystem-safe name from the whole URL instead.
        filename = quote(url, safe="")

    print('Downloading %s to %s' % (url, filename))

    start_time = default_timer()
    bytes_received = 0

    resp = requests.get(url, stream=True, allow_redirects=True)

    # With stream=True the connection stays checked out until the response
    # is closed, so every exit path below goes through the finally clause.
    try:
        if not resp.ok:
            print('HTTP %d: %s for %s' % (resp.status_code, resp.reason, url))
            return

        try:
            with open(filename, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
                    bytes_received += len(chunk)
        except Exception as exc:
            print('Error while retrieving %s to %s: %s' % (url, filename, exc))
            # Best-effort removal of a partial file. It may not exist at all
            # (e.g. open() itself failed), which must not mask the error
            # already reported above.
            try:
                os.unlink(filename)
            except OSError:
                pass
            return

        # Reporting happens outside the handler above so that a problem
        # while printing can never delete a completed download.
        elapsed = default_timer() - start_time
        # Guard against a zero interval on coarse timers.
        rate = bytes_received / elapsed if elapsed > 0 else 0

        print('Received %s in %0.03f seconds (%s/s)' % (
            sizeof_fmt(bytes_received),
            elapsed,
            sizeof_fmt(rate),
        ))
    finally:
        resp.close()
if __name__ == '__main__':
    # Every command-line argument is treated as a URL and fetched in order.
    for target in sys.argv[1:]:
        download_url(target)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment