Skip to content

Instantly share code, notes, and snippets.

@acdha
Created January 19, 2018 18:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save acdha/669d36f57454a1e576756acfaf9b2a24 to your computer and use it in GitHub Desktop.
Save acdha/669d36f57454a1e576756acfaf9b2a24 to your computer and use it in GitHub Desktop.
Example of a chunked downloader using Requests
#!/usr/bin/env python
# encoding: utf-8
"""Download one or more URLs with a running progress display"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
import os
import sys
from timeit import default_timer
if sys.version_info < (3, ):
from urllib import quote
from urlparse import urlsplit
else:
from urllib.parse import quote, urlsplit
import requests
# Refuse to run on interpreters older than 2.7.9, which predate the PEP-466
# SSL improvements named in the message below.
if sys.version_info < (2, 7, 9):
    print(
        'This script requires Python >= 2.7.9 for the PEP-466 SSL improvements',
        file=sys.stderr,
    )
    raise SystemExit(123)

# Default number of bytes requested per chunk while streaming a response
# body (1 MiB).
DEFAULT_CHUNK_SIZE = 1024 ** 2
def sizeof_fmt(num, suffix='B'):
    """Format a number as a human-readable size using 1024-based units.

    ``num`` is divided by 1024 until its magnitude drops below 1024; the
    result is rendered with one decimal place followed by the matching
    prefix ('', Ki, Mi, Gi, Ti, Pi, Ei, Zi) and ``suffix``. Values that are
    still >= 1024 after the "Zi" step are reported with the "Yi" prefix.
    """
    # Approach taken from https://stackoverflow.com/a/1094933/59984 as an
    # alternative to depending on humanize or Django's filesizeformat
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    scaled = num
    position = 0

    while position < len(prefixes):
        if abs(scaled) < 1024.0:
            return "%3.1f%s%s" % (scaled, prefixes[position], suffix)
        scaled = scaled / 1024.0
        position += 1

    # Ran out of named prefixes: whatever is left is expressed in Yi
    return "%.1f%s%s" % (scaled, 'Yi', suffix)
def download_url(url, filename=None, chunk_size=DEFAULT_CHUNK_SIZE):
    """Save the contents of a URL to the provided filename.

    Progress and error messages are printed rather than raised, so a batch
    of downloads can continue past an individual failure.

    Args:
        url: The URL to retrieve. Redirects are followed.
        filename: Path to write the response body to. When ``None``, the
            last component of the URL's path is used; if the path has no
            last component (e.g. ``http://example.org/``), the whole URL is
            percent-encoded and used as the filename instead.
        chunk_size: Number of bytes requested per chunk while streaming the
            response body to disk.

    Returns:
        None. A non-OK HTTP status, or an error while writing the file, is
        reported with ``print`` and the function returns early. After a
        write error the partially written file is removed.

    Raises:
        Any exception raised by ``requests.get`` itself (the initial
        request is made outside the error handler and is not caught here).
    """
    if filename is None:
        filename = os.path.basename(urlsplit(url).path)

        if not filename:
            # The URL path ends in "/" or is empty, so there is no basename
            # to reuse. Quote the URL itself (with no "safe" characters, so
            # "/" is escaped too) to get a usable single-component filename.
            filename = quote(url, safe="")

    print('Downloading %s to %s' % (url, filename))

    start_time = default_timer()
    bytes_received = 0

    resp = requests.get(url, stream=True, allow_redirects=True)

    # Everything after the request lives under this try/finally so the
    # streamed connection is released on every exit path, including the
    # early return for a non-OK status.
    try:
        if not resp.ok:
            print('HTTP %d: %s for %s' % (resp.status_code, resp.reason, url))
            return

        # Only clean up a file this call actually opened: if open() itself
        # fails there is nothing of ours to remove.
        file_opened = False

        try:
            with open(filename, 'wb') as f:
                file_opened = True

                for chunk in resp.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
                    bytes_received += len(chunk)
        except Exception as exc:
            print('Error while retrieving %s to %s: %s' % (url, filename, exc))

            if file_opened:
                try:
                    os.unlink(filename)
                except OSError:
                    # Best-effort cleanup: the partial file may already be
                    # gone, and the download error was reported above.
                    pass

            return

        # The summary is computed outside the error handler above so that a
        # problem formatting it can never cause a completed download to be
        # deleted.
        elapsed = default_timer() - start_time
        # A coarse timer can report zero elapsed time for a tiny download;
        # avoid dividing by zero in that case.
        rate = bytes_received / elapsed if elapsed > 0 else 0

        print('Received %s in %0.03f seconds (%s/s)' % (
            sizeof_fmt(bytes_received),
            elapsed,
            sizeof_fmt(rate),
        ))
    finally:
        resp.close()
if __name__ == '__main__':
    # Each command-line argument is treated as a URL and downloaded in the
    # order given, using the default filename and chunk size.
    for requested_url in sys.argv[1:]:
        download_url(requested_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment