Skip to content

Instantly share code, notes, and snippets.

@teshanshanuka
Last active August 16, 2021 12:11
Show Gist options
  • Save teshanshanuka/01f79074e89d68678705f017c8c2e2f9 to your computer and use it in GitHub Desktop.
Save teshanshanuka/01f79074e89d68678705f017c8c2e2f9 to your computer and use it in GitHub Desktop.
A better download function for Python
# Author: Teshan Liyanage <teshanuka@gmail.com>
import os
from urllib.parse import urlparse
import requests
import re
def is_valid_url(x):
    """Return whether x parses as a URL with both a scheme and a network location.

    Args:
        x: Candidate URL string
    Returns:
        True if both the scheme and the netloc are non-empty, False otherwise
        (including when x cannot be parsed at all)
    """
    try:
        result = urlparse(x)
    except (ValueError, TypeError, AttributeError):
        # ValueError: malformed netloc (e.g. unbalanced IPv6 brackets);
        # TypeError/AttributeError: x is not a str/bytes-like value.
        # Deliberately not a bare except, so KeyboardInterrupt/SystemExit propagate.
        return False
    return all([result.scheme, result.netloc])
def _file_name_from_response(headers, url: str) -> str:
    """Derive a bare file name from the response headers, falling back to the url path."""
    # Function-scope import keeps this block self-contained.
    from email.message import Message

    candidates = []
    disposition = headers.get('content-disposition')
    if disposition:
        # The stdlib parser handles quoted values, trailing parameters and
        # RFC 2231 ``filename*=`` encodings, unlike a plain regex.
        msg = Message()
        msg['content-disposition'] = disposition
        candidates.append(msg.get_filename() or '')
    candidates.append(urlparse(url).path.split('/')[-1])

    for candidate in candidates:
        # The name may come from an untrusted server: keep only the final
        # path component so it cannot escape the download directory.
        name = os.path.basename(candidate.replace('\\', '/')).strip()
        if name and name not in ('.', '..'):
            return name
    return ''


def download(url: str,
             download_dir: str = '.',
             default_extension: str = None,
             overwrite: bool = False,
             exists_ok: bool = False,
             chunk_size: int = 1024*1024) -> str:
    """Download file from url and return downloaded file path

    The file name is taken from the Content-Disposition response header when
    present, otherwise from the last segment of the url path.

    Args:
        url: Download url
        download_dir: Download directory (created if it does not exist)
        default_extension: File extension appended if the derived file name has no extension
        overwrite: Whether to overwrite if the file exists
        exists_ok: Do nothing if overwrite is false and file exists
        chunk_size: Chunk size to download at a time
    Returns:
        Path of the downloaded (or already existing) file
    Raises:
        RuntimeError: If the url is invalid, the download request fails or no
            file name can be determined
        FileExistsError: If file exists and both overwrite and exists_ok are false
    """
    if not is_valid_url(url):
        raise RuntimeError(f"'{url}' is not a valid url")

    if not os.path.exists(download_dir):
        # exist_ok guards against another process creating it in between
        os.makedirs(download_dir, exist_ok=True)

    # The context manager releases the streamed connection on every exit path
    with requests.get(url, stream=True) as r:
        if not r.ok:
            raise RuntimeError(f"Download request returned status code {r.status_code}")

        file_name = _file_name_from_response(r.headers, url)
        if '.' not in file_name:  # seems like not a file name (no extension)
            if default_extension is not None:
                file_name += default_extension
        if not file_name:
            # Without a name, the output path would be the directory itself
            raise RuntimeError(f"Could not determine a file name for '{url}'")

        outfile = os.path.join(download_dir, file_name)
        if os.path.isfile(outfile) and not overwrite:
            if exists_ok:
                return outfile
            raise FileExistsError(f"Download file '{outfile}' exists")

        with open(outfile, "wb") as fp:
            for chunk in r.iter_content(chunk_size=chunk_size):
                # writing one chunk at a time to file; skip empty chunks
                if chunk:
                    fp.write(chunk)
    return outfile
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment