teshanshanuka/download.py

## download.py
# Author: Teshan Liyanage <teshanuka@gmail.com>
import os
from urllib.parse import urlparse
import requests
import re


def is_valid_url(x):
    """Return whether ``x`` parses as a URL with both a scheme and a netloc.

    Args:
        x: Candidate URL (normally a ``str``).

    Returns:
        True if ``urlparse`` yields a non-empty scheme and network location,
        False otherwise (including when ``x`` cannot be parsed at all).
    """
    try:
        result = urlparse(x)
    except (ValueError, AttributeError, TypeError):
        # ValueError: malformed URL (e.g. an unmatched IPv6 bracket).
        # AttributeError/TypeError: ``x`` is not a str/bytes-like value.
        # Unlike a bare ``except``, KeyboardInterrupt/SystemExit still propagate.
        return False
    return all([result.scheme, result.netloc])


def _safe_basename(name: str) -> str:
    """Strip directory components so ``name`` cannot point outside a directory."""
    # Normalise Windows separators first so 'a\\..\\b' is reduced as well.
    name = os.path.basename(name.replace('\\', '/'))
    return '' if name in ('.', '..') else name


def _filename_from_content_disposition(header: str) -> str:
    """Extract a bare file name from a Content-Disposition header value.

    Returns an empty string when the header has no usable ``filename``
    parameter.
    """
    # Accept the quoted form (filename="a b.txt") and the bare token form
    # (filename=a.txt); stop at the next ';' so trailing parameters are not
    # swallowed into the name.
    match = re.search(r'filename="([^"]*)"|filename=([^;]+)', header,
                      flags=re.IGNORECASE)
    if match is None:
        return ''
    quoted, bare = match.groups()
    name = quoted if quoted is not None else bare
    return _safe_basename(name.strip())


def download(url: str,
             download_dir: str = '.',
             default_extension: str = None,
             overwrite: bool = False,
             exists_ok: bool = False,
             chunk_size: int = 1024*1024) -> str:
    """Download file from url and return downloaded file path

    The file name is taken from the response's ``Content-Disposition`` header
    when it provides one, otherwise from the last segment of the url path.
    Directory components are stripped from the name so the file is always
    written directly inside ``download_dir``.

    Args:
        url: Download url
        download_dir: Download directory (created if it does not exist)
        default_extension: File extension appended when the name derived from
            the url has no extension
        overwrite: Whether to overwrite if the file exists
        exists_ok: Return the existing path without downloading if overwrite
            is false and file exists
        chunk_size: Chunk size to download at a time
    Returns:
        Path of the downloaded (or already existing) file

    Raises:
        RuntimeError: If the url is invalid, the download request fails, or no
            file name can be determined
        FileExistsError: If file exists and both overwrite and exists_ok are
            false
    """
    if not is_valid_url(url):
        raise RuntimeError(f"'{url}' is not a valid url")

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(download_dir, exist_ok=True)

    # With stream=True the connection stays open until the body is consumed or
    # the response is closed; the with-block releases it on every exit path.
    with requests.get(url, stream=True) as r:
        if not r.ok:
            raise RuntimeError(f"Download request returned status code {r.status_code}")

        file_name = _filename_from_content_disposition(
            r.headers.get('content-disposition', ''))

        if not file_name:
            file_name = _safe_basename(urlparse(url).path.split('/')[-1])
            # No '.' means the url segment does not look like a file name.
            if '.' not in file_name and default_extension is not None:
                file_name += default_extension

        if not file_name:
            # e.g. url path ends with '/' and no default_extension was given.
            raise RuntimeError(f"Could not determine a file name for '{url}'")

        outfile = os.path.join(download_dir, file_name)
        if os.path.isfile(outfile) and not overwrite:
            if exists_ok:
                return outfile
            raise FileExistsError(f"Download file '{outfile}' exists")

        with open(outfile, "wb") as fp:
            for chunk in r.iter_content(chunk_size=chunk_size):
                # Skip empty chunks; write one chunk at a time to the file.
                if chunk:
                    fp.write(chunk)
    return outfile
	# Author: Teshan Liyanage <teshanuka@gmail.com>
	import os
	from urllib.parse import urlparse
	import requests
	import re


	def is_valid_url(x):
	    """Return whether ``x`` parses as a URL with both a scheme and a netloc.

	    Args:
	        x: Candidate URL (normally a ``str``).

	    Returns:
	        True if ``urlparse`` yields a non-empty scheme and network location,
	        False otherwise (including when ``x`` cannot be parsed at all).
	    """
	    try:
	        result = urlparse(x)
	    except (ValueError, AttributeError, TypeError):
	        # ValueError: malformed URL (e.g. an unmatched IPv6 bracket).
	        # AttributeError/TypeError: ``x`` is not a str/bytes-like value.
	        # Unlike a bare ``except``, KeyboardInterrupt/SystemExit still propagate.
	        return False
	    return all([result.scheme, result.netloc])


	def _safe_basename(name: str) -> str:
	    """Strip directory components so ``name`` cannot point outside a directory."""
	    # Normalise Windows separators first so 'a\\..\\b' is reduced as well.
	    name = os.path.basename(name.replace('\\', '/'))
	    return '' if name in ('.', '..') else name


	def _filename_from_content_disposition(header: str) -> str:
	    """Extract a bare file name from a Content-Disposition header value.

	    Returns an empty string when the header has no usable ``filename``
	    parameter.
	    """
	    # Accept the quoted form (filename="a b.txt") and the bare token form
	    # (filename=a.txt); stop at the next ';' so trailing parameters are not
	    # swallowed into the name.
	    match = re.search(r'filename="([^"]*)"|filename=([^;]+)', header,
	                      flags=re.IGNORECASE)
	    if match is None:
	        return ''
	    quoted, bare = match.groups()
	    name = quoted if quoted is not None else bare
	    return _safe_basename(name.strip())


	def download(url: str,
	             download_dir: str = '.',
	             default_extension: str = None,
	             overwrite: bool = False,
	             exists_ok: bool = False,
	             chunk_size: int = 1024*1024) -> str:
	    """Download file from url and return downloaded file path

	    The file name is taken from the response's ``Content-Disposition`` header
	    when it provides one, otherwise from the last segment of the url path.
	    Directory components are stripped from the name so the file is always
	    written directly inside ``download_dir``.

	    Args:
	        url: Download url
	        download_dir: Download directory (created if it does not exist)
	        default_extension: File extension appended when the name derived from
	            the url has no extension
	        overwrite: Whether to overwrite if the file exists
	        exists_ok: Return the existing path without downloading if overwrite
	            is false and file exists
	        chunk_size: Chunk size to download at a time
	    Returns:
	        Path of the downloaded (or already existing) file

	    Raises:
	        RuntimeError: If the url is invalid, the download request fails, or no
	            file name can be determined
	        FileExistsError: If file exists and both overwrite and exists_ok are
	            false
	    """
	    if not is_valid_url(url):
	        raise RuntimeError(f"'{url}' is not a valid url")

	    # exist_ok avoids the check-then-create race of a separate exists() test.
	    os.makedirs(download_dir, exist_ok=True)

	    # With stream=True the connection stays open until the body is consumed or
	    # the response is closed; the with-block releases it on every exit path.
	    with requests.get(url, stream=True) as r:
	        if not r.ok:
	            raise RuntimeError(f"Download request returned status code {r.status_code}")

	        file_name = _filename_from_content_disposition(
	            r.headers.get('content-disposition', ''))

	        if not file_name:
	            file_name = _safe_basename(urlparse(url).path.split('/')[-1])
	            # No '.' means the url segment does not look like a file name.
	            if '.' not in file_name and default_extension is not None:
	                file_name += default_extension

	        if not file_name:
	            # e.g. url path ends with '/' and no default_extension was given.
	            raise RuntimeError(f"Could not determine a file name for '{url}'")

	        outfile = os.path.join(download_dir, file_name)
	        if os.path.isfile(outfile) and not overwrite:
	            if exists_ok:
	                return outfile
	            raise FileExistsError(f"Download file '{outfile}' exists")

	        with open(outfile, "wb") as fp:
	            for chunk in r.iter_content(chunk_size=chunk_size):
	                # Skip empty chunks; write one chunk at a time to the file.
	                if chunk:
	                    fp.write(chunk)
	    return outfile