Skip to content

Instantly share code, notes, and snippets.

@Grace-Amondi
Last active July 26, 2022 16:47
Show Gist options
  • Save Grace-Amondi/5d043f91bb03a251bff653a1601a708d to your computer and use it in GitHub Desktop.
Save Grace-Amondi/5d043f91bb03a251bff653a1601a708d to your computer and use it in GitHub Desktop.
Batch File Download
import os
from multiprocessing.pool import ThreadPool

import requests
from requests.models import HTTPBasicAuth
from tqdm import tqdm
def download_url(url):
    """Download ``url`` into the local ``data/`` directory with a progress bar.

    The output file name is the last ``/``-separated segment of ``url``. The
    request is sent with HTTP basic auth using the placeholder credentials
    ``"username"`` / ``"password"`` and the body is streamed to disk in
    1 KiB chunks.

    Args:
        url: Address of the file to fetch.

    Returns:
        The ``url`` that was passed in. It is returned whether or not the
        download succeeded; failures are reported on stdout with ``print``.
    """
    # get file output name from path
    out_name = url.split("/")[-1]
    block_size = 1024  # 1 Kibibyte

    # open() below fails with FileNotFoundError if the target directory is
    # missing; exist_ok keeps this safe when several threads get here at once.
    os.makedirs("data", exist_ok=True)

    # Using the response as a context manager releases the streamed connection
    # on every path, including a non-OK status where the body is never read.
    with requests.get(
        url, auth=HTTPBasicAuth("username", "password"), stream=True
    ) as r:
        if r.status_code != requests.codes.ok:
            # Without this, a failed request with no content-length header
            # would be indistinguishable from a successful download.
            print("ERROR, {} returned HTTP status {}".format(url, r.status_code))
            return url

        # A missing content-length header yields 0, i.e. a bar without a total.
        total_size_in_bytes = int(r.headers.get("content-length", 0))
        with tqdm(
            total=total_size_in_bytes, unit="iB", unit_scale=True
        ) as progress_bar:
            with open("data/" + out_name, "wb") as f:
                for data in r.iter_content(block_size):
                    progress_bar.update(len(data))
                    f.write(data)
            downloaded = progress_bar.n

    # Only verifiable when the server announced a size up front.
    if total_size_in_bytes != 0 and downloaded != total_size_in_bytes:
        print("ERROR, something went wrong")
    return url
# Read the URL list, one URL per line. Surrounding whitespace is stripped and
# blank lines are skipped so that an empty string is never handed to
# download_url as a URL.
with open("link.txt", "r") as f:
    dwnld_links = [line.strip() for line in f if line.strip()]

# Run 5 worker threads. Each call takes the next element of dwnld_links and
# results are printed in completion order, not input order. The with-block
# shuts the pool down once every result has been consumed.
with ThreadPool(5) as pool:
    for finished_url in pool.imap_unordered(download_url, dwnld_links):
        print(finished_url)
# requirements.txt
# requests==2.26.0
# tqdm==4.62.3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment