Skip to content

Instantly share code, notes, and snippets.

@averne
Created March 15, 2019 04:15
Show Gist options
  • Save averne/7b76254c29f0986c52235d4f509df5bd to your computer and use it in GitHub Desktop.
Save averne/7b76254c29f0986c52235d4f509df5bd to your computer and use it in GitHub Desktop.
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import os, sys
import threading, queue
import requests
from tqdm import tqdm
# Size in bytes of each chunk read from a response and written to disk.
BUF_SIZE = 0x1000
# Seconds to wait for new progress before the transfer is considered over.
TIMEOUT = 3
# Number of byte ranges (one worker thread each) a download is split into.
THREAD_NB = 5
# Maximum number of restarts after a size mismatch.
MAX_RETRIES = 5


def make_request(method, url, **kwargs):
    """Send an HTTP request through ``requests`` with streaming enabled.

    Args:
        method: HTTP verb, e.g. ``'GET'`` or ``'HEAD'``.
        url: Target URL.
        **kwargs: Extra keyword arguments forwarded to ``requests.request``.
            ``stream`` defaults to ``True`` but may be overridden by the
            caller instead of raising a duplicate-keyword ``TypeError``.

    Returns:
        Whatever ``requests.request`` returns for the call.
    """
    kwargs.setdefault('stream', True)
    return requests.request(method, url, **kwargs)
def worker(k, l, q, url, start, end, fp, **kwargs):
    """Download one byte range of ``url`` and write it into the shared file.

    Args:
        k: ``threading.Event``; once set, the worker stops after the current
            chunk.
        l: ``threading.Lock`` serialising the seek+write pair on ``fp``,
            which is shared between all workers.
        q: ``queue.Queue`` receiving the length of every chunk written.
        url: URL to download from.
        start: First byte offset of the range (inclusive).
        end: Last byte offset of the range (inclusive).
        fp: Writable, seekable binary file object shared by all workers.
        **kwargs: Extra keyword arguments forwarded to ``make_request``.
            A caller-supplied ``headers`` mapping is merged with the
            ``Range`` header rather than clashing with it.
    """
    # kwargs is this call's private dict, so popping from it is safe.
    headers = dict(kwargs.pop('headers', None) or {})
    headers['Range'] = 'bytes=%d-%d' % (start, end)

    downloaded = 0
    r = make_request('GET', url, headers=headers, **kwargs)
    try:
        for buf in r.iter_content(BUF_SIZE):
            if k.is_set():
                break
            # The file position is shared, so seek and write must happen
            # atomically; ``with`` releases the lock even if a write fails.
            with l:
                fp.seek(start + downloaded)
                fp.write(buf)
            downloaded += len(buf)
            q.put(len(buf))
    finally:
        # Release the streamed connection, including on early exit.
        r.close()
def _byte_ranges(size, parts):
    """Split ``size`` bytes into at most ``parts`` inclusive (start, end) ranges."""
    if size <= 0:
        return []
    chunk = size // parts
    if chunk == 0:
        # Fewer bytes than parts: one range covers the whole resource.
        return [(0, size - 1)]
    ranges = [(n * chunk, (n + 1) * chunk - 1) for n in range(parts - 1)]
    # The last range absorbs the remainder. HTTP byte positions are
    # zero-based and inclusive, so the final byte is ``size - 1``.
    ranges.append(((parts - 1) * chunk, size - 1))
    return ranges


def threaded_download(url, dest, retries_count=0, **kwargs):
    """Download ``url`` to ``dest`` using ``THREAD_NB`` ranged requests.

    A HEAD request checks that the server advertises range support and
    reads the total size from ``Content-Length``. The size is then split
    into byte ranges, each fetched by a ``worker`` thread writing into the
    same file. Progress is reported through a ``tqdm`` bar. The transfer is
    considered over once no chunk has been reported for ``TIMEOUT`` seconds;
    if the byte count then differs from the expected size, the download is
    restarted, up to ``MAX_RETRIES`` times.

    Args:
        url: URL to download.
        dest: Destination file path; missing parent directories are created.
        retries_count: Number of restarts already performed (used by the
            recursive retry; callers normally leave it at 0).
        **kwargs: Extra keyword arguments forwarded to every request.

    Raises:
        requests.exceptions.RequestException: The server does not advertise
            ``Accept-Ranges`` (or advertises ``none``).
        requests.exceptions.RetryError: The size still mismatches after
            ``MAX_RETRIES`` restarts.
        SystemExit: The wait loop was interrupted (``KeyboardInterrupt`` or
            ``SystemExit``).
    """
    name = os.path.basename(dest)

    # Validate the server before touching the filesystem, so a rejected
    # download neither truncates ``dest`` nor leaks an open handle.
    r = make_request('HEAD', url, **kwargs)
    try:
        if ('Accept-Ranges' not in r.headers) or (r.headers['Accept-Ranges'] == 'none'):
            raise requests.exceptions.RequestException('Content range requesting is not supported')
        size = int(r.headers['Content-Length'])
    finally:
        r.close()

    # ``dirname`` is '' for a bare filename, and ``makedirs('')`` raises.
    directory = os.path.dirname(dest)
    if directory:
        os.makedirs(directory, exist_ok=True)

    k = threading.Event()  # Used to kill threads
    l = threading.Lock()   # Used to ensure file writes thread-safety
    q = queue.Queue()      # Used to get downloaded byte count
    threads = []
    downloaded = 0
    exit_flag = False

    with open(dest, 'wb') as fp:
        pbar = tqdm(total=size, desc=name, unit='B', unit_scale=True, leave=False)
        try:
            for start, end in _byte_ranges(size, THREAD_NB):
                t = threading.Thread(target=worker, args=(k, l, q, url, start, end, fp), kwargs=kwargs)
                t.daemon = True
                t.start()
                threads.append(t)
            try:
                # Drain progress reports until none arrives for TIMEOUT
                # seconds, which marks the end (or a stall) of the transfer.
                while True:
                    pbar.update(q.get(timeout=TIMEOUT))
            except queue.Empty:
                downloaded = pbar.n
            except (KeyboardInterrupt, SystemExit):
                exit_flag = True
        finally:
            # Always stop and reap the workers before the file is closed.
            pbar.close()
            k.set()
            for t in threads:
                t.join()

    if exit_flag:
        sys.exit(exit_flag)

    if downloaded != size:
        if retries_count >= MAX_RETRIES:
            raise requests.exceptions.RetryError('Size mismatch, but max retries count exceeded, exiting...')
        print('Size mismatch, restarting download...')
        threaded_download(url, dest, retries_count=retries_count + 1, **kwargs)

    # Only the outermost call announces success.
    if not retries_count:
        print('Saved to %s!' % dest)
if __name__ == '__main__':
    # Interactive entry point: ask for the URL first, then the destination.
    source_url = input('Enter url: ')
    target_path = input('Enter destination: ')
    threaded_download(source_url, target_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment