Skip to content

Instantly share code, notes, and snippets.

/redl.py Secret

Created August 26, 2016 23:48
Show Gist options
  • Save anonymous/8fe3b59aba80a3094518bf60d03253db to your computer and use it in GitHub Desktop.
Save anonymous/8fe3b59aba80a3094518bf60d03253db to your computer and use it in GitHub Desktop.
from urllib import request
import argparse
import os
import time
def report_hook(downloaded, total_size, current_rate, mean_rate, *args):
    """Print a one-line progress summary that overwrites itself in place.

    The line starts with a carriage return and has no trailing newline, so
    successive calls redraw the same terminal line.

    Args:
        downloaded: Bytes available so far.
        total_size: Expected size of the complete file, in bytes.
        current_rate: Transfer rate of the current request, in bytes/sec.
        mean_rate: Mean transfer rate, in bytes/sec.
        *args: Extra positional values; accepted and ignored.
    """
    done_mb = downloaded / 1024 / 1024
    total_mb = total_size / 1024 / 1024
    current_kb = current_rate / 1024
    mean_kb = mean_rate / 1024
    line = "\rSize: %.2f/%.2f MB. Current: %.2f KB/sec. Mean: %.2f KB/sec" % (
        done_mb, total_mb, current_kb, mean_kb)
    print(line, end="")
def _safe_rate(byte_count, seconds):
    """Return bytes per second, or 0.0 when no measurable time has elapsed."""
    return byte_count / seconds if seconds > 0 else 0.0


def redl(url, path, chunk_size=16384, threshold=0.8, delay=1, overwrite=False, report_hook=None):
    """Download *url* to *path*, restarting the request when the rate sags.

    The file is fetched with ``Range`` requests that start at the number of
    bytes already on disk. While a request is running, its rate is compared
    with the best rate that same request has reached; once the rate falls
    below ``threshold`` times that peak (and the request is older than
    ``delay`` seconds) the request is abandoned and a new one is issued from
    the current offset. Data is appended to *path* as it arrives.

    Args:
        url: Direct URL of the file.
        path: Destination file. If it already exists and *overwrite* is
            false, the download resumes from its current size.
        chunk_size: Number of bytes to request per read.
        threshold: Fraction of the per-request peak rate below which the
            request is restarted.
        delay: Minimum age of a request, in seconds, before it may be
            judged too slow.
        overwrite: Truncate *path* and start from byte 0.
        report_hook: Optional callable invoked after every chunk as
            ``report_hook(bytes_ready, total_size, current_rate, mean_rate,
            session_bytes)``; rates are in bytes per second.

    Returns:
        The number of bytes of the file present on disk when the function
        stops. This can be smaller than the advertised size if the server
        ended the response early.

    Raises:
        ValueError: The response carries no usable ``Content-Length``.
        RuntimeError: A resume from a non-zero offset was answered with
            something other than ``206 Partial Content``; appending that
            body would corrupt the file.
    """
    bytes_ready = 0
    if overwrite:
        # Truncate (or create) the target so the download starts from byte 0.
        with open(path, 'wb'):
            pass
    elif os.path.isfile(path):
        bytes_ready = os.path.getsize(path)

    # Probe request: only the headers are needed, so close the response
    # right away instead of leaving an unread body and socket open.
    with request.urlopen(url) as probe:
        length_header = probe.headers.get('Content-Length')
    if length_header is None:
        raise ValueError("no Content-Length header in the response for %r" % (url,))
    total_size = int(length_header)

    finished = bytes_ready >= total_size
    # monotonic() is immune to wall-clock adjustments while measuring rates.
    start_time = time.monotonic()
    session_bytes = 0  # bytes fetched by this call only, so the mean rate ignores data already on disk
    while not finished:
        req = request.Request(url)
        req.add_header('Range', 'bytes=%d-' % bytes_ready)
        peak = 1  # floor of 1 byte/sec keeps the ratio below well-defined
        with request.urlopen(req) as resp:
            if bytes_ready and getattr(resp, 'status', None) != 206:
                # The Range header was not honoured: the body starts at
                # byte 0 again and must not be appended to partial data.
                raise RuntimeError(
                    "cannot resume %r at byte %d: server did not answer "
                    "with 206 Partial Content" % (url, bytes_ready))
            request_start = time.monotonic()
            request_bytes = 0
            with open(path, 'ab') as out:
                while True:
                    chunk = resp.read(chunk_size)
                    if not chunk:
                        # End of the response body: stop for good.
                        finished = True
                        break
                    # Stream straight to disk; nothing is accumulated in memory.
                    out.write(chunk)
                    received = len(chunk)
                    bytes_ready += received
                    request_bytes += received
                    session_bytes += received

                    now = time.monotonic()
                    passed = now - request_start
                    current_rate = _safe_rate(request_bytes, passed)
                    mean_rate = _safe_rate(session_bytes, now - start_time)
                    if report_hook:
                        report_hook(bytes_ready, total_size, current_rate, mean_rate, session_bytes)
                    if current_rate / peak > 1.05:  # ignore small fluctuations
                        peak = current_rate
                    if current_rate / peak < threshold and passed > delay:
                        # Too slow relative to this request's peak: drop it
                        # and re-request from the current offset.
                        break
    return bytes_ready
def build_parser():
    """Build the command-line argument parser for the downloader script."""
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(dest='url', action="store",
                        help="Direct file URL.")
    parser.add_argument("-o", "--output", action="store", dest='output',
                        help="Path to save the file to.", required=True)
    parser.add_argument("-f", "--full", action="store_true", dest='complete',
                        default=False, help="Wait until the download is complete before writing to disk.")
    parser.add_argument("-c", "--chunk", action="store", dest='chunk',
                        type=int, help="Chunk size.", default=16384)
    parser.add_argument("-w", "--overwrite", action="store_true", dest='overwrite',
                        default=False, help="Ignore the data on disk and start anew.")
    parser.add_argument("-t", "--threshold", action="store", dest='threshold',
                        type=float, help=("Download rate threshold relative to the max upon "
                                          "reaching which the download will be restarted. You should play with this "
                                          "value if either the process is restarting too often or the rate is "
                                          "taking too long to drop below the threshold. To a certain extent, higher "
                                          "threshold usually means higher mean transfer rate, provided the "
                                          "latencies are low enough."), default=0.8)
    parser.add_argument("-d", "--delay", action="store", dest='delay',
                        type=float, help=("Minimum delay before a request can be deemed too slow, in seconds. "
                                          "Helps ignore initial rate changes at the starts."), default=1)
    return parser


def main(argv=None):
    """Parse *argv* (defaults to ``sys.argv[1:]``) and run the download."""
    args = build_parser().parse_args(argv)
    # NOTE(review): args.complete (-f/--full) is parsed but redl() takes no
    # corresponding parameter, so the flag currently has no effect.
    redl(args.url, args.output, chunk_size=args.chunk, threshold=args.threshold,
         delay=args.delay, overwrite=args.overwrite, report_hook=report_hook)
    # The progress line is redrawn with "\r" and never ends in a newline;
    # terminate it so the shell prompt does not land on the same line.
    print()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment