@DakuTree
Last active February 22, 2019 15:04
Python3 - Multi-connection downloading via Range header (requests, concurrent.futures)
#!/usr/bin/env python3
# Example of using concurrent.futures with the Range HTTP header for faster downloading.
# This requires that the server you are downloading from supports the Range header.
# Couldn't find any clear solution for how to do this, so I wrote one up myself.
#
# Here we use equal-sized chunks, representative of `(chunkSize, _) = divmod(contentLength, CONNECTIONS)`.
# In my own testing:
# - Size <= ~10MB is slower than a single connection.
# - Size > ~10MB is faster than a single connection.
#
# I'm assuming we could use a variable number of connections & chunk sizes to improve speeds in some cases, but that is beyond my current knowledge. Feel free to improve where needed.
from concurrent import futures
import requests
import time
URL = "http://ipv4.download.thinkbroadband.com/100MB.zip" # https://www.thinkbroadband.com/download for test files
CONNECTIONS = 8
DATACHUNKS = {}
def main():
    head = requests.head(URL)
    contentLength = int(head.headers['Content-Length'])
    (chunkSize, _) = divmod(contentLength, CONNECTIONS)

    with futures.ThreadPoolExecutor(max_workers=CONNECTIONS) as e:
        downloads = []
        for i in range(1, CONNECTIONS):
            time.sleep(0.20)  # Delay between requests
            # There is probably a nicer way of doing all this logic with Python, but I couldn't find any..
            (chunkStart, chunkEnd) = (((chunkSize * i) - chunkSize), (chunkSize * i))
            if not i == 1:
                # Shift so each chunk starts one byte past the previous chunk's inclusive end
                chunkStart += i - 1
                chunkEnd += i - 1
            downloads.append(e.submit(downloadChunk, URL, chunkStart, chunkEnd))
        # Final chunk covers everything remaining, up to contentLength
        downloads.append(e.submit(downloadChunk, URL, (((CONNECTIONS - 1) * chunkSize) + (CONNECTIONS - 1)), contentLength))
        futures.wait(downloads)

    print("All chunks downloaded, dumping to file.")
    filename = URL.split('/')[-1]  # NOTE: This will not work with URLs using a Content-Disposition filename header
    with open(filename, 'wb') as f:
        f.write(b"".join(v for (k, v) in sorted(DATACHUNKS.items())))
def downloadChunk(url, bytes_start, bytes_end):
    print(f"Downloading chunk {bytes_start} - {bytes_end}")
    try:
        r = requests.get(url, headers={"Range": f"bytes={bytes_start}-{bytes_end}"})
        r.raise_for_status()
        DATACHUNKS[bytes_start] = r.content
    except requests.exceptions.RequestException as e:
        print(e)
        # sys.exit(1)

if __name__ == "__main__":
    main()
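A quick sanity check of the chunking arithmetic used above, as a standalone sketch with hypothetical values (`contentLength = 100` stands in for a real `Content-Length`). It verifies that the inclusive byte ranges tile the file with no gaps or overlaps. Note the final range ends at `contentLength` rather than `contentLength - 1`; servers clamp ranges that run past the end of the resource.

```python
# Sketch with hypothetical values: reproduce the chunk-boundary logic
# from main() and check the ranges are contiguous.
CONNECTIONS = 8
contentLength = 100  # stand-in for a real Content-Length

(chunkSize, _) = divmod(contentLength, CONNECTIONS)

ranges = []
for i in range(1, CONNECTIONS):
    (chunkStart, chunkEnd) = ((chunkSize * i) - chunkSize, chunkSize * i)
    if not i == 1:
        chunkStart += i - 1
        chunkEnd += i - 1
    ranges.append((chunkStart, chunkEnd))
ranges.append((((CONNECTIONS - 1) * chunkSize) + (CONNECTIONS - 1), contentLength))

# First range starts at byte 0; each later range starts one past the
# previous inclusive end; the last range reaches contentLength.
assert ranges[0][0] == 0
assert all(ranges[j][0] == ranges[j - 1][1] + 1 for j in range(1, len(ranges)))
assert ranges[-1][1] == contentLength

print(ranges)
```

Running this prints `[(0, 12), (13, 25), (26, 38), (39, 51), (52, 64), (65, 77), (78, 90), (91, 100)]`, which is how `Range: bytes=start-end` (inclusive on both ends) carves up the file.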