Skip to content

Instantly share code, notes, and snippets.

@masroore
Created March 13, 2023 05:20
Show Gist options
  • Save masroore/f93cc1aa5681acd29e0d26c19b298e70 to your computer and use it in GitHub Desktop.
Save masroore/f93cc1aa5681acd29e0d26c19b298e70 to your computer and use it in GitHub Desktop.
Download Multiple Files Concurrently in Python
import requests
from multiprocessing.pool import ThreadPool
def download_url(url, timeout=30):
    """Download ``url`` into the current working directory.

    The local file name is the last ``/``-separated segment of the URL
    (``abc/xyz/file.txt`` is saved as ``file.txt``). The file is written
    only when the server answers with HTTP 200; for any other status
    nothing is written.

    Args:
        url: Address of the resource to fetch.
        timeout: Seconds to wait for the server to connect or to send data
            before ``requests`` raises a timeout error.

    Returns:
        ``url`` unchanged, whether or not a file was written, so the caller
        can tell which download has finished.
    """
    print("downloading: ", url)
    # Everything after the last "/" is the file name; a URL without any
    # "/" is used as the file name in full.
    file_name = url.rsplit("/", 1)[-1]
    # Without a timeout a stalled server would block this worker forever.
    # The ``with`` block releases the streamed connection on every path.
    with requests.get(url, stream=True, timeout=timeout) as r:
        if r.status_code == requests.codes.ok:
            with open(file_name, "wb") as f:
                # Iterating the response object directly yields 128-byte
                # pieces; request larger chunks to cut the number of writes.
                for data in r.iter_content(chunk_size=8192):
                    f.write(data)
    return url
urls = [
    "https://jsonplaceholder.typicode.com/posts",
    "https://jsonplaceholder.typicode.com/comments",
    "https://jsonplaceholder.typicode.com/photos",
    "https://jsonplaceholder.typicode.com/todos",
    "https://jsonplaceholder.typicode.com/albums",
]

# Download with 5 worker threads; each call takes the next element of the
# urls list, and finished URLs are printed in completion order rather than
# list order.
# The results are consumed inside the ``with`` block because leaving it
# terminates the pool; using the pool as a context manager also makes sure
# the worker threads are shut down once the downloads are done.
with ThreadPool(5) as pool:
    for finished_url in pool.imap_unordered(download_url, urls):
        print(finished_url)
@masroore
Copy link
Author

import time
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool

import requests

def download_url(args):
    """Download one URL to a local file and time the operation.

    Args:
        args: Sequence whose first item is the URL to fetch and whose
            second item is the path of the file to write.

    Returns:
        A ``(url, seconds)`` tuple on success, where ``seconds`` is the
        time spent fetching and writing the file; ``None`` when the request
        or the file write fails (the error is printed instead of raised).
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments.
    t0 = time.perf_counter()
    url, fn = args[0], args[1]
    try:
        # A timeout keeps a stalled server from blocking this worker forever.
        r = requests.get(url, timeout=30)
        # Treat 4xx/5xx answers as failures so that an error page is not
        # saved as if it were the requested file.
        r.raise_for_status()
        # The body is fetched completely before the file is opened, so a
        # failed request never creates or truncates the target file.
        with open(fn, 'wb') as f:
            f.write(r.content)
    except (requests.RequestException, OSError) as e:
        print('Exception in download_url():', e)
        return None
    return url, time.perf_counter() - t0

def download_parallel(args):
    """Download several files concurrently and print the time each one took.

    Args:
        args: Iterable of ``(url, file_name)`` items; each item is passed
            as the single argument of one ``download_url`` call.

    Returns:
        None. One line per successful download is printed, in completion
        order; downloads for which ``download_url`` returned ``None`` are
        skipped.
    """
    # Leave one CPU free, but never ask for fewer than one worker:
    # ThreadPool(0) raises ValueError, which would happen on a single-core
    # machine.
    workers = max(1, cpu_count() - 1)
    # The ``with`` block shuts the pool down afterwards. Results have to be
    # consumed inside it because leaving the block terminates the workers.
    with ThreadPool(workers) as pool:
        for result in pool.imap_unordered(download_url, args):
            if result is None:
                # download_url signals a failed download by returning None.
                continue
            print('url:', result[0], 'time (s):', result[1])

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment