Skip to content

Instantly share code, notes, and snippets.

@WP-LKL
Last active March 28, 2021 14:30
Show Gist options
  • Save WP-LKL/8900138462e6003f3ce7793e64e15ca4 to your computer and use it in GitHub Desktop.
Save WP-LKL/8900138462e6003f3ce7793e64e15ca4 to your computer and use it in GitHub Desktop.
Minimalist parallel image downloading with Python. Please consult the target site's terms of service (TOS) and robots.txt prior to use.
import concurrent.futures
import urllib.request
import uuid
import requests
def getURLs(filename: str) -> list:
    """Read download URLs from a text file, one URL per line.

    Surrounding whitespace is stripped from every line and blank lines are
    skipped, so a trailing newline or a spacer line in the file never turns
    into an empty "URL" that would fail later at download time.

    Args:
        filename: Path of the text file to read.

    Returns:
        The non-empty, stripped lines of the file, in file order.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # "utf-8-sig" decodes plain UTF-8 and also drops a leading BOM, which
    # would otherwise end up glued to the front of the first URL.
    with open(filename, "r", encoding="utf-8-sig") as f:
        lines = f.read().splitlines()
    stripped = (line.strip() for line in lines)
    return [url for url in stripped if url]
# URLs to download, read once at import time from image_urls.txt
# (the file is split into lines, one URL per line).
URLS = getURLs("image_urls.txt")
# Path prefix shared by every saved file: "<folder>/<batch prefix>".
# load_url appends a short random id and ".jpg" to this string.
# NOTE(review): this name shadows the builtin dir() - consider renaming
# together with its use in load_url.
dir = "img/b01_" # folder/batchPrefix
# Retrieve a single url and download its image
def load_url(url, timeout=30):
    """Download one URL and save the response body as a ``.jpg`` file.

    The output path is the module-level ``dir`` prefix followed by the first
    7 hex digits of a random UUID and the ``.jpg`` extension. The extension
    is fixed; the response's actual content type is not inspected. The
    target folder must already exist, since ``open`` does not create it.

    Args:
        url: Address of the image to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block its worker
            thread indefinitely.

    Returns:
        The path of the file that was written.

    Raises:
        requests.exceptions.RequestException: On connection errors,
            timeouts, or a 4xx/5xx HTTP status.
        OSError: If the output file cannot be created or written.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of saving an error page as an image.
    response.raise_for_status()
    # ./dir/BatchPrefix_uuid.jpg
    path = dir + uuid.uuid4().hex[:7] + ".jpg"
    # The context manager closes the file even if the write raises.
    with open(path, "wb") as file:
        file.write(response.content)
    return path
# Fan the downloads out over a thread pool (I/O-bound work), then collect
# every result so that failures are reported instead of silently dropped.
with concurrent.futures.ThreadPoolExecutor(max_workers=None) as executor:
    # Map each future back to its URL so failures can be attributed.
    future_to_url = {executor.submit(load_url, url): url for url in URLS}
    # as_completed() only returns an iterator; it has to be consumed, and
    # result() has to be called, for exceptions raised in workers to surface.
    for finished in concurrent.futures.as_completed(future_to_url):
        source_url = future_to_url[finished]
        try:
            finished.result()
        except Exception as exc:
            # Top-level boundary: report and carry on with the remaining
            # downloads rather than aborting the whole batch.
            print(f"{source_url!r} failed: {exc}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment