Skip to content

Instantly share code, notes, and snippets.

@hmhard
Created April 22, 2023 15:41
Show Gist options
  • Save hmhard/56d13c8c9f3d7cf5f4a0c38220b48b86 to your computer and use it in GitHub Desktop.
Save hmhard/56d13c8c9f3d7cf5f4a0c38220b48b86 to your computer and use it in GitHub Desktop.
Parallel image downloading gist for Python users
import os
import requests
from multiprocessing import Process
from PIL import Image
from time import time as timer
import urllib
from threading import Thread
def fetch_url(entry, *, timeout=30):
    """Download a single entry and save it locally (best effort).

    The remote address is built as ``url + entry`` and the local target as
    ``path + entry`` by plain string concatenation, so both placeholder
    prefixes below must be edited before use and should end with the
    appropriate separator.

    Args:
        entry: Name of the file, appended to both the base URL and the
            storage path.
        timeout: Seconds to wait for the connection and between received
            bytes before giving up on this entry.

    Returns:
        None. The HTTP status code is printed; any error is printed rather
        than raised so that one bad entry does not abort a whole batch.
    """
    path = 'storage_path'
    url = 'base_url'
    try:
        # Without a timeout a stalled server would block this worker forever.
        # The context manager releases the connection even on early exit.
        with requests.get(url + entry, stream=True, timeout=timeout) as r:
            print(r.status_code)
            if r.status_code == 200:
                with open(path + entry, 'wb') as f:
                    # Write in chunks so large images are never held fully
                    # in memory (which `r.content` would do despite stream=True).
                    for chunk in r.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)
    except Exception as e:
        # Deliberately broad: downloads are best-effort per entry.
        print(e)
def fetch_photo(names):
    """Download every entry in *names* sequentially by calling ``fetch_url``.

    Args:
        names: Iterable of entry names; each one is passed to ``fetch_url``
            in iteration order.
    """
    for entry in names:
        fetch_url(entry)
def start(list_path='list of entries.txt', batch_size=5000):
    """Read the entry list and download all entries using worker processes.

    The list file holds one entry name per line. Blank lines and the literal
    marker ``NULL`` are skipped. Entries are grouped into batches of
    ``batch_size``; each batch is handed to its own ``Process`` running
    ``fetch_photo``. All processes are started together and then joined, and
    the total elapsed wall-clock time is printed at the end.

    Args:
        list_path: Path of the text file listing the entries to download.
        batch_size: Maximum number of entries handled by one process.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        OSError: If the list file cannot be opened.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    began = timer()
    workers = []
    batch = []

    # The context manager guarantees the list file is closed again.
    with open(list_path, 'r') as listing:
        for line in listing:
            name = line.rstrip('\n')
            # Also catches a final 'NULL' line that has no trailing newline.
            if not name or name == 'NULL':
                continue
            batch.append(name)
            if len(batch) >= batch_size:
                workers.append(Process(target=fetch_photo, args=(batch,)))
                batch = []

    # The last, partially filled batch must be scheduled too; otherwise the
    # tail of the list (or a whole short list) would never be downloaded.
    if batch:
        workers.append(Process(target=fetch_photo, args=(batch,)))

    print(len(workers))
    for worker in workers:
        print('starting')
        worker.start()
    for worker in workers:
        worker.join()

    print(f"Elapsed Time: {timer() - began}")
# Run the download job only when this file is executed as a script. The guard
# also keeps worker processes from re-running the job when the module is
# imported by multiprocessing under the "spawn" start method.
if __name__ == "__main__":
    start()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment