Created
April 22, 2023 15:41
-
-
Save hmhard/56d13c8c9f3d7cf5f4a0c38220b48b86 to your computer and use it in GitHub Desktop.
Parallel image downloading gist for Python users
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
from multiprocessing import Process | |
from PIL import Image | |
from time import time as timer | |
import urllib | |
from threading import Thread | |
def fetch_url(entry, timeout=30):
    """Download one image and save it to local storage.

    The remote address is the base URL with ``entry`` appended, and the local
    file name is the storage path with ``entry`` appended.  The HTTP status
    code is printed for every request; a file is written only when the server
    answers 200.  Any exception is printed and swallowed, so a failing entry
    never propagates an error to the caller.

    Args:
        entry: Image name, used as both the URL suffix and the file-name suffix.
        timeout: Seconds handed to ``requests`` as the connect/read timeout, so
            a stalled server cannot block the caller indefinitely.
    """
    path = 'storage_path'  # placeholder prefix for the local file name
    url = 'base_url'  # placeholder prefix for the remote address
    try:
        # Using the response as a context manager releases the connection even
        # when the body is never read (any non-200 answer).
        with requests.get(url + entry, stream=True, timeout=timeout) as r:
            print(r.status_code)
            if r.status_code == 200:
                with open(path + entry, 'wb') as f:
                    # Write chunk by chunk instead of buffering the whole
                    # image in memory through ``r.content``.
                    for chunk in r.iter_content(chunk_size=65536):
                        f.write(chunk)
    except Exception as e:
        # Deliberate best-effort: report the problem and carry on.
        print(e)
def fetch_photo(names):
    """Download every entry of ``names`` sequentially through ``fetch_url``.

    Args:
        names: Iterable of entry names; each one is passed unchanged to
            ``fetch_url``.
    """
    for entry in names:
        fetch_url(entry)
def start(list_path='list of entries.txt', batch_size=5000):
    """Read entry names from a text file and download them in worker processes.

    The file is read line by line.  Blank lines and lines equal to ``NULL``
    are skipped; the remaining names are grouped into batches of
    ``batch_size``, and each batch is handed to its own ``Process`` running
    ``fetch_photo``.  The last, possibly shorter, batch is dispatched as well,
    so no entry is dropped.  All processes are started, then joined, and the
    number of processes and the elapsed wall-clock time are printed.

    Args:
        list_path: Path of the text file holding one entry name per line.
        batch_size: Maximum number of entries given to a single process.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        OSError: If the list file cannot be opened.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    started_at = timer()
    workers = []
    batch = []
    # The ``with`` block guarantees the list file is closed after reading.
    with open(list_path, 'r') as listing:
        for line in listing:
            entry = line.rstrip('\n')
            # Compare after stripping so a final "NULL" without a trailing
            # newline is filtered too; empty names are not downloadable.
            if not entry or entry == 'NULL':
                continue
            batch.append(entry)
            if len(batch) >= batch_size:
                workers.append(Process(target=fetch_photo, args=(batch,)))
                batch = []
    # Dispatch the leftover entries that did not fill a complete batch.
    if batch:
        workers.append(Process(target=fetch_photo, args=(batch,)))

    print(len(workers))
    for worker in workers:
        print('starting')
        worker.start()
    for worker in workers:
        worker.join()
    print(f"Elapsed Time: {timer() - started_at}")
# Run the download only when executed as a script.  The guard also keeps
# multiprocessing children that re-import this module from calling start().
if __name__ == "__main__":
    start()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment