Last active
May 28, 2019 08:14
-
-
Save mazzzystar/f1dfb32b2a23aac8e386fa742806562d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# First, you should install flickrapi | |
# pip install flickrapi | |
import flickrapi | |
import urllib.request | |
import threading, queue, time | |
from PIL import Image | |
# Adapted from https://gist.github.com/yunjey/14e3a069ad2aa3adf72dee93a53117d6 | |
# Changed to a multi-threaded version.
# Flickr api access key | |
# NOTE(review): the API key and secret are hard-coded in source. Consider
# loading them from the environment or a config file kept out of version
# control.
flickr = flickrapi.FlickrAPI('c6a2c45591d4973ff525042472446ca2', '202ffe6f387ce29b', cache=True)

# Search term sent to Flickr.
keyword = 'cat'
photos = flickr.walk(text=keyword,
                     extras='url_c',
                     per_page=500,  # page size; other values may be worth trying
                     sort='relevance')

# Collect every result that exposes an https 'url_c' link.
urls = []
for i, photo in enumerate(photos):
    print(i)
    url = photo.get('url_c')
    # Skip photos with a missing/empty 'url_c' and any non-https links.
    if url and url.startswith('https'):
        urls.append(url)
    # The loop runs until `photos` is exhausted. To cap the crawl while
    # experimenting, stop early here, e.g. `if i > 50: break`.
print("Total usable urls: {}".format(len(urls)))

# Work queue consumed by the downloader threads.
urlQueue = queue.Queue()
for url in urls:
    urlQueue.put(url)
print(urls)

# Also persist the URL list, one per line. The `with` block closes the file
# even if a write fails part-way through.
with open('urls.txt', 'w') as f:
    f.writelines(url.strip() + '\n' for url in urls)
def fetch_url(urlQueue):
    """Drain ``urlQueue``, saving each URL to ``images/<name>.jpg``.

    Written as a thread target: several threads may call it with the same
    queue, each pulling URLs until the queue is empty.

    Args:
        urlQueue: a ``queue.Queue`` of URL strings. ``<name>`` is the last
            path segment of the URL with its extension removed.

    Returns:
        None. A failed download is reported on stdout and skipped, so one
        bad URL does not stop the worker.
    """
    import os  # local import so the block does not depend on file-level imports

    # Without the target directory every urlretrieve call would fail.
    # exist_ok makes this safe when several workers start at once.
    os.makedirs('images', exist_ok=True)

    while True:
        try:
            url = urlQueue.get_nowait()
        except queue.Empty:
            # Nothing left to download: this worker is done.
            break
        try:
            # Use the stripped URL both for the file name and for the fetch.
            url = url.strip()
            name = os.path.splitext(url.rsplit('/', 1)[-1])[0]
            urllib.request.urlretrieve(url, 'images/{}.jpg'.format(name))
        except Exception as exc:
            # Best-effort crawl: report the failure and move on to the next URL.
            print("Failed to download {!r}: {}".format(url, exc))
# Time the whole download phase, from thread creation to the last join.
start = time.time()

# Number of downloader threads sharing the URL queue.
threadNum = 8
threads = [
    threading.Thread(target=fetch_url, args=(urlQueue,))
    for _ in range(threadNum)
]

# Start every worker first, then wait for all of them to finish.
for worker in threads:
    worker.start()
for worker in threads:
    worker.join()

elapsed = time.time() - start
print("Elapsed Time: %s" % elapsed)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment