Skip to content

Instantly share code, notes, and snippets.

@mazzzystar
Last active May 28, 2019 08:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mazzzystar/f1dfb32b2a23aac8e386fa742806562d to your computer and use it in GitHub Desktop.
Save mazzzystar/f1dfb32b2a23aac8e386fa742806562d to your computer and use it in GitHub Desktop.
# First, you should install flickrapi
# pip install flickrapi
import flickrapi
import urllib.request
import threading, queue, time
from PIL import Image  # NOTE(review): Image appears unused in this script — confirm before removing
# Adapted from https://gist.github.com/yunjey/14e3a069ad2aa3adf72dee93a53117d6
# Changed to the multi-threads version.
# Flickr api access key
# SECURITY NOTE(review): the API key/secret are hard-coded and committed here;
# rotate them and load from an environment variable or config file instead.
flickr = flickrapi.FlickrAPI('c6a2c45591d4973ff525042472446ca2', '202ffe6f387ce29b', cache=True)
keyword = 'cat'
# Search Flickr for the keyword; 'url_c' asks the API to include a direct
# image URL on each result so we can download it later.
photos = flickr.walk(text=keyword,
extras='url_c',
per_page=500, # maybe you can try different numbers..
sort='relevance')
# Collect the usable image URLs from the search results.
urls = []
for i, photo in enumerate(photos):
    print(i)
    url = photo.get('url_c')
    # Some results carry no 'url_c'; keep only real https image URLs.
    if url and url.startswith('https'):
        urls.append(url)
    # get 50 urls
    # if i > 50:
    #     break
print("Total usable urls: {}".format(len(urls)))

# Feed every URL into a thread-safe queue for the download workers.
urlQueue = queue.Queue()
for url in urls:
    urlQueue.put(url)
print(urls)

# Persist the URL list so the crawl can be inspected or resumed later.
# 'with' guarantees the file is closed even if a write fails.
with open('urls.txt', 'w') as f:
    for url in urls:
        f.write(url.strip() + '\n')
def fetch_url(urlQueue):
    """Worker: repeatedly pull an image URL off *urlQueue* and download it.

    Returns when the queue is drained. A failed download is skipped so one
    bad URL cannot kill the worker thread.
    """
    while True:
        try:
            url = urlQueue.get_nowait()
        except queue.Empty:
            # Queue drained — this worker is done.
            break
        # print('Current Thread Name %s, Url: %s ' % (threading.currentThread().name, url))
        try:
            # Derive a file name from the last path segment, dropping the
            # 4-char extension (assumes Flickr's '.../<id>.jpg' URL layout).
            name = url.strip().split('/')[-1][:-4]
            urllib.request.urlretrieve(url, 'images/{}.jpg'.format(name))
        except Exception:
            # Best-effort crawl: skip URLs that fail to download.
            continue
# Fan the downloads out over a small pool of worker threads and time it.
start = time.time()
threadNum = 8
# Every worker shares the same queue and drains it until empty.
threads = [threading.Thread(target=fetch_url, args=(urlQueue,)) for _ in range(threadNum)]
for worker in threads:
    worker.start()
# Wait for all workers to finish before reporting.
for worker in threads:
    worker.join()
print("Elapsed Time: %s" % (time.time() - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment