Skip to content

Instantly share code, notes, and snippets.

@mazzzystar
Last active May 28, 2019 08:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mazzzystar/f1dfb32b2a23aac8e386fa742806562d to your computer and use it in GitHub Desktop.
Save mazzzystar/f1dfb32b2a23aac8e386fa742806562d to your computer and use it in GitHub Desktop.
# First, you should install flickrapi
# pip install flickrapi
import flickrapi
import urllib.request
import threading, queue, time
from PIL import Image  # NOTE(review): Image appears unused in this script — confirm before removing
# Adapted from https://gist.github.com/yunjey/14e3a069ad2aa3adf72dee93a53117d6
# Changed to the multi-threads version.
# Flickr api access key
# SECURITY NOTE(review): the API key/secret are hard-coded and committed here;
# rotate them and load from an environment variable or config file instead.
flickr = flickrapi.FlickrAPI('c6a2c45591d4973ff525042472446ca2', '202ffe6f387ce29b', cache=True)
keyword = 'cat'
# Search Flickr for the keyword; 'url_c' asks the API to include a direct
# image URL on each result so we can download it later.
photos = flickr.walk(text=keyword,
extras='url_c',
per_page=500, # maybe you can try different numbers..
sort='relevance')
# Collect the usable image URLs from the search results.
urls = []
for i, photo in enumerate(photos):
    print(i)
    url = photo.get('url_c')
    # Some results carry no 'url_c'; keep only real https image URLs.
    if url and url.startswith('https'):
        urls.append(url)
    # get 50 urls
    # if i > 50:
    #     break
print("Total usable urls: {}".format(len(urls)))

# Feed every URL into a thread-safe queue for the download workers.
urlQueue = queue.Queue()
for url in urls:
    urlQueue.put(url)
print(urls)

# Persist the URL list so the crawl can be inspected or resumed later.
# 'with' guarantees the file is closed even if a write fails.
with open('urls.txt', 'w') as f:
    for url in urls:
        f.write(url.strip() + '\n')
def fetch_url(urlQueue):
    """Worker: repeatedly pull an image URL off *urlQueue* and download it.

    Returns when the queue is drained. A failed download is skipped so one
    bad URL cannot kill the worker thread.
    """
    while True:
        try:
            url = urlQueue.get_nowait()
        except queue.Empty:
            # Queue drained — this worker is done.
            break
        # print('Current Thread Name %s, Url: %s ' % (threading.currentThread().name, url))
        try:
            # Derive a file name from the last path segment, dropping the
            # 4-char extension (assumes Flickr's '.../<id>.jpg' URL layout).
            name = url.strip().split('/')[-1][:-4]
            urllib.request.urlretrieve(url, 'images/{}.jpg'.format(name))
        except Exception:
            # Best-effort crawl: skip URLs that fail to download.
            continue
# Fan the downloads out over a small pool of worker threads and time it.
start = time.time()
threadNum = 8
# Every worker shares the same queue and drains it until empty.
threads = [threading.Thread(target=fetch_url, args=(urlQueue,)) for _ in range(threadNum)]
for worker in threads:
    worker.start()
# Wait for all workers to finish before reporting.
for worker in threads:
    worker.join()
print("Elapsed Time: %s" % (time.time() - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment