Skip to content

Instantly share code, notes, and snippets.

@bcse
Last active September 27, 2015 03:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bcse/1205875 to your computer and use it in GitHub Desktop.
Save bcse/1205875 to your computer and use it in GitHub Desktop.
Fetch top 100 popular photos from Flickr
import re
import json
import os
from urllib.request import urlopen, urlretrieve
from glob import glob
# Query parameters shared by every flickr.photos.search request; the loop
# below copies this dict and adds a per-request 'tags' entry.
config = {
    'api_key': 'YOUR_API_KEY',
    'sort': 'interestingness-desc',
    'per_page': 100,
}

# %-style template for the search request, filled from a dict that holds the
# keys of `config` plus 'tags'.  The endpoint is addressed over HTTPS.
api_call = (
    'https://api.flickr.com/services/rest/'
    '?method=flickr.photos.search&format=json&nojsoncallback=1'
    '&api_key=%(api_key)s&extras=original_format'
    '&sort=%(sort)s&per_page=%(per_page)s&tags=%(tags)s'
)

# Image URL templates, filled directly from one photo record of the search
# response.  `url_format_o` additionally needs 'originalsecret' and
# 'originalformat', which are only usable when the record carries them.
url_format = 'http://farm%(farm)s.static.flickr.com/%(server)s/%(id)s_%(secret)s.jpg'
url_format_o = 'http://farm%(farm)s.static.flickr.com/%(server)s/%(id)s_%(originalsecret)s_o.%(originalformat)s'
# For each tag, run one search request and save every returned photo into a
# directory named after the tag (created in the current working directory).
for tag in ['boy', 'children', 'crowd', 'face', 'girl', 'kid', 'man', 'men',
            'people', 'person', 'portrait', 'woman', 'women']:
    cfg = config.copy()
    cfg['tags'] = tag

    # Close the HTTP response as soon as the body has been read instead of
    # leaving it to the garbage collector.
    with urlopen(api_call % cfg) as response:
        obj = json.loads(response.read().decode('utf-8'))

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(tag, exist_ok=True)

    for p in obj['photos']['photo']:
        # Use the original-size URL when the record carries an
        # 'originalsecret'; otherwise fall back to the default .jpg URL.
        if p.get('originalsecret'):
            url = url_format_o % p
        else:
            url = url_format % p
        name = os.path.join(tag, os.path.basename(url))
        urlretrieve(url, name)
import re
import json
import os
from urllib.request import urlopen, urlretrieve
# Query parameters shared by every flickr.photos.search request; the main
# loop copies this dict and adds the current 'page' number.
config = {
    'api_key': 'YOUR_API_KEY',
    'sort': 'interestingness-desc',
    'per_page': 100,
}

# %-style template for the search request.  It must contain the 'page'
# placeholder: the main loop sets cfg['page'] on every iteration, and without
# it each request would return the same first page and the crawl would never
# advance or terminate.
api_call = (
    'https://api.flickr.com/services/rest/'
    '?method=flickr.photos.search&format=json&nojsoncallback=1'
    '&api_key=%(api_key)s&extras=original_format'
    '&sort=%(sort)s&per_page=%(per_page)s&page=%(page)s&has_geo=1'
)

# Image URL templates, filled directly from one photo record of the search
# response.  `url_format_o` additionally needs 'originalsecret' and
# 'originalformat'.
url_format = 'http://farm%(farm)s.static.flickr.com/%(server)s/%(id)s_%(secret)s.jpg'
url_format_o = 'http://farm%(farm)s.static.flickr.com/%(server)s/%(id)s_%(originalsecret)s_o.%(originalformat)s'
def process(p):
    """Download the original-size image for one photo record.

    `p` is one entry of the search response's photo list (a dict).  Records
    without an 'originalsecret' are skipped, as are photos whose target file
    already exists.  The file is saved in the current working directory
    under the basename of its URL.

    Returns None in every case; the function is used for its side effects.
    """
    if p.get('originalsecret') is None:
        return
    url = url_format_o % p
    name = os.path.basename(url)
    # An existing file means an earlier run already fetched this photo.
    if os.path.exists(name):
        return
    # print() call form: the statement form is a SyntaxError on Python 3,
    # which this file targets (it imports urllib.request).
    print('downloading %s' % name)
    urlretrieve(url, name)
if __name__ == '__main__':
    from multiprocessing import Pool

    # Walk the search results page by page and hand each page's photo
    # records to a pool of 8 worker processes.  The `with` block shuts the
    # workers down on every exit path, not only when an error occurs.
    with Pool(8) as pool:
        page = 1
        try:
            while True:
                print('page %d' % page)
                cfg = config.copy()
                cfg['page'] = page
                # Close the HTTP response once the body has been read.
                with urlopen(api_call % cfg) as response:
                    data = response.read().decode('utf-8')
                obj = json.loads(data)
                # An empty photo list marks the end of the result set.
                if not obj['photos']['photo']:
                    break
                # map() blocks until the whole page has been processed, so
                # no work is outstanding when the pool is shut down.
                pool.map(process, obj['photos']['photo'])
                page += 1
        except KeyboardInterrupt:
            # Ctrl-C is the expected way to stop a long crawl.  Any other
            # error propagates with its traceback instead of being silently
            # swallowed.
            print('interrupted')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment