Skip to content

Instantly share code, notes, and snippets.

@akfreas
Created February 28, 2013 07:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akfreas/5054929 to your computer and use it in GitHub Desktop.
Save akfreas/5054929 to your computer and use it in GitHub Desktop.
Grabs profile pictures of a person/place/thing from Wikipedia. This function looks up all images associated with an article's title and compares their file names against the wikitext of the article's lead section, which is where the infobox (typically the box in the top right-hand corner of an article) is defined. It returns the Wikimedia URLs, resized to the requested width, of the image found there.
# HTTPS endpoint: the plain-HTTP host only answers with a redirect to this one.
_WIKIPEDIA_API_URL = "https://en.wikipedia.org/w/api.php"


def _strip_namespace(title):
    """Return *title* without its leading ``Namespace:`` prefix, if any."""
    # Cut at the FIRST colon only; file names may themselves contain colons.
    return title.partition(":")[2] if ":" in title else title


def _lead_section_wikitext(payload):
    """Return the wikitext of every revision found in a ``prop=revisions`` reply."""
    pages = payload.get("query", {}).get("pages", {})
    chunks = []
    for page in pages.values():
        for revision in page.get("revisions", []):
            # Legacy (no ``rvslots``) replies carry the content under "*".
            chunks.append(revision.get("*", ""))
    # If no content could be extracted, fall back to searching the textual
    # form of the whole reply, which is what this code historically did.
    return "".join(chunks) or str(payload)


def _thumbnail_url(info, file_name, image_size):
    """Return the URL of an *image_size*-pixel-wide rendition for *info*.

    Prefers the ``thumburl`` supplied by the API.  Otherwise the URL is
    derived from the original file URL, which is only possible for files
    stored under ``/commons/``; for anything else the unscaled URL is
    returned rather than a made-up one.
    """
    thumb_url = info.get("thumburl")
    if thumb_url:
        return thumb_url
    head, marker, tail = info["url"].partition("/commons/")
    if not marker:
        return info["url"]
    return "%s/commons/thumb/%s/%dpx-%s" % (head, tail, image_size, file_name)


def figure_wikipedia_pic(figure_name, image_size):
    """Find the lead-section (infobox) picture of a Wikipedia article.

    Lists the images attached to the article titled *figure_name*, keeps the
    one whose file name appears in the wikitext of the article's lead
    section, and asks the API for a rendition *image_size* pixels wide.

    Args:
        figure_name: Title of the English Wikipedia article.
        image_size: Desired thumbnail width in pixels (an integer).

    Returns:
        A list of ``{'url': ..., 'title': ...}`` dicts, where ``title`` is
        the file name with spaces replaced by underscores.  The list is
        empty when the article has no image referenced in its lead section.
    """
    # Let requests build the query string so that titles containing spaces,
    # "&", "#" or non-ASCII characters are escaped correctly.
    images_payload = requests.get(_WIKIPEDIA_API_URL, params={
        "format": "json",
        "action": "query",
        "titles": figure_name,
        "prop": "images",
        # The API default is 10 images, which can cut off the infobox image.
        "imlimit": "max",
    }).json()
    lead_payload = requests.get(_WIKIPEDIA_API_URL, params={
        "format": "json",
        "action": "query",
        "titles": figure_name,
        "prop": "revisions",
        "rvprop": "content",
        "rvsection": 0,
    }).json()
    wikitext = _lead_section_wikitext(lead_payload)

    pages = images_payload.get("query", {}).get("pages", {})
    # A single title was requested, so at most one page entry is expected.
    article = next(iter(pages.values()), {})

    matched_image = None
    for image in article.get("images", []):
        file_name = _strip_namespace(image["title"])
        # Wikitext may spell the file name with spaces or with underscores.
        if file_name in wikitext or file_name.replace(" ", "_") in wikitext:
            # NOTE: no early exit -- when several images match, the last one
            # listed by the API wins (unchanged historical behaviour).
            matched_image = image
    if matched_image is None:
        return []

    info_payload = requests.get(_WIKIPEDIA_API_URL, params={
        "format": "json",
        "action": "query",
        "titles": matched_image["title"],
        "prop": "imageinfo",
        "iiprop": "url",
        # Ask the API for the scaled URL instead of guessing its layout.
        "iiurlwidth": image_size,
    }).json()

    image_urls = []
    for page in info_payload.get("query", {}).get("pages", {}).values():
        if "title" not in page:
            # Without a title there is no file name to report or build from.
            continue
        image_title = _strip_namespace(page["title"]).replace(" ", "_")
        for info in page.get("imageinfo", []):
            image_urls.append({
                'url': _thumbnail_url(info, image_title, image_size),
                'title': image_title,
            })
    return image_urls
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment