public
Created

Grabs profile pictures of a person, place, or thing from Wikipedia. This function fetches all images associated with an article's title and compares those images against the images inside the infobox (typically the box in the top-right corner of an article), returning the Wikimedia URLs of the images inside the infobox.

  • Download Gist
gistfile1.py
Python
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
 
def figure_wikipedia_pic(figure_name, image_size):
    """Return resized image URLs for a Wikipedia article's lead-section image.

    The function lists every image attached to the article, keeps the one
    whose file name also appears in the text of the article's first section
    (``rvsection=0``), and asks the API for that file's URL, which is then
    rewritten into a thumbnail URL of the requested width.

    Relies on ``requests`` being importable at module level.

    Args:
        figure_name: Title of the English Wikipedia article.
        image_size: Thumbnail width in pixels (formatted with ``%d``).

    Returns:
        A list of ``{'url': ..., 'title': ...}`` dicts, one per ``imageinfo``
        entry of the selected file. The list is empty when the article has
        no images or none of them is referenced in the first section.
    """
    api_url = "https://en.wikipedia.org/w/api.php"

    def query(**params):
        # Let requests build the query string so that titles containing
        # characters such as "&", "#" or "+" are percent-encoded correctly.
        params.update(format="json", action="query")
        return requests.get(api_url, params=params).json()

    images_json = query(titles=figure_name, prop="images")
    # The first section's response is searched as plain text, exactly as
    # str() renders the decoded JSON.
    lead_text = str(query(titles=figure_name, prop="revisions",
                          rvprop="content", rvsection=0))

    # Only the first page entry is inspected; a missing article or an
    # article without images simply yields no candidates.
    pages = images_json.get('query', {}).get('pages', {})
    first_page = next(iter(pages.values()), {})
    images = first_page.get('images', [])

    selected_image = None
    for image_dict in images:
        # Drop the namespace prefix ("File:") but keep any later colons.
        file_name = image_dict['title'].split(":", 1)[-1]
        if file_name in lead_text:
            # NOTE(review): there is no break, so the LAST matching image
            # wins. Kept as-is to preserve results; confirm whether the
            # first match was intended.
            selected_image = image_dict

    if selected_image is None:
        return []

    image_info = query(titles=selected_image['title'], prop="imageinfo",
                       iiprop="url")

    image_urls = []
    for page in image_info.get('query', {}).get('pages', {}).values():
        if "title" not in page:
            # Without a title the thumbnail file name cannot be built.
            continue
        image_title = page["title"].split(":", 1)[-1].replace(" ", "_")
        for info in page.get('imageinfo', []):
            image_urls.append({
                'url': _wikimedia_thumb_url(info['url'], image_size,
                                            image_title),
                'title': image_title,
            })

    return image_urls


def _wikimedia_thumb_url(url, width, file_name):
    """Rewrite a Wikimedia Commons file URL into a thumbnail URL.

    ``.../commons/a/ab/X.jpg`` becomes
    ``.../commons/thumb/a/ab/X.jpg/<width>px-X.jpg``. URLs that do not
    contain "commons" are returned unchanged, since the rewrite below would
    not produce a usable address for them.
    """
    if "commons" not in url:
        return url
    # Replace only the first occurrence so a file name that itself contains
    # "commons" is left intact.
    thumb_base = url.replace("commons", "commons/thumb", 1)
    # NOTE(review): some formats (e.g. SVG) presumably need a different
    # thumbnail file name; the API's iiurlwidth/thumburl may be a more
    # robust source - confirm against the MediaWiki imageinfo docs.
    return "%s/%dpx-%s" % (thumb_base, width, file_name)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.