Created

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Grabs profile pictures of a person/place/thing from Wikipedia. This function looks up all images associated with an article's title and compares them against the images inside the infobox (typically the box in the top-right corner of an article), returning the Wikimedia URLs of the images inside the infobox.

View gist:5054929
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
 
_WIKIPEDIA_API_URL = "https://en.wikipedia.org/w/api.php"
_REQUEST_TIMEOUT_SECONDS = 10
# `str` on Python 3; `str` and `unicode` on Python 2, where decoded JSON
# text is `unicode`.
_TEXT_TYPES = (str, type(u""))


def _wikipedia_query(**params):
    """Run one ``action=query`` API call and return the decoded JSON body."""
    query = {"format": "json", "action": "query"}
    query.update(params)
    # Passing `params=` lets requests percent-encode titles containing
    # spaces, "&", "#", etc. instead of splicing them into the URL raw.
    response = requests.get(
        _WIKIPEDIA_API_URL, params=query, timeout=_REQUEST_TIMEOUT_SECONDS
    )
    return response.json()


def _iter_strings(node):
    """Yield every string value nested anywhere inside decoded JSON."""
    if isinstance(node, _TEXT_TYPES):
        yield node
        return
    if isinstance(node, dict):
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        return
    for child in children:
        for text in _iter_strings(child):
            yield text


def _file_name(title):
    """Drop the namespace prefix (text up to the first ':') from a title."""
    # maxsplit=1 keeps file names that themselves contain a colon intact;
    # [-1] returns the whole title when there is no prefix at all.
    return title.split(":", 1)[-1]


def figure_wikipedia_pic(figure_name, image_size):
    """Return thumbnail URLs for the image shown in an article's first section.

    The function lists the images attached to the article ``figure_name``,
    fetches the content of section 0 of that article, and keeps the image
    whose file name appears in that content. It then asks the API for a
    URL of that image scaled to ``image_size`` pixels wide.

    Uses the ``requests`` name from the enclosing module.

    Args:
        figure_name: Title of the English Wikipedia article.
        image_size: Requested thumbnail width in pixels.

    Returns:
        A list of ``{'url': ..., 'title': ...}`` dicts, where ``title`` is
        the file name with spaces replaced by underscores. The list is
        empty when the article has no images or none of them is mentioned
        in section 0.

    Raises:
        Errors raised by ``requests`` (connection failures, timeouts,
        undecodable JSON) propagate unchanged.
    """
    # imlimit=max: per the MediaWiki docs the default page size is small,
    # so long articles could otherwise omit the image we are looking for.
    listing = _wikipedia_query(titles=figure_name, prop="images", imlimit="max")
    article_pages = listing.get("query", {}).get("pages", {})
    candidates = [
        image
        for page in article_pages.values()
        for image in page.get("images", [])
    ]
    if not candidates:
        return []

    section = _wikipedia_query(
        titles=figure_name, prop="revisions", rvprop="content", rvsection=0
    )
    # Search the actual string values rather than str(dict): the repr
    # escapes quotes and backslashes, which hid file names containing them.
    section_text = "\n".join(_iter_strings(section))

    # NOTE(review): when several images match, the last one wins. This is
    # what the original loop did (it had no `break`); confirm it is intended.
    infobox_image = None
    for image in candidates:
        name = _file_name(image["title"])
        if name and name in section_text:
            infobox_image = image
    if infobox_image is None:
        return []

    # iiurlwidth asks the API to return a ready-made scaled URL (`thumburl`)
    # instead of us rewriting the original URL by hand.
    details = _wikipedia_query(
        titles=infobox_image["title"],
        prop="imageinfo",
        iiprop="url",
        iiurlwidth=image_size,
    )
    image_urls = []
    for page in details.get("query", {}).get("pages", {}).values():
        if "title" not in page:
            continue
        image_title = _file_name(page["title"]).replace(" ", "_")
        for info in page.get("imageinfo", []):
            url = info.get("thumburl") or info.get("url")
            if url:
                image_urls.append({"url": url, "title": image_title})
    return image_urls
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.