Skip to content

Instantly share code, notes, and snippets.

@jsundram
Last active February 11, 2021 18:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jsundram/530324b4dbdaf83692d4997967d41bdc to your computer and use it in GitHub Desktop.
Save jsundram/530324b4dbdaf83692d4997967d41bdc to your computer and use it in GitHub Desktop.
from datetime import datetime
from dateutil import parser
import csv
import json
import requests
def id_for_page(page):
    """Use the Wikipedia API to find the Wikidata id for a page.

    Args:
        page: URL of an English Wikipedia article (for example
            ``https://en.wikipedia.org/wiki/Albert_Einstein``) or a bare
            page title.

    Returns:
        The Wikidata item id string found under
        ``pageprops.wikibase_item`` in the API response.

    Raises:
        KeyError: if the response carries no Wikidata item for the page.
        requests.RequestException: on a timeout or an HTTP error status.
    """
    # Local import so this function stays self-contained and the
    # module-level import block does not need to change.
    from urllib.parse import unquote

    api = "https://en.wikipedia.org/w/api.php"

    # Drop any "#section" fragment; spliced into a query string it would
    # swallow every parameter that follows it (including format=json).
    path = page.split('#', 1)[0]
    # Titles may themselves contain '/' (e.g. "AC/DC"), so prefer everything
    # after the "/wiki/" prefix and only fall back to the last path segment.
    _, marker, after_wiki = path.partition('/wiki/')
    slug = after_wiki if marker else path.split('/')[-1]

    params = {
        'action': 'query',
        'prop': 'pageprops',
        # The slug comes from a URL and may already be percent-encoded;
        # decode it so that requests encodes it exactly once.
        'titles': unquote(slug),
        'format': 'json',
    }
    reply = requests.get(api, params=params, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    reply.raise_for_status()
    response = reply.json()

    # Assume we got 1 page result and it is correct.
    page_info = list(response['query']['pages'].values())[0]
    try:
        return page_info['pageprops']['wikibase_item']
    except KeyError:
        # Same exception type as before, but say which page was the problem.
        raise KeyError("no wikidata item found for page %r" % page)
def lifespan_for_id(wikidata_id):
    """Retrieve the birth and death dates for a Wikidata entity.

    Args:
        wikidata_id: Wikidata item id, used to build the
            ``Special:EntityData/<id>.json`` URL.

    Returns:
        A two-element list ``[birth, death]``; each element is whatever
        ``get_claim_as_time`` returns for claims P569 and P570.

    Raises:
        requests.RequestException: on a timeout or an HTTP error status.
    """
    data_url = "https://www.wikidata.org/wiki/Special:EntityData/%s.json"
    reply = requests.get(data_url % wikidata_id, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    reply.raise_for_status()
    page = reply.json()

    # The payload is keyed by entity id; take the first entity it contains.
    claims = list(page['entities'].values())[0]['claims']
    # P569 (birth) and P570 (death) ... not everyone has died yet.
    return [get_claim_as_time(claims, cid) for cid in ['P569', 'P570']]
def get_claim_as_time(claims, claim_id):
    """Read one time-valued claim from Wikidata claims as a datetime.

    Args:
        claims: the ``claims`` mapping of a Wikidata entity.
        claim_id: property id to read, e.g. ``'P569'``.

    Returns:
        The parsed datetime of the first statement for ``claim_id``, or
        None when the claim is missing, has no data value, or is a BCE
        date (which ``datetime`` cannot represent).

    Raises:
        AssertionError: if the claim's data value is not of type ``'time'``.
    """
    try:
        claim = claims[claim_id][0]['mainsnak']['datavalue']
        # Raised explicitly (rather than via `assert`) so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if claim['type'] != 'time':
            raise AssertionError("Expecting time data type")
        stamp = claim['value']['time']
    except (KeyError, IndexError) as e:
        # Missing claim, empty statement list, or a statement without a
        # data value: report it and signal "no date".
        print(e)
        return None

    if stamp.startswith('-'):
        # A leading '-' marks a BCE year. Stripping the sign would silently
        # turn it into a CE date, so report "no usable date" instead.
        print("BCE date not representable: %s" % stamp)
        return None

    # dateparser chokes on leading '+', thanks wikidata.
    return parser.parse(stamp.lstrip('+'))
def main():
    """Print the birth and death dates for the Albert Einstein article.

    Looks up the article's Wikidata id, fetches its lifespan, and prints
    one ``label  =  date`` line per date. A date that is not available
    is printed as ``unknown``.
    """
    page = 'https://en.wikipedia.org/wiki/Albert_Einstein'

    # 1. use the wikipedia api to find the wikidata id for this page
    wikidata_id = id_for_page(page)

    # 2. use the wikidata id to get the birth and death dates
    span = lifespan_for_id(wikidata_id)

    for label, dt in zip(["birth", "death"], span):
        # A date may be None (e.g. no death date for a living person);
        # formatting None would raise TypeError, so print a placeholder.
        shown = "unknown" if dt is None else dt.strftime("%b %d, %Y")
        print(label, " = ", shown)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment