Skip to content

Instantly share code, notes, and snippets.

@jonadem
Last active November 9, 2019 14:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonadem/434151b95308403f36a980cc30e612cb to your computer and use it in GitHub Desktop.
Save jonadem/434151b95308403f36a980cc30e612cb to your computer and use it in GitHub Desktop.
Wikidata: Add the family name property of a person if missing
"""Add the family name property of a person if missing
For a person, check if the family name exists.
If not, create it and link it to the person
"""
import sys
import pywikibot
from pywikibot import pagegenerators
# Verbosity flag; nothing in the visible script reads it.
_VERBOSE = True

# Wikidata IDs of the persons to process, one entry per person.
itemsID = ("Q8023",)  # Nelson Mandela

# Wikidata property IDs used by this script.
INSTANCE_OF_PROP = "P31"
FAMILY_NAME_PROP = "P734"

# Wikidata item IDs used as claim targets.
HUMAN_ITEM = "Q5"
FAMILY_NAME_ITEM = "Q101352"
def get_family_name_item(repo, itemtitle):
    """Find the family name item whose English label is "itemtitle".

    "repo" is the repo for pywikibot; the SPARQL query is run against it.
    "itemtitle" is the exact English label to match.

    Returns the item ID when exactly one family name item matches,
    or None when nothing matches.
    Raises ValueError when several family name items share the label.
    """
    # Escape backslashes, quotes and line breaks so that a label such as
    # "O'Brien" cannot terminate the single-quoted SPARQL literal early.
    escaped_title = (itemtitle
                     .replace("\\", "\\\\")
                     .replace("'", "\\'")
                     .replace("\n", "\\n")
                     .replace("\r", "\\r"))
    sparql = ("SELECT ?item WHERE {{ ?item rdfs:label '{}'@en . "
              "?item wdt:{} wd:{} }}").format(
                  escaped_title, INSTANCE_OF_PROP, FAMILY_NAME_ITEM)
    entities = list(
        pagegenerators.WikidataSPARQLPageGenerator(sparql, site=repo))
    if len(entities) > 1:
        raise ValueError("Multiple family name instance with the same label in Wikidata")
    if entities:
        return entities[0].getID()
    return None
def query_yes_no_or_skip(question, default="yes"):
    """Ask a yes/no/skip question via input() and return the answer code.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
        It must be "yes" (the default), "no" or None (meaning
        an answer is required of the user).

    The return value is 'y' for "yes", 'n' for "no" or 's' for "skip".
    Raises ValueError when "default" is not one of the accepted values.
    """
    valid = {"yes": 'y', "y": 'y', "ye": 'y',
             "no": 'n', "n": 'n',
             "skip": 's', "s": 's'}
    # The '?' shown in the prompt has no dedicated handling: like any other
    # unrecognised answer it prints the help message below and asks again.
    if default is None:
        prompt = " [y/n/s/?] "
    elif default == "yes":
        prompt = " [Y/n/s/?] "
    elif default == "no":
        prompt = " [y/N/s/?] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        # Ignore surrounding whitespace so that e.g. " y " is still accepted.
        choice = input().strip().lower()
        if default is not None and choice == '':
            return valid[default]
        if choice in valid:
            return valid[choice]
        sys.stdout.write("Please respond with 'yes' or 'no' or 'skip' "
                         "(or 'y' or 'n' or 's').\n")
def query_yes_no(question, default="yes"):
    """Ask a yes/no question via input() and return the answer.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
        It must be "yes" (the default), "no" or None (meaning
        an answer is required of the user).

    The return value is True for "yes" or False for "no".
    Raises ValueError when "default" is not one of the accepted values.
    """
    valid = {"yes": True, "y": True, "ye": True,
             "no": False, "n": False}
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        # Ignore surrounding whitespace so that e.g. " y " is still accepted.
        choice = input().strip().lower()
        if default is not None and choice == '':
            return valid[default]
        if choice in valid:
            return valid[choice]
        sys.stdout.write("Please respond with 'yes' or 'no' "
                         "(or 'y' or 'n').\n")
def _is_human(claims):
    """Return True if "claims" holds an 'instance of' claim targeting HUMAN_ITEM."""
    if INSTANCE_OF_PROP not in claims:
        return False
    for clm in claims[INSTANCE_OF_PROP]:
        target = clm.getTarget()
        # NOTE(review): pywikibot documents getTarget() as returning None for
        # claims without a concrete value; guard so those do not crash here.
        if target is not None and target.getID() == HUMAN_ITEM:
            return True
    return False


def _ask_family_name(proposed):
    """Let the user confirm or replace "proposed".

    Returns the validated family name, or None when the user chose to skip.
    """
    user_input = query_yes_no_or_skip(u"Is '{}' OK ?".format(proposed))
    if user_input == 'y':
        print(u"You validate '{}'".format(proposed))
        return proposed
    if user_input == 's':
        print('')  # Add a new line
        return None
    while True:
        # The proposed name was not accepted, enter a new one
        new_name = input("Enter a new name: ").strip()
        if not new_name:
            # An empty name would end up as an empty item label; ask again.
            continue
        if query_yes_no(u"You entered '{}', do you validate ?".format(new_name)):
            return new_name


def _create_family_name_item(repo, family_name_str):
    """Create a family name item labelled "family_name_str" and return its ID."""
    name_item = pywikibot.ItemPage(repo)
    # The same spelling is used as label in every language
    name_labels = {language: family_name_str
                   for language in ('en', 'fr', 'nl', 'de')}
    name_item.editLabels(name_labels, summary='Set labels')
    name_descriptions = {'en': 'family name',
                         'fr': 'nom de famille',
                         'nl': 'achternaam',
                         'de': 'Familienname'}
    name_item.editDescriptions(name_descriptions, summary='Setting descriptions.')
    # The new item is an instance of 'FAMILY_NAME_ITEM'
    claim = pywikibot.Claim(repo, INSTANCE_OF_PROP)
    claim.setTarget(pywikibot.ItemPage(repo, FAMILY_NAME_ITEM))
    name_item.addClaim(claim, summary=u'Adding claim "instance of family name"')
    return name_item.getID()


# Everything below runs when the module is executed: it connects to Wikidata
# and interactively processes every ID listed in 'itemsID'.
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()

for itemID in itemsID:
    person_item = pywikibot.ItemPage(repo, itemID)
    person_dict = person_item.get()
    clm_dict = person_dict["claims"]

    # The English label is needed both for the messages and for guessing the
    # family name, so an item without a usable one is skipped.
    person_label = person_dict['labels'].get('en', '')
    name_parts = person_label.split()
    if not name_parts:
        print(u"'{}' has no English label, skipping\n".format(itemID))
        continue

    if FAMILY_NAME_PROP in clm_dict:
        # Family name exists, nothing to do except report it
        for clm in clm_dict[FAMILY_NAME_PROP]:
            clm_trgt = clm.getTarget()
            if clm_trgt is None:
                # Claim without a concrete target item: nothing to display
                continue
            # Fall back to the item ID when the target has no English label
            fn_label = clm_trgt.text['labels'].get('en', clm_trgt.getID())
            print(u"'{}' is the family name indicated for '{}'\n".format(fn_label, person_label))
        continue

    # Check first that the item is an instance of 'human'
    if not _is_human(clm_dict):
        print(u"'{}' is not an instance of 'human'\n".format(person_label))
        continue

    print(u"'{}' ({}) has no family name".format(person_label, itemID))
    # Take the last part of the name to guess the family name (e.g. 'Doe' from 'John Doe')
    proposed_family_name = name_parts[-1]
    # Check (by a human) that family name is correct
    family_name_str = _ask_family_name(proposed_family_name)
    if family_name_str is None:
        continue

    # Create the item for his/her family name if it doesn't exist
    name_item_id = get_family_name_item(repo, family_name_str)
    if name_item_id:
        print(u"Name '{}' already exists!".format(family_name_str))
    else:
        print(u"'{}' doesn't exist yet!".format(family_name_str))
        name_item_id = _create_family_name_item(repo, family_name_str)
        print(u"'{}' created!".format(family_name_str))

    # family name of 'person_item' is the item 'name_item_id'
    claim = pywikibot.Claim(repo, FAMILY_NAME_PROP)
    claim.setTarget(pywikibot.ItemPage(repo, name_item_id))
    person_item.addClaim(claim, summary=u'Adding claim "has family name"')
    print(u"'{}' has been linked to '{}'\n".format(family_name_str, person_label))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment