Last active
November 9, 2019 14:15
-
-
Save jonadem/434151b95308403f36a980cc30e612cb to your computer and use it in GitHub Desktop.
Wikidata : Add the family name property of a person if missing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Add the family name property of a person if missing | |
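For each listed person, check whether a family name (P734) claim exists.
If not, propose a family name taken from the English label, ask the user to
confirm or correct it, reuse the matching family-name item (or create it when
none exists) and link it to the person.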
""" | |
import sys | |
import pywikibot | |
from pywikibot import pagegenerators | |
# Verbosity switch; nothing in the visible part of this script reads it.
_VERBOSE = True

# IDs of the Wikidata items (persons) to process, one entry per person.
itemsID = ("Q8023",)  # Nelson Mandela

# Wikidata property IDs used by the script.
INSTANCE_OF_PROP = "P31"
FAMILY_NAME_PROP = "P734"

# Wikidata item IDs used as claim targets.
HUMAN_ITEM = "Q5"
FAMILY_NAME_ITEM = "Q101352"
def get_family_name_item(repo, itemtitle):
    """Return the ID of the family-name item whose English label is itemtitle.

    Runs a SPARQL query for items that have ``itemtitle`` as their English
    label and are an instance (INSTANCE_OF_PROP) of FAMILY_NAME_ITEM.

    "repo" is the pywikibot repository the query is run against.
    "itemtitle" is the label to match exactly.

    Returns the item ID if exactly one item matches, None if none does.
    Raises ValueError if several items match.
    """
    # Escape the characters that would end or corrupt a single-quoted SPARQL
    # string literal, so that a name such as "O'Brien" yields a valid query
    # instead of a syntax error or an unintended extra clause.
    escaped_title = (
        itemtitle.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    sparql = (
        "SELECT ?item WHERE {{ ?item rdfs:label '{}'@en . "
        "?item wdt:{} wd:{} }}"
    ).format(escaped_title, INSTANCE_OF_PROP, FAMILY_NAME_ITEM)

    entities = list(
        pagegenerators.WikidataSPARQLPageGenerator(sparql, site=repo))
    if len(entities) > 1:
        raise ValueError("Multiple family name instance with the same label in Wikidata")
    if entities:
        return entities[0].getID()
    return None
def query_yes_no_or_skip(question, default="yes"):
    """Ask a yes/no/skip question via input() and return the answer code.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
        It must be "yes" (the default), "no" or None (meaning
        an answer is required of the user).

    Returns 'y' for "yes", 'n' for "no" and 's' for "skip".
    Raises ValueError if "default" is not one of the accepted values.

    The answer is matched case-insensitively and surrounding whitespace is
    ignored. Any other input (including '?') prints the list of accepted
    answers and asks again.
    """
    valid = {"yes": 'y', "y": 'y', "ye": 'y',
             "no": 'n', "n": 'n',
             "skip": 's', "s": 's'}
    # The capital letter in the prompt marks the answer used on bare <Enter>.
    if default is None:
        prompt = " [y/n/s/?] "  # Question mark means 'show me more explanation'
    elif default == "yes":
        prompt = " [Y/n/s/?] "
    elif default == "no":
        prompt = " [y/N/s/?] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        # Strip so that an accidental space around the answer is tolerated.
        choice = input().strip().lower()
        if choice == '' and default is not None:
            return valid[default]
        if choice in valid:
            return valid[choice]
        sys.stdout.write("Please respond with 'yes' or 'no' or 'skip' "
                         "(or 'y' or 'n' or 's').\n")
def query_yes_no(question, default="yes"):
    """Ask a yes/no question via input() and return the answer as a bool.

    "question" is a string that is presented to the user.
    "default" is the presumed answer if the user just hits <Enter>.
        It must be "yes" (the default), "no" or None (meaning
        an answer is required of the user).

    Returns True for "yes" and False for "no".
    Raises ValueError if "default" is not one of the accepted values.

    The answer is matched case-insensitively and surrounding whitespace is
    ignored. Any other input prints the list of accepted answers and asks
    again.
    """
    valid = {"yes": True, "y": True, "ye": True,
             "no": False, "n": False}
    # The capital letter in the prompt marks the answer used on bare <Enter>.
    if default is None:
        prompt = " [y/n] "
    elif default == "yes":
        prompt = " [Y/n] "
    elif default == "no":
        prompt = " [y/N] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        sys.stdout.write(question + prompt)
        # Strip so that an accidental space around the answer is tolerated.
        choice = input().strip().lower()
        if choice == '' and default is not None:
            return valid[default]
        if choice in valid:
            return valid[choice]
        sys.stdout.write("Please respond with 'yes' or 'no' "
                         "(or 'y' or 'n').\n")
# Main script: for every listed person, report the existing family name or
# interactively add one (creating the family-name item when it is missing).
site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()

for itemID in itemsID:
    person_item = pywikibot.ItemPage(repo, itemID)
    # get() returns the item content; only 'labels' and 'claims' are used.
    person_dict = person_item.get()
    clm_dict = person_dict["claims"]

    # The English label is both the display name and the source of the
    # family-name guess, so an item without a usable one cannot be processed.
    person_label = person_dict['labels'].get('en')
    if not person_label or not person_label.split():
        print("'{}' has no English label, skipping\n".format(itemID))
        continue

    if FAMILY_NAME_PROP in clm_dict:
        # Family name exists, nothing to do except reporting it
        for clm in clm_dict[FAMILY_NAME_PROP]:
            clm_trgt = clm.getTarget()
            if clm_trgt is None:
                # A claim set to "unknown value" or "no value" has no target
                # item whose label could be shown.
                continue
            # Fall back to the item ID when the name has no English label.
            fn_label = clm_trgt.get()['labels'].get('en', clm_trgt.getID())
            print("'{}' is the family name indicated for '{}'\n".format(fn_label, person_label))
        continue

    # Check first that the item is an instance of 'human'
    instance_claims = clm_dict[INSTANCE_OF_PROP] if INSTANCE_OF_PROP in clm_dict else []
    instance_targets = (clm.getTarget() for clm in instance_claims)
    humanity_validated = any(
        trgt is not None and trgt.getID() == HUMAN_ITEM
        for trgt in instance_targets
    )
    if not humanity_validated:
        print("'{}' is not an instance of 'human'\n".format(person_label))
        continue

    print("'{}' ({}) has no family name".format(person_label, itemID))
    # Take the last part of the name to guess the family name (e.g. 'Doe' from 'John Doe')
    proposed_family_name = person_label.split()[-1]

    # Check (by a human) that family name is correct
    user_input = query_yes_no_or_skip("Is '{}' OK ?".format(proposed_family_name))
    if user_input == 's':
        print('')  # Add a new line
        continue
    if user_input == 'y':
        print("You validate '{}'".format(proposed_family_name))
        family_name_str = proposed_family_name
    else:
        # proposed_family_name was not accepted, ask until a name is validated
        while True:
            new_name = input("Enter a new name: ").strip()
            if not new_name:
                # An empty name can neither be looked up nor used as a label.
                continue
            if query_yes_no("You entered '{}', do you validate ?".format(new_name)):
                family_name_str = new_name
                break

    # Create the item for his/her family name if it doesn't exist
    name_item_id = get_family_name_item(repo, family_name_str)
    if name_item_id:
        print("Name '{}' already exists!".format(family_name_str))
    else:
        print("'{}' doesn't exist yet!".format(family_name_str))
        # No ID is given, so this ItemPage stands for a new item; it is built
        # on the same repository object as every other item in this script.
        name_item = pywikibot.ItemPage(repo)
        # create the labels (the name is used unchanged in every language)
        name_labels = {language: family_name_str
                       for language in ('en', 'fr', 'nl', 'de')}
        name_item.editLabels(name_labels, summary='Set labels')
        # create the descriptions
        name_descriptions = {'en': 'family name',
                             'fr': 'nom de famille',
                             'nl': 'achternaam',
                             'de': 'Familienname'}
        name_item.editDescriptions(name_descriptions, summary='Setting descriptions.')
        # 'name_item' is an instance of 'FAMILY_NAME_ITEM'
        claim = pywikibot.Claim(repo, INSTANCE_OF_PROP)
        claim.setTarget(pywikibot.ItemPage(repo, FAMILY_NAME_ITEM))
        name_item.addClaim(claim, summary='Adding claim "instance of family name"')
        name_item_id = name_item.getID()
        print("'{}' created!".format(family_name_str))

    # family name of 'person_item' is the item identified by 'name_item_id'
    claim = pywikibot.Claim(repo, FAMILY_NAME_PROP)
    claim.setTarget(pywikibot.ItemPage(repo, name_item_id))
    person_item.addClaim(claim, summary='Adding claim "has family name"')
    print("'{}' has been linked to '{}'\n".format(family_name_str, person_label))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment