Created
October 18, 2023 22:33
-
-
Save jonadem/eac41eb517ec570b34f45ef474167c80 to your computer and use it in GitHub Desktop.
Wikidata : Add the gender property of a person if missing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Add the gender property of a person if missing.

For a person, check whether the gender is provided.
If not, ask the user for it and link it to the person.
"""
import sys | |
from pywikibot.data import api | |
import pywikibot | |
# Wikidata IDs of the persons (items) the script iterates over.
itemsID = ("Q8023",)  # Nelson Mandela

# Property IDs used when reading and writing claims.
GENDER_PROP = "P21"
INSTANCE_OF_PROP = "P31"

# Item IDs used as claim targets.
HUMAN_ITEM = "Q5"
MALE_ITEM = "Q6581097"
FEMALE_ITEM = "Q6581072"
def getItemsWithSameName(site, itemtitle):
    """Return the search hits whose label is exactly *itemtitle*.

    Runs ``getItems(site, itemtitle)`` and keeps only the entries of its
    ``'search'`` list whose ``'label'`` equals *itemtitle*.

    :param site: site object handed through to ``getItems``.
    :param itemtitle: text to search for and to compare labels against.
    :return: list of matching search-result entries (possibly empty).
    """
    search_result = getItems(site, itemtitle)
    # A hit is not guaranteed to carry a 'label' key (e.g. when it matched
    # on something other than a label), so test for the key instead of
    # indexing blindly and failing with KeyError.
    return [
        hit
        for hit in search_result['search']
        if 'label' in hit and hit['label'] == itemtitle
    ]
def getItems(site, itemtitle):
    """Search for items matching *itemtitle* with ``wbsearchentities``.

    The request is restricted to entities of type ``item`` and uses ``en``
    as its language.

    :param site: site object the API request is sent through.
    :param itemtitle: text to search for.
    :return: whatever ``api.Request.submit()`` returns for the request.
    """
    search_params = dict(
        action='wbsearchentities',
        format='json',
        language='en',
        type='item',
        search=itemtitle,
    )
    return api.Request(site=site, parameters=search_params).submit()
def getItem(site, wdItem, token):
    """Fetch the data of the entity *wdItem* with ``wbgetentities``.

    :param site: site object the API request is sent through.
    :param wdItem: entity ID passed as the ``ids`` parameter.
    :param token: unused; kept so existing callers keep working.
    :return: whatever ``api.Request.submit()`` returns for the request.
    """
    # Pass the API parameters through ``parameters=`` (as getItems does)
    # instead of as bare keyword arguments of Request, which pywikibot
    # has deprecated.
    params = {
        'action': 'wbgetentities',
        'format': 'json',
        'ids': wdItem,
    }
    request = api.Request(site=site, parameters=params)
    return request.submit()
def query_gender():
    """Ask the user for a gender on standard input.

    Keeps prompting until the answer is one of ``male``, ``female``, ``m``
    or ``f`` (case-insensitive, surrounding whitespace ignored).

    :return: ``MALE_ITEM`` or ``FEMALE_ITEM``, depending on the answer.
    """
    valid = {
        "male": MALE_ITEM,
        "female": FEMALE_ITEM,
        "m": MALE_ITEM,
        "f": FEMALE_ITEM,
    }
    while True:
        sys.stdout.write("Give a gender [m/f] : ")
        # The prompt has no trailing newline; flush so it is visible
        # before we block waiting for the answer.
        sys.stdout.flush()
        # strip() so that an answer such as "m " is not rejected.
        choice = input().strip().lower()
        if choice in valid:
            return valid[choice]
        sys.stdout.write("Please enter 'male' or 'female' "
                         "(or 'm' or 'f').\n")
def _english_label(item):
    """Return the English label of *item*, or its ID when it has none."""
    return item.get()["labels"].get("en", item.getID())


def _targets_item(claims, item_id):
    """Tell whether any claim in *claims* has the item *item_id* as target."""
    for claim in claims:
        target = claim.getTarget()
        # getTarget() is None for "unknown value" / "no value" claims.
        if target is not None and target.getID() == item_id:
            return True
    return False


# English word printed for each gender item this script is able to write.
_GENDER_WORDS = {MALE_ITEM: "male", FEMALE_ITEM: "female"}

site = pywikibot.Site("wikidata", "wikidata")
repo = site.data_repository()

# For every configured person: report the existing gender claim(s), or, if
# there is none and the item is an instance of 'human', ask the user for the
# gender and add it as a new claim.
for itemID in itemsID:
    person_item = pywikibot.ItemPage(repo, itemID)
    person_dict = person_item.get()
    person_label = _english_label(person_item)
    clm_dict = person_dict["claims"]

    if GENDER_PROP in clm_dict:
        # Gender exists, nothing to do except reporting it.
        for clm in clm_dict[GENDER_PROP]:
            clm_trgt = clm.getTarget()
            if clm_trgt is None:
                # No item to take a label from; show the snak type instead.
                fn_label = clm.getSnakType()
            else:
                fn_label = _english_label(clm_trgt)
            print("'{}' is the gender indicated for '{}'\n".format(
                fn_label, person_label))
        continue

    humanity_validated = _targets_item(
        clm_dict.get(INSTANCE_OF_PROP, ()), HUMAN_ITEM)
    if not humanity_validated:
        print("'{}' is not an instance of 'human'\n".format(person_label))
        continue

    print("'{}' has no gender".format(person_label))
    # Ask the gender to the user.
    gender_item = query_gender()
    # Validate BEFORE writing anything to Wikidata.
    if gender_item not in _GENDER_WORDS:
        raise ValueError("invalid gender given: '%s'" % gender_item)

    # Gender of 'person_item' is 'gender_item'.
    claim = pywikibot.Claim(repo, GENDER_PROP)
    target = pywikibot.ItemPage(repo, gender_item)
    claim.setTarget(target)
    person_item.addClaim(claim)
    print("'{}' should be a {}\n".format(
        person_label, _GENDER_WORDS[gender_item]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment