Last active
March 2, 2019 22:47
-
-
Save thibaut-d/205d93aa801ef2549642730ea6bf2410 to your computer and use it in GitHub Desktop.
Cheatsheet for Pywikibot from the doc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read a wiki page, overwrite its text and save it.
import pywikibot

wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()

page = pywikibot.Page(wikidata_site, 'Wikidata:WikiProject_Materials/Test')
print(page.exists())
print(page.text)  # the original line had a stray ")" and did not parse

# Beware: assigning page.text and saving replaces ALL the content of the page!
page.text = 'Hello world !!!'
page.save("Testing Pywikibot (sandbox page)")
print(page.text)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the data of a Wikidata item: labels, sitelinks, aliases and claims.
import pywikibot

wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()

item = pywikibot.ItemPage(wikidata_repo, "Q2225")
print(item)

title = item.title()  # Return the Qid
properties = item.properties()  # Return properties
item_dict = item.get()  # Get the data of the item as dict
clm_dict = item_dict["claims"]  # Get the claims of the item
# Get the claims for P2069; fall back to an empty list so the loop at the
# bottom is simply skipped when the item carries no such claim.
clm_list = clm_dict.get("P2069", [])
sitelinks = item.sitelinks  # Get all the sitelinks
aliases = item.aliases  # Get all the aliases

if 'en' in item.labels:
    print('The label in English is: ' + item.labels['en'])  # Get the English label

# P31 is "instance of", so most items have it.
if item.claims and 'P31' in item.claims:
    first_p31 = item.claims['P31'][0]
    print(first_p31.getTarget())  # Target of the first P31 claim
    # Many claims have no source at all, so guard the index access.
    if first_p31.sources:
        print(first_p31.sources[0])

# For each claim in the claim list, get the content of the claim.
for clm in clm_list:
    print(clm.toJSON())  # print a dict
    rank = clm.rank  # "normal"
    claim_id = clm.id  # "P2069"
    is_reference = clm.isReference  # False
    snak = clm.snak  # "Q2225$edaaaf4e-48fd-6503-016c-27d857e55f40"
    claim_item = clm.on_item  # an item object that prints [[wikidata:Q2225]]
    clm_trgt = clm.getTarget()
    # isinstance() also accepts subclasses, unlike an exact type() comparison.
    if isinstance(clm_trgt, pywikibot.WbQuantity):
        amount = clm_trgt.amount  # -1.00115965218076
        unit = clm_trgt.unit  # http://www.wikidata.org/entity/Q737120
        lower_bound = clm_trgt.lowerBound  # -1.00115965218077
        upper_bound = clm_trgt.upperBound  # -1.00115965218075
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Run a SPARQL query stored in a separate file and list the matching items.
import pywikibot
from pywikibot import pagegenerators as pg

# The site object is created once; the original built it a second time
# right before the generator for no benefit.
wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()

# The query is stored in a separate file. An explicit encoding keeps the
# read independent of the platform's default locale.
with open('pka-query.rq', 'r', encoding='utf-8') as query_file:
    QUERY = query_file.read()

print('The content of pka-query.rq is:\n\n' + QUERY)

generator = pg.WikidataSPARQLPageGenerator(QUERY, site=wikidata_site)
print('\n\nItems in the generator are:')
for item in generator:
    print(item)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a SPARQL query inline and list the items that carry a given property.
import pywikibot
from pywikibot import pagegenerators as pg

wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()

# Named property_id (not "property") so the builtin property() stays usable.
property_id = 'P1117'

# Doubled braces are literal braces inside an f-string.
QUERY2 = f'''
SELECT ?item ?value
WHERE
{{
?item wdt:{property_id} ?value .
}}
'''

generator2 = pg.WikidataSPARQLPageGenerator(QUERY2, site=wikidata_site)
print('Items in the generator are:')
for item in generator2:
    print(item)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Edit labels, descriptions and aliases of an item on the TEST Wikidata site.
import pywikibot

site = pywikibot.Site("test", "wikidata")
repo = site.data_repository()
item = pywikibot.ItemPage(repo, "Q194617")

# Variant 1: one dedicated call per kind of term, each keyed by language code.
new_labels = {
    "en": "bear2",
    "de": "Bär2",
}
new_descr = {
    "en": "gentle creature of the forrest2",
    "de": "Friedlicher Waldbewohner2",
}
new_alias = {
    "en": ["brown bear2", "grizzly bear2", "polar bear2"],
    "de": ["Braunbär2", "Grizzlybär2", "Eisbär2"],
}
item.editLabels(labels=new_labels, summary="Setting new labels2.")
item.editDescriptions(new_descr, summary="Setting new descriptions2.")
item.editAliases(new_alias, summary="Setting new aliases2.")

# Variant 2: the general editEntity() call sends everything in a single edit.
data = {
    "labels": {"en": "bear", "de": "Bär"},
    "descriptions": {
        "en": "gentle creature of the forrest",
        "de": "Friedlicher Waldbewohner",
    },
    "aliases": {
        "en": ["brown bear", "grizzly bear", "polar bear"],
        "de": ["Braunbär", "Grizzlybär", "Eisbär"],
    },
    "sitelinks": [
        {"site": "enwiki", "title": "Bear"},
        {"site": "dewiki", "title": "Bär"},
    ],
}
item.editEntity(data, summary='Edited item: set labels, descriptions, aliases')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replace colour claims that point at a homonym (e.g. the fruit "orange")
# with the matching colour item.
import pywikibot
from pywikibot import pagegenerators as pg

wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()

# The property "color", whose value is expected to be an item representing
# a colour.
property = "P462"

# Keys are the homonyms wrongly used as a colour value; values are the colour
# items that should be used instead. Items whose colour property is set to a
# key have to be changed to the corresponding value.
error_dict = {
    "Q13191": "Q39338",  # orange - "fruit": "color"
    "Q897": "Q208045",  # gold - "element": "color"
    "Q753": "Q2722041",  # copper - "element": "color"
    "Q25381": "Q679355",  # amber - "material": "color"
    "Q134862": "Q5069879",  # champagne - "drink": "color"
    "Q1090": "Q317802",  # silver - "element": "color"
    "Q1173": "Q797446",  # burgundy - "region": "color"
    "Q13411121": "Q5148721",  # peach - "fruit": "color"
}
def correct_claim(generator, key):
    """Retarget colour claims that point at the homonym ``key``.

    Walks the items yielded by ``generator`` and, for every claim of the
    module-level ``property`` whose target is the item ``key``, changes the
    target to ``error_dict[key]``.

    Args:
        generator: iterable of item pages to inspect.
        key: Q-id of the homonym item wrongly used as the claim target.

    Raises:
        KeyError: if ``key`` is not a key of ``error_dict``.
    """
    # The replacement is the same for every page, so look it up only once.
    replacement_id = error_dict[key]
    for page in generator:
        item_dict = page.get()  # dictionary holding all the data of the item
        # An item may no longer carry the property by the time it is loaded;
        # treat that as "nothing to fix" rather than failing with KeyError.
        for claim in item_dict["claims"].get(property, []):
            trgt = claim.getTarget()
            # A claim without a usable target (getTarget() returned None, or
            # the value has no ``id``) cannot be the homonym; skip it.
            if getattr(trgt, "id", None) != key:
                continue
            print(f'Correcting {key} to {replacement_id}')
            # Third positional argument kept exactly as in the original call
            # (presumably the namespace - TODO confirm).
            correct_page = pywikibot.ItemPage(wikidata_repo, replacement_id, 0)
            claim.changeTarget(correct_page)  # point the claim at the right item
# Run one SPARQL query per homonym and hand the matching items to
# correct_claim().
for key in error_dict:
    # Selects the items whose colour is set to the homonym.
    sparql = f'''
SELECT ?item
WHERE
{{
?item wdt:{property} wd:{key} .
}}
'''
    matching_items = pg.WikidataSPARQLPageGenerator(sparql, site=wikidata_site)
    # Preloading is meant to improve performance (the original author was
    # not sure it does).
    preloaded_items = wikidata_site.preloadpages(matching_items, pageprops=True)
    correct_claim(preloaded_items, key)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import a half-life value, with its source, into an item of the TEST site.
import pywikibot
from pywikibot.data import api
import pprint

# FIXME Hardcoded for test.wikidata
# Property ids used by the import.
p_stated_in = "P149"
p_half_life = "P525"
p_ref_url = "P93"

# Tolerance used when comparing quantities stored on the item with the
# values of the import dataset.
precision = 10 ** -10

# Import dataset, keyed by the search term used to find the item.
#   "data":   [quantity, uncertainty, unit (Q1748 = hours)]
#   "source": [stated in item, ref url]
half_life_data = {
    "uranium-240": {
        "data": ["14.1", "0.1", "Q1748"],
        "source": [
            "Q1751",
            "http://www.nndc.bnl.gov/chart/reCenter.jsp?z=92&n=148",
        ],
    },
}

# Please only modify the test site unless you know what you are doing!
site = pywikibot.Site("test", "wikidata")
repo = site.data_repository()
def get_items(site, item_title):
    """Search the Wikibase items of ``site`` for ``item_title``.

    Args:
        site: the site the ``wbsearchentities`` request is sent to.
        item_title: the search term.

    Returns:
        Whatever ``Request.submit()`` returns for the search request.
    """
    params = {
        "action": "wbsearchentities",
        "format": "json",
        "language": "en",
        "type": "item",
        "search": item_title,
    }
    # The API parameters go through the dedicated ``parameters`` argument
    # instead of being spread into Request's own keyword arguments.
    request = api.Request(site=site, parameters=params)
    return request.submit()
def check_claim_and_uncert(item, property, data, tolerance=None):
    """Return the claim of ``item`` that already holds the imported quantity.

    A claim matches when its amount equals the imported value and both its
    lower and upper bounds sit one uncertainty away from the amount, each
    comparison being made within ``tolerance``.

    Args:
        item: object whose ``get()`` returns a mapping with a "claims" entry.
        property: id of the property whose claims are inspected.
        data: ``[value, uncertainty, unit]``; value and uncertainty are
            converted with ``float()``, the unit is not used here.
        tolerance: maximum absolute difference accepted by each comparison;
            defaults to the module-level ``precision``.

    Returns:
        The first matching claim, or None when the property is absent or no
        claim matches.
    """
    if tolerance is None:
        tolerance = precision
    item_dict = item.get()
    value, uncert, _unit = data
    value, uncert = float(value), float(uncert)
    try:
        claims = item_dict["claims"][property]
    except KeyError:
        return None

    for claim in claims:
        wb_quant = claim.getTarget()
        amount = getattr(wb_quant, "amount", None)
        lower = getattr(wb_quant, "lowerBound", None)
        upper = getattr(wb_quant, "upperBound", None)
        # Targets that are missing, are not quantities, or carry no bounds
        # cannot match a value that comes with an uncertainty.
        if amount is None or lower is None or upper is None:
            continue
        try:
            # Normalise to float first: the stored numbers may be Decimal,
            # which cannot be mixed with float in arithmetic.
            amount, lower, upper = float(amount), float(lower), float(upper)
        except (TypeError, ValueError):
            continue
        if abs(amount - value) >= tolerance:
            continue
        # Both tests are made on this very claim, so a match can never be
        # assembled from two different claims.
        lower_matches = abs(uncert - (amount - lower)) < tolerance
        upper_matches = abs((upper - amount) - uncert) < tolerance
        if lower_matches and upper_matches:
            return claim
    return None
def check_source_set(claim, property, data):
    """Tell whether ``claim`` already cites the item given in ``data``.

    Args:
        claim: the claim whose references are inspected via ``getSources()``.
        property: unused; kept so existing callers keep working.
        data: source data whose first element is the id of the expected
            "stated in" item.

    Returns:
        True if one of the references has a "stated in" entry (module-level
        ``p_stated_in``) targeting ``data[0]``, False otherwise.
    """
    for source in claim.getSources():
        try:
            stated_in_claims = source[p_stated_in]
        except KeyError:
            # This reference has no "stated in" entry; a later reference may
            # still have one, so keep looking instead of giving up.
            continue
        for stated_in in stated_in_claims:
            trgt = stated_in.getTarget()
            if getattr(trgt, "id", None) == data[0]:
                return True
    # Covers "no source at all" as well as "no matching source".
    return False
def set_claim(item, property, data):
    """Add a quantity claim built from the import data to ``item``.

    Args:
        item: the item page receiving the claim.
        property: id of the property of the new claim.
        data: ``[value, uncertainty, unit]``, where unit is the id of the
            unit item.

    Returns:
        The claim that was added.
    """
    value, uncert, unit = data
    value, uncert = float(value), float(uncert)
    claim = pywikibot.Claim(repo, property)  # claim object for the property
    # The unit comes from the import data; the original ignored it and always
    # used a hard-coded entity URI for Q1748.
    unit_item = pywikibot.ItemPage(repo, unit)
    wb_quant = pywikibot.WbQuantity(value, unit_item, uncert, site=repo)
    claim.setTarget(wb_quant)
    item.addClaim(claim, bot=False, summary="Adding half-life claim from NNDC.")
    print('Running set_claim...\n On item: ' + str(item) + '\n Setting claim:\n' + str(claim) + '\n')
    return claim
def create_source_claim(claim, source_data):
    """Attach a "stated in" reference to ``claim``.

    Args:
        claim: the claim receiving the reference.
        source_data: ``[stated in item id, ref url]``; only the item id is
            used, the URL is unpacked but not written.

    Returns:
        True once the reference has been added.
    """
    stated_in_id, _ref_url = source_data
    reference = pywikibot.Claim(repo, p_stated_in, isReference=True)
    # The reference targets the item object of the source.
    reference.setTarget(pywikibot.ItemPage(repo, stated_in_id))
    claim.addSources([reference])
    print('Running create_source_claim...\n\n On claim :\n' + str(claim) + '\n\n Setting source:\n' + str(reference) + '\n')
    return True
# Import every entry of the dataset (the example holds a single entry, so
# the loop runs once).
for key in half_life_data:
    search_results = get_items(site, key)
    print('Value of search_result is:\n' + str(search_results) + '\n')

    matches = search_results["search"]
    # Exactly one item shall match the search term; with none or several
    # the program cannot know which item to edit.
    if len(matches) != 1:
        print(f"No result or too many found for {key}.")
        continue

    item = pywikibot.ItemPage(repo, matches[0]["id"])  # the item to modify
    print('Value of item from results is:' + str(item) + '\n')
    data = half_life_data[key]["data"]  # value to import for the property
    print('Value of data is:' + str(data) + '\n')
    source_data = half_life_data[key]["source"]  # source of that value
    print('Value of source_data is:' + str(source_data) + '\n')

    # Check whether the claim is already set correctly on the item.
    claim = check_claim_and_uncert(item, p_half_life, data)
    print('Value of claim from check_claim_and_uncert is : ' + str(claim) + '\n')

    if claim:
        # The claim exists: add the source only if it is missing. The
        # property passed is the "stated in" property rather than the
        # search term the original passed by mistake.
        source = check_source_set(claim, p_stated_in, source_data)
        print('Value of source is: ' + str(source) + '\n')
        if not source:
            create_source_claim(claim, source_data)
    else:
        # The claim does not exist: create it together with its source.
        claim = set_claim(item, p_half_life, data)
        create_source_claim(claim, source_data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This handbook is a summary of https://m.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial with some parts taken from other tutorials. It is built for quick reference. It is recommended to read the tutorials before using it. These tutorials are released under the Creative Commons Attribution-ShareAlike License. Feel free to copy and adapt the notebook under this license.