Skip to content

Instantly share code, notes, and snippets.

@thibaut-d
Last active March 2, 2019 22:47
Show Gist options
  • Save thibaut-d/205d93aa801ef2549642730ea6bf2410 to your computer and use it in GitHub Desktop.
Save thibaut-d/205d93aa801ef2549642730ea6bf2410 to your computer and use it in GitHub Desktop.
Cheatsheet for Pywikibot from the doc
import pywikibot
# Connect to Wikidata and get the data repository attached to it.
wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()

# A regular wiki page (not an item) is addressed by its title.
page = pywikibot.Page(wikidata_site, 'Wikidata:WikiProject_Materials/Test')
print(page.exists())
print(page.text)  # fixed: the original line had an unbalanced ")" (SyntaxError)

# Beware: assigning page.text and saving replaces ALL the content of the page!
page.text = 'Hello world !!!'
page.save("Testing Pywikibot (sandbox page)")
print(page.text)
import pywikibot
# Load an item from Wikidata and walk through the data it exposes.
wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()
item = pywikibot.ItemPage(wikidata_repo, "Q2225")
print(item)

title = item.title()  # Return the Qid
properties = item.properties()  # Return properties
item_dict = item.get()  # Get the data of the item as a dict
clm_dict = item_dict["claims"]  # Get the claims of the item
clm_list = clm_dict["P2069"]  # Get the claims for P2069
sitelinks = item.sitelinks  # Get all the sitelinks
aliases = item.aliases  # Get all the aliases

if 'en' in item.labels:
    print('The label in English is: ' + item.labels['en'])  # Get the English label

# P31 is "instance of", so most items have it.
if item.claims and 'P31' in item.claims:
    first_p31 = item.claims['P31'][0]
    print(first_p31.getTarget())  # Target of the first P31 claim
    # Many claims have no sources at all, so guard the index access
    # instead of letting sources[0] raise IndexError.
    if first_p31.sources:
        print(first_p31.sources[0])

# For each claim in the claim list, get the content of the claim.
for clm in clm_list:
    print(clm.toJSON())  # print a dict
    rank = clm.rank  # "normal"
    claim_id = clm.id  # "P2069"
    is_reference = clm.isReference  # False
    snak = clm.snak  # "Q2225$edaaaf4e-48fd-6503-016c-27d857e55f40"
    claim_item = clm.on_item  # an item object that prints [[wikidata:Q2225]]
    clm_trgt = clm.getTarget()  # the target value object (a WbQuantity for P2069)
    # isinstance() rather than an exact type comparison, so subclasses match too.
    if isinstance(clm_trgt, pywikibot.WbQuantity):
        amount = clm_trgt.amount  # -1.00115965218076
        unit = clm_trgt.unit  # http://www.wikidata.org/entity/Q737120
        lower_bound = clm_trgt.lowerBound  # -1.00115965218077
        upper_bound = clm_trgt.upperBound  # -1.00115965218075
import pywikibot
from pywikibot import pagegenerators as pg
# Run a SPARQL query stored in a file and iterate over the matching items.
wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()

# The query is stored in a separate file. The encoding is given explicitly so
# that reading does not depend on the platform's default locale encoding.
with open('pka-query.rq', 'r', encoding='utf-8') as query_file:
    QUERY = query_file.read()
print('The content of pka-query.rq is:\n\n' + QUERY)

# The site object created above is reused; the original re-created the very
# same site a second time before building the generator.
generator = pg.WikidataSPARQLPageGenerator(QUERY, site=wikidata_site)
print('\n\nItems in the generator are:')
for item in generator:
    print(item)
import pywikibot
from pywikibot import pagegenerators as pg
# Build a SPARQL query inline (f-string) and iterate over the matching items.
wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()

# Named property_id rather than "property" so the builtin of that name is not
# shadowed at module level.
property_id = 'P1117'

# Doubled braces are literal braces in an f-string; only {property_id} is
# substituted.
QUERY2 = f'''
SELECT ?item ?value
WHERE
{{
?item wdt:{property_id} ?value .
}}
'''
generator2 = pg.WikidataSPARQLPageGenerator(QUERY2, site=wikidata_site)
print('Items in the generator are:')
for item in generator2:
    print(item)
import pywikibot
# Edit the labels, descriptions and aliases of an item on the "test" site.
site = pywikibot.Site("test", "wikidata")
repo = site.data_repository()
item = pywikibot.ItemPage(repo, "Q194617")

# Each mapping is keyed by language code.
new_labels = {"en": "bear2", "de": "Bär2"}
new_descr = {
    "en": "gentle creature of the forrest2",
    "de": "Friedlicher Waldbewohner2",
}
new_alias = {
    "en": ["brown bear2", "grizzly bear2", "polar bear2"],
    "de": ["Braunbär2", "Grizzlybär2", "Eisbär2"],
}

# One dedicated call per kind of term.
item.editLabels(labels=new_labels, summary="Setting new labels2.")
item.editDescriptions(new_descr, summary="Setting new descriptions2.")
item.editAliases(new_alias, summary="Setting new aliases2.")

# Alternatively, the general editEntity() function takes everything at once.
data = {
    "labels": {"en": "bear", "de": "Bär"},
    "descriptions": {
        "en": "gentle creature of the forrest",
        "de": "Friedlicher Waldbewohner",
    },
    "aliases": {
        "en": ["brown bear", "grizzly bear", "polar bear"],
        "de": ["Braunbär", "Grizzlybär", "Eisbär"],
    },
    "sitelinks": [
        {"site": "enwiki", "title": "Bear"},
        {"site": "dewiki", "title": "Bär"},
    ],
}
item.editEntity(data, summary='Edited item: set labels, descriptions, aliases')
import pywikibot
from pywikibot import pagegenerators as pg
# Site object for Wikidata, and the data repository obtained from it.
# Both are module-level names read by the code that follows in this file.
wikidata_site = pywikibot.Site("wikidata", "wikidata")
wikidata_repo = wikidata_site.data_repository()
# The property "color"; its value is supposed to be an item representing a color.
property = "P462"

# Homonym corrections: each key is an item wrongly used as a color value, each
# value is the color item that should be used instead. Items whose color claim
# targets a key have to be changed to target the matching value.
error_dict = {
    "Q13191": "Q39338",       # orange    - fruit    -> color
    "Q897": "Q208045",        # gold      - element  -> color
    "Q753": "Q2722041",       # copper    - element  -> color
    "Q25381": "Q679355",      # amber     - material -> color
    "Q134862": "Q5069879",    # champagne - drink    -> color
    "Q1090": "Q317802",       # silver    - element  -> color
    "Q1173": "Q797446",       # burgundy  - region   -> color
    "Q13411121": "Q5148721",  # peach     - fruit    -> color
}
def correct_claim(generator, key):
    """Retarget claims that point at the homonym item ``key``.

    Walks the item pages yielded by ``generator``. For every claim of the
    module-level ``property`` whose target item id equals ``key``, the target
    is changed to the item ``error_dict[key]``.

    Args:
        generator: iterable of item pages (for instance the result of a
            SPARQL page generator).
        key: Q-id of the homonym item; it must be a key of ``error_dict``.
    """
    # The replacement is the same for every page, so build it once.
    correct_page = pywikibot.ItemPage(wikidata_repo, error_dict[key], 0)

    for page in generator:
        # Dictionary containing all the values of the item.
        item_dict = page.get()
        # The item may no longer carry the property by the time it is
        # loaded; treat that as "nothing to correct" instead of a KeyError.
        claim_list = item_dict["claims"].get(property, [])
        for claim in claim_list:
            trgt = claim.getTarget()
            # A claim may have no target object (presumably "unknown value" /
            # "no value" statements); getattr keeps such claims from raising
            # AttributeError on ``.id``.
            if getattr(trgt, "id", None) != key:
                continue
            print(f'Correcting {key} to {error_dict[key]}')
            claim.changeTarget(correct_page)
# For every known homonym, fetch the items whose color is set to it and
# hand them over to correct_claim().
for key in error_dict:
    # Selects the items having the color property set to the homonym.
    query = f'''
SELECT ?item
WHERE
{{
?item wdt:{property} wd:{key} .
}}
'''
    # A generator stores the items returned by the query ...
    sparql_items = pg.WikidataSPARQLPageGenerator(query, site=wikidata_site)
    # ... and is wrapped in a preloading generator (may improve performance).
    generator = wikidata_site.preloadpages(sparql_items, pageprops=True)
    correct_claim(generator, key)
import pywikibot
from pywikibot.data import api
import pprint
# FIXME: the property and item ids below are hardcoded for test.wikidata.
# Properties used by the import.
p_stated_in = "P149"
p_half_life = "P525"
p_ref_url = "P93"

# Absolute tolerance used when comparing quantities and uncertainties.
precision = 10 ** -10

# Data to import, keyed by the search term of the item:
#   "data":   [quantity, uncertainty, unit (Q1748 = hours)]
#   "source": [stated in item, ref url]
half_life_data = {
    "uranium-240": {
        "data": ["14.1", "0.1", "Q1748"],
        "source": [
            "Q1751",
            "http://www.nndc.bnl.gov/chart/reCenter.jsp?z=92&n=148",
        ],
    },
}
# Please only modify the test site unless you know what you are doing!
# ``site`` and ``repo`` are module-level names read by the functions below.
site = pywikibot.Site("test", "wikidata")
repo = site.data_repository()
def get_items(site, item_title):
    """Search ``site`` for items matching the term ``item_title``.

    Builds a ``wbsearchentities`` API request (English language, items only,
    JSON format), submits it and returns the result of the submission.
    """
    search_request = api.Request(
        site=site,
        action="wbsearchentities",
        format="json",
        language="en",
        type="item",
        search=item_title,
    )
    return search_request.submit()
def check_claim_and_uncert(item, property, data, tolerance=None):
    """Find an existing claim that already holds the given value and uncertainty.

    This is used to see whether the claim is already set on the item.

    Args:
        item: item page; ``item.get()["claims"]`` is looked up by ``property``.
        property: property id whose claims are inspected.
        data: sequence ``[value, uncertainty, unit]``; value and uncertainty
            are converted with ``float()``, the unit is not compared.
        tolerance: absolute tolerance for the comparisons. Defaults to the
            module-level ``precision``.

    Returns:
        The first claim whose amount equals the value AND whose lower/upper
        bounds both match the uncertainty (within the tolerance), or ``None``
        when the property is absent or no claim matches.
    """
    if tolerance is None:
        tolerance = precision
    value, uncert, _unit = data
    value, uncert = float(value), float(uncert)

    item_dict = item.get()
    try:
        claims = item_dict["claims"][property]
    except KeyError:
        # The item has no claim for this property at all.
        return None

    for claim in claims:
        wb_quant = claim.getTarget()
        try:
            # Convert explicitly: the amounts may not be floats (e.g. Decimal),
            # and mixing those with float arithmetic raises TypeError, which
            # the former bare ``except`` silently turned into "no claim".
            amount = float(wb_quant.amount)
            lower = float(wb_quant.lowerBound)
            upper = float(wb_quant.upperBound)
        except (AttributeError, TypeError, ValueError):
            # No target, not a quantity, or no bounds: cannot match.
            continue

        # Both checks are evaluated per claim. The original kept its flags
        # across iterations, so one claim matching the amount plus another
        # matching only the uncertainty returned the wrong claim.
        if abs(amount - value) >= tolerance:
            continue
        lower_matches = abs(uncert - (amount - lower)) < tolerance
        upper_matches = abs((upper - amount) - uncert) < tolerance
        if lower_matches and upper_matches:
            return claim
    return None
def check_source_set(claim, property, data, stated_in_property=None):
    """Tell whether ``claim`` already carries the wanted "stated in" source.

    This is used to decide whether a new reference has to be created.

    Args:
        claim: claim whose sources (``claim.getSources()``) are inspected.
        property: unused; kept so existing callers keep working.
        data: source data; ``data[0]`` is the id of the "stated in" item.
        stated_in_property: id of the "stated in" property. Defaults to the
            module-level ``p_stated_in``.

    Returns:
        ``True`` only if some source has a "stated in" claim whose target id
        equals ``data[0]``; ``False`` otherwise (including no sources at all).
    """
    if stated_in_property is None:
        stated_in_property = p_stated_in
    wanted_id = data[0]

    for source in claim.getSources():
        # A source without "stated in" says nothing about the other sources,
        # so keep looking; the original returned False at the first such one.
        for source_claim in source.get(stated_in_property, []):
            target = source_claim.target
            if getattr(target, "id", None) == wanted_id:
                return True
    # Always a real boolean: the original fell off the end and returned None.
    return False
def set_claim(item, property, data):
    """Create a quantity claim from the import data and add it to ``item``.

    Args:
        item: item page that receives the claim.
        property: id of the property of the new claim.
        data: sequence ``[value, uncertainty, unit]`` where ``unit`` is the
            Q-id of the unit item.

    Returns:
        The claim that was added.
    """
    value, uncert, unit = data  # get these variables from our import data
    value, uncert = float(value), float(uncert)

    # Create a claim object with the wanted property.
    claim = pywikibot.Claim(repo, property)

    # The unit is passed as an entity URI built from the ``unit`` argument;
    # the original ignored the argument and always used Q1748.
    # FIXME: the entity base is hardcoded for test.wikidata.
    unit_uri = f"http://test.wikidata.org/entity/{unit}"

    # Quantity with the wanted amount, unit and uncertainty, bound to the same
    # repository as the claim rather than to the default site.
    wb_quant = pywikibot.WbQuantity(value, unit_uri, uncert, site=repo)
    claim.setTarget(wb_quant)

    # Finally, add the claim object to the item.
    item.addClaim(claim, bot=False, summary="Adding half-life claim from NNDC.")
    print('Running set_claim...\n On item: ' + str(item) + '\n Setting claim:\n' + str(claim) + '\n')
    return claim
def create_source_claim(claim, source_data):
    """Attach a "stated in" reference to ``claim``.

    ``source_data`` is the pair ``[stated in item id, ref url]``. Only the
    item id is used: a reference claim with the module-level ``p_stated_in``
    property targeting that item is added to ``claim``.
    NOTE(review): the reference URL is unpacked but never written - confirm
    whether that is intended.

    Returns:
        Always ``True``.
    """
    trgt_item, ref_url = source_data

    # Reference claim for the source, targeting the item object of the source.
    source_claim = pywikibot.Claim(repo, p_stated_in, isReference=True)
    source_claim.setTarget(pywikibot.ItemPage(repo, trgt_item))

    # Finally, add the source to the claim.
    claim.addSources([source_claim])
    print(f'Running create_source_claim...\n\n On claim :\n{claim!s}\n\n Setting source:\n{source_claim!s}\n')
    return True
# Import every entry of half_life_data (there is only one key in this
# example, so the loop runs once).
for key in half_life_data:
    search_results = get_items(site, key)
    print('Value of search_result is:\n' + str(search_results) + '\n')

    # Exactly one item shall match the search term (uranium-240). With none
    # or several, the program cannot know which one to set.
    if len(search_results["search"]) != 1:
        # Fixed: the original passed ``key`` as a second print() argument, so
        # the "{}" placeholder was printed literally.
        print(f"No result or too many found for {key}.")
        continue

    # This is the item that will be modified.
    item = pywikibot.ItemPage(repo, search_results["search"][0]["id"])
    print('Value of item from results is:' + str(item) + '\n')

    # Value of the property, taken from the data we want to import.
    data = half_life_data[key]["data"]
    print('Value of data is:' + str(data) + '\n')

    # Source of the property, taken from the data we want to import.
    source_data = half_life_data[key]["source"]
    print('Value of source_data is:' + str(source_data) + '\n')

    # Check whether our claim is already set correctly in Wikidata.
    claim = check_claim_and_uncert(item, p_half_life, data)
    print('Value of claim from check_claim_and_uncert is : ' + str(claim) + '\n')

    if claim:
        # The claim already exists: check whether the source exists too and
        # create it if not.
        source = check_source_set(claim, key, source_data)
        print('Value of source is: ' + str(source) + '\n')
        if not source:
            create_source_claim(claim, source_data)
    else:
        # The claim does not exist: create it together with its source.
        claim = set_claim(item, p_half_life, data)
        create_source_claim(claim, source_data)
@thibaut-d
Copy link
Author

This handbook is a summary of https://m.wikidata.org/wiki/Wikidata:Pywikibot_-_Python_3_Tutorial with some parts taken from other tutorials. It is built for quick reference. It is recommended to read the tutorials before using it. These tutorials are released under the Creative Commons Attribution-ShareAlike License. Feel free to copy and adapt the Notebook under this license.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment