Last active
May 23, 2020 00:10
-
-
Save salgo60/7971a5d70baa080d342068b6f27203df to your computer and use it in GitHub Desktop.
Check Wikidata property P3188 quality compared with the new P8024
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Check the quality of the Wikidata property P3188.
# See the discussion on whether it should be deleted or not:
# https://www.wikidata.org/wiki/Wikidata:Properties_for_deletion#Nobelpris-ID_(P3188)
#
import sys, urllib3 | |
from SPARQLWrapper import SPARQLWrapper, JSON | |
# Script version, printed in the report header.
version = "0.0.2"

# SPARQL endpoint the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"

# SPARQL https://w.wiki/Rbb
# Selects every item that has P8024, optionally its P3188 value, and builds
# one link from each identifier: ?NewStableLink from P8024 and ?P3188link
# from P3188. ?P3188link is unbound for items without a P3188 statement.
# Rows are ordered by P8024 (cast to integer), descending.
query = """SELECT ?item ?itemLabel ?P3188 ?P8024 ?P3188link ?NewStableLink WHERE {
{?item wdt:P8024 ?P8024}
OPTIONAL {?item wdt:P3188 ?P3188}
BIND(URI(CONCAT("https://www.nobelprize.org/laureate/",?P8024)) AS ?NewStableLink)
BIND(URI(CONCAT("https://www.nobelprize.org/nobel_prizes/",?P3188,"-facts.html")) AS ?P3188link)
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
} order by desc(xsd:integer(?P8024))"""
def get_results(endpoint_url, query):
    """Run ``query`` against the SPARQL endpoint and return the decoded JSON.

    The request carries a custom User-Agent whose trailing ``major.minor``
    pair is taken from the running Python interpreter's version.

    Args:
        endpoint_url: URL of the SPARQL endpoint.
        query: SPARQL query text.

    Returns:
        Whatever ``SPARQLWrapper.query().convert()`` yields for the JSON
        return format.
    """
    py_major, py_minor = sys.version_info[0], sys.version_info[1]
    agent_string = "WDQS Nobelprize Salgo60/%s.%s" % (py_major, py_minor)

    client = SPARQLWrapper(endpoint_url, agent=agent_string)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    response = client.query()
    return response.convert()


# Fetch the result set once at import time; the report loop reads it.
results = get_results(endpoint_url, query)
# One shared pool so connections to the same host are reused across calls,
# with explicit timeouts so a stalled server cannot hang the whole run.
_HTTP = urllib3.PoolManager(timeout=urllib3.Timeout(connect=10.0, read=30.0))


def checkDeadLink(link, workinglink, WD):
    """Fetch ``link`` and report it when the server answers 404.

    Only a 404 status is reported; every other status is ignored. Any
    exception raised by the request (including a timeout) is printed and
    swallowed so that one bad link does not stop the caller.

    Args:
        link: URL to check.
        workinglink: URL printed next to a dead ``link`` for comparison.
        WD: Identifier printed with the report to locate the entry.

    Returns:
        None. Results are written to standard output.
    """
    try:
        r = _HTTP.request("GET", link)
        if r.status == 404:
            print("\n\tWD", WD)
            print("\tCheck", link)
            print("\t\tStatus 404 page missing compare ", workinglink)
    except Exception as e:
        # Deliberately best-effort: log the failure and carry on.
        print("\tIn Error", link)
        print(e)
# Report header with the script version.
print("\nCheckQualityofWikidataPropertyP3188 version: ", version, "\n\n")

# Prefix removed from each item URI to leave the bare entity id.
entity_prefix = "http://www.wikidata.org/entity/"

for row in results["results"]["bindings"]:
    item_uri = row["item"]["value"]
    entity_id = item_uri.replace(entity_prefix, "")

    if "P3188link" in row:
        # A P3188-based link exists: check whether it is dead and, if so,
        # show the P8024-based link next to it.
        checkDeadLink(
            row["P3188link"]["value"],
            row["NewStableLink"]["value"],
            entity_id,
        )
        continue

    # The row has no P3188link binding, so there is nothing to check.
    print(
        "\nMissing P3188 for :",
        row["itemLabel"]["value"],
        " ,",
        entity_id,
        " ",
        item_uri,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
/usr/local/bin/python3.8 /Users/magnus/Library/Preferences/PyCharmCE2019.2/scratches/scratch_61.py
CheckQualityofWikidataPropertyP3188 version: 0.0.2
Missing P3188 for : Michael Kremer , Q1635006 http://www.wikidata.org/entity/Q1635006
Missing P3188 for : Abiy Ahmed , Q50365049 http://www.wikidata.org/entity/Q50365049
Missing P3188 for : Peter Handke , Q44107 http://www.wikidata.org/entity/Q44107
Missing P3188 for : Akira Yoshino , Q4701206 http://www.wikidata.org/entity/Q4701206
Missing P3188 for : M. Stanley Whittingham , Q285062 http://www.wikidata.org/entity/Q285062
Missing P3188 for : John B. Goodenough , Q906529 http://www.wikidata.org/entity/Q906529
Missing P3188 for : Didier Queloz , Q124013 http://www.wikidata.org/entity/Q124013
Missing P3188 for : Michel Mayor , Q123975 http://www.wikidata.org/entity/Q123975
Missing P3188 for : Jim Peebles , Q728331 http://www.wikidata.org/entity/Q728331
Missing P3188 for : Gregg L. Semenza , Q1545025 http://www.wikidata.org/entity/Q1545025
Missing P3188 for : Peter J. Ratcliffe , Q2075967 http://www.wikidata.org/entity/Q2075967
Missing P3188 for : William G. Kaelin , Q1603351 http://www.wikidata.org/entity/Q1603351
Missing P3188 for : Paul M. Romer , Q509262 http://www.wikidata.org/entity/Q509262
Missing P3188 for : Denis Mukwege , Q1187542 http://www.wikidata.org/entity/Q1187542
Missing P3188 for : Donna Strickland , Q56855591 http://www.wikidata.org/entity/Q56855591
Missing P3188 for : Gérard Mourou , Q556543 http://www.wikidata.org/entity/Q556543
Missing P3188 for : James P. Allison , Q6140731 http://www.wikidata.org/entity/Q6140731
Missing P3188 for : International Campaign to Abolish Nuclear Weapons , Q547940 http://www.wikidata.org/entity/Q547940
Missing P3188 for : Kazuo Ishiguro , Q272855 http://www.wikidata.org/entity/Q272855
Missing P3188 for : Michael W. Young , Q1929672 http://www.wikidata.org/entity/Q1929672
Missing P3188 for : Michael Rosbash , Q1297741 http://www.wikidata.org/entity/Q1297741
Missing P3188 for : Jeffrey C. Hall , Q1686435 http://www.wikidata.org/entity/Q1686435