Last active
October 18, 2023 09:23
-
-
Save salgo60/09222815d09c6ed26432e9772acdc62b to your computer and use it in GitHub Desktop.
OK: 31168 not ok 20 procent problem 0.06412722842118763
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# P9495
# https://gist.github.com/salgo60/09222815d09c6ed26432e9772acdc62b
#
import urllib3 | |
import sys | |
from SPARQLWrapper import SPARQLWrapper, JSON | |
from datetime import datetime | |
# SPARQL endpoint that the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"

# One shared urllib3 connection pool, used for every URL probe in this script.
http = urllib3.PoolManager()

# Check linkroot https://w.wiki/7nY5 -->
# SPARQL: select every item that has a P9495 value and build the URL to probe
# by appending that value to https://samlingar.shm.se/ ; labels are requested
# in Swedish ("sv").  The P17/Q34 filter is left disabled, as in the original.
query = """
SELECT ?wd ?wdLabel ?oldURL WHERE {
?wd wdt:P9495 ?p.
BIND (URI(CONCAT("https://samlingar.shm.se/",?p)) AS ?oldURL)
# ?wd wdt:P17 wd:Q34. # Sverige
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv". }
}
"""
def get_results(endpoint_url, query):
    """Run *query* against the SPARQL endpoint and return the converted JSON.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: SPARQL query text.

    Returns:
        Whatever ``SPARQLWrapper``'s ``query().convert()`` yields when the
        return format is set to JSON.
    """
    # The user agent carries the running interpreter's major.minor version.
    major, minor = sys.version_info[0], sys.version_info[1]
    agent = "user:salgo60/%s.%s" % (major, minor)

    client = SPARQLWrapper(endpoint_url, agent=agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    response = client.query()
    return response.convert()
def check(url, wd, wdLabel, timeout=30.0):
    """Check whether *url* is OK, i.e. a GET request returns HTTP status 200.

    Every failure is reported on stdout together with the URL and the
    Wikidata item it belongs to, so a problem line can be traced back to
    its record.

    Args:
        url: The URL to request.
        wd: Identifier of the Wikidata item, used only in the report lines.
        wdLabel: Label of the Wikidata item, used only in the report lines.
        timeout: Seconds passed to urllib3 as the request timeout.  Without
            a timeout a single unresponsive server could stall the run.

    Returns:
        True if the response status is 200; False if the request raised or
        the status was anything else.
    """
    try:
        r = http.request('GET', url, timeout=timeout)
    except Exception as e:
        # Deliberately broad: one unreachable or malformed URL must not abort
        # the whole run.  Include the URL and item so the failure can be found.
        print("\t\tError\t", str(e), " \t", url, "\tWikidata: ", wd, " - ", wdLabel)
        return False
    if r.status != 200:
        print("Status: ", r.status, " \t", url, "\tWikidata: ", wd, " - ", wdLabel)
        return False
    return True
# Script body: fetch all bindings from the SPARQL endpoint, probe each URL
# with check(), and print a summary of how many were OK / not OK.
start_time = datetime.now()
print("Last run: ", start_time)

results = get_results(endpoint_url, query)
bindings = results["results"]["bindings"]
print("Number records i Wikidata: " + str(len(bindings)))

ok = 0
notok = 0
for result in bindings:
    # Resolve the item before the try block so the error report below always
    # names the *current* record.  Previously `wd` could be unbound (NameError
    # on the first record) or left over from the previous iteration.
    wd = result.get("wd", {}).get("value", "<unknown>")
    try:
        currentURL = result["oldURL"]["value"]
        wdLabel = result["wdLabel"]["value"]
        if check(currentURL, wd, wdLabel):
            ok += 1
        else:
            notok += 1
    except Exception as error:
        # Best-effort per record: report and continue.  Such records are
        # counted neither as ok nor as not ok, as in the original script.
        print("An error occurred: ", wd, " - ", type(error).__name__)

# Guard against an empty result set (or every record erroring out), which
# would otherwise raise ZeroDivisionError.
checked = ok + notok
procent = notok / checked * 100 if checked else 0.0
print("OK: ", ok, "\t not ok", notok, "\t procent problem", procent)

end = datetime.now()
print("Ended: ", end)
# Use the recorded end time so the elapsed value matches the "Ended" line.
print('Time elapsed (hh:mm:ss.ms) {}'.format(end - start_time))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment