Last active
October 18, 2023 09:23
-
-
Save salgo60/09222815d09c6ed26432e9772acdc62b to your computer and use it in GitHub Desktop.
OK: 31168 not ok 20 procent problem 0.06412722842118763
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Link check for Wikidata property P9495 (links to https://samlingar.shm.se/)
# https://gist.github.com/salgo60/09222815d09c6ed26432e9772acdc62b
#
import urllib3 | |
import sys | |
from SPARQLWrapper import SPARQLWrapper, JSON | |
from datetime import datetime | |
# SPARQL endpoint that the Wikidata query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"

# Single urllib3 pool manager, created once at import time and reused for
# every HTTP request the script makes.
http = urllib3.PoolManager()
# Link rot check, see https://w.wiki/7nY5
#
# SPARQL: for every item ?wd that has a P9495 value ?p, build the URL
# "https://samlingar.shm.se/" + ?p and return it as ?oldURL together with the
# item and its label.
# NOTE: labels are requested in Swedish ("sv") only; the label service falls
# back to the bare Q-id for items without a Swedish label.
query = """
SELECT ?wd ?wdLabel ?oldURL WHERE {
  ?wd wdt:P9495 ?p.
  BIND (URI(CONCAT("https://samlingar.shm.se/",?p)) AS ?oldURL)
  # ?wd wdt:P17 wd:Q34. # Sverige
  SERVICE wikibase:label { bd:serviceParam wikibase:language "sv". }
}
"""
def get_results(endpoint_url, query):
    """Run *query* against the SPARQL endpoint at *endpoint_url*.

    The request carries a user agent made of the fixed prefix
    ``user:salgo60/`` followed by the major and minor version of the
    running Python interpreter.

    Returns whatever ``SPARQLWrapper`` produces when converting the JSON
    response.
    """
    # Slicing version_info yields a plain (major, minor) tuple for %-formatting.
    major_minor = sys.version_info[:2]
    client = SPARQLWrapper(endpoint_url, agent="user:salgo60/%s.%s" % major_minor)
    client.setReturnFormat(JSON)
    client.setQuery(query)
    response = client.query()
    return response.convert()
def check(url, wd, wdLabel):
    """Check whether *url* can be fetched successfully.

    Sends a GET request for *url* through the module-level ``http`` pool.

    Args:
        url: The URL to request.
        wd: Wikidata entity the URL belongs to; used only in the report line.
        wdLabel: Label of that entity; used only in the report line.

    Returns:
        True when the response status is 200. False when the status is
        anything else or when the request raised an exception; in both
        failure cases one line describing the problem is printed.
    """
    try:
        r = http.request('GET', url)
    except Exception as e:
        # Best-effort check: a failing request must not abort the whole run.
        # Report the URL and item as well, so the failure can be traced in
        # the same way as a non-200 status.
        print("\t\tError\t", str(e), " \t", url, "\tWikidata: ", wd, " - ", wdLabel)
        return False
    if r.status != 200:
        print("Status: ", r.status, " \t", url, "\tWikidata: ", wd, " - ", wdLabel)
        return False
    return True
# --- Script body: fetch all P9495 links from Wikidata and test each one. ---
start_time = datetime.now()
print("Last run: ", start_time)

results = get_results(endpoint_url, query)
bindings = results["results"]["bindings"]
print("Number records i Wikidata: " + str(len(bindings)))

ok = 0
notok = 0
for result in bindings:
    # Reset for every record so the error line below can never hit an
    # undefined name (first record) or report the previous record's item.
    wd = None
    try:
        # Read the item first so it is available for the error message even
        # if one of the other fields is missing.
        wd = result["wd"]["value"]
        currentURL = result["oldURL"]["value"]
        wdLabel = result["wdLabel"]["value"]
        if check(currentURL, wd, wdLabel):
            ok += 1
        else:
            notok += 1
    except Exception as error:
        # Keep going on a malformed record; it is counted as neither ok nor
        # not ok.
        print("An error occurred: ", wd, " - ", type(error).__name__)

# Share of failing links in percent; guard against an empty result set, which
# would otherwise raise ZeroDivisionError.
checked = ok + notok
procent = notok / checked * 100 if checked else 0.0
print("OK: ", ok, "\t not ok", notok, "\t procent problem", procent)

end = datetime.now()
print("Ended: ", end)
# Use the same end timestamp that was just printed so both lines agree.
print('Time elapsed (hh:mm:ss.ms) {}'.format(end - start_time))
Twitter Linnea om dessa döda länkar - bra om SHM fanns på GitHub
Om ni tar bort sidor, skapa en "tombstone page" så vet vi vad som hänt: https://github.com/diggsweden/persistent-identifiers-investigation/issues/13
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
/usr/local/bin/python3.8 "/Users/magnus/Library/Application Support/JetBrains/PyCharmCE2020.3/scratches/scratch_133.py"
Last run: 2023-10-15 01:03:32.726469
Number records i Wikidata: 31188
Status: 404 https://samlingar.shm.se/person/1DAEFAD9-9092-4025-AE9C-6B33C268AEBB Wikidata: http://www.wikidata.org/entity/Q116161013 - Elvira Hilma Augusta de la Gardie
Status: 404 https://samlingar.shm.se/person/075572B7-135E-4882-B258-BE79ED04D475 Wikidata: http://www.wikidata.org/entity/Q5915860 - Knut Jonsson
Status: 404 https://samlingar.shm.se/object/919425C1-7A82-4778-9770-C33BE9586856 Wikidata: http://www.wikidata.org/entity/Q10425763 - Bambergskrinet
Status: 404 https://samlingar.shm.se/object/8BA2743C-5065-438B-9FAA-D854606DB716 Wikidata: http://www.wikidata.org/entity/Q26253636 - Elisabethrelikvariet
Status: 404 https://samlingar.shm.se/person/0D62573A-AF91-4A6A-8D18-6E9BF674D1C8 Wikidata: http://www.wikidata.org/entity/Q774636 - Magyar Nemzeti Bank
Status: 404 https://samlingar.shm.se/person/0325D06F-75DE-4117-8ECF-1FE8442E0D06 Wikidata: http://www.wikidata.org/entity/Q6131983 - Oleg av Kiev
Status: 404 https://samlingar.shm.se/person/AFD46626-853A-418F-9CF0-4DBEC6494566 Wikidata: http://www.wikidata.org/entity/Q41657 - Romanos III Argyros
Status: 404 https://samlingar.shm.se/person/7C1F8EBB-51E9-49F5-8C99-3B9070278EBD Wikidata: http://www.wikidata.org/entity/Q41847 - Romanos II
Status: 404 https://samlingar.shm.se/peson/A7A5CDE0-A4A7-4073-88FB-517FA03F383A Wikidata: http://www.wikidata.org/entity/Q60232259 - Q60232259
Status: 404 https://samlingar.shm.se/person/5672732A-7CD1-4ECC-AC66-D76988DC7E4D Wikidata: http://www.wikidata.org/entity/Q873255 - Q873255
Status: 404 https://samlingar.shm.se/person/17979946-C8EE-4A46-9876-21D6A705BDEA Wikidata: http://www.wikidata.org/entity/Q5789138 - Hans Hansson
Status: 404 https://samlingar.shm.se/person/00138EDA-18B8-4393-9C5B-96FBD5C209ED Wikidata: http://www.wikidata.org/entity/Q29342013 - Per Vilhelm Lundström
Status: 404 https://samlingar.shm.se/person/69D000E9-0996-4D42-A719-2CF49CA444A1 Wikidata: http://www.wikidata.org/entity/Q6014469 - Arthur Nordén
Status: 404 https://samlingar.shm.se/person/A53CB9BB-2854-458B-BFF6-A3E98A160318 Wikidata: http://www.wikidata.org/entity/Q59530627 - Q59530627
Status: 404 https://samlingar.shm.se/person/4FFCF4AC-538C-4DB4-B938-FE0B13ADFAB7 Wikidata: http://www.wikidata.org/entity/Q102035715 - Orvar Isberg
Status: 404 https://samlingar.shm.se/person/28EEB40D-632A-4F38-B949-52E80D39AAE1 Wikidata: http://www.wikidata.org/entity/Q26275315 - Bo Gyllensvärd
Status: 404 https://samlingar.shm.se/person/2F49F3D8-B504-4212-9C9C-40462C37DBD7 Wikidata: http://www.wikidata.org/entity/Q30106570 - Edvard Ehrenstéen
Status: 404 https://samlingar.shm.se/person/999DA642-2D1A-46F7-843D-C658BA3C2759 Wikidata: http://www.wikidata.org/entity/Q6190360 - August Wilhelm Stiernstedt
Status: 404 https://samlingar.shm.se/person/837F6974-5D1D-44B3-9153-959BF3C5C38F Wikidata: http://www.wikidata.org/entity/Q55948207 - Wolfgang Jacobeit
Status: 404 https://samlingar.shm.se/person/B02D8C40-765B-49E3-9DAF-6FF03614C68B Wikidata: http://www.wikidata.org/entity/Q5825704 - Valdar Jaanusson
OK: 31168 not ok 20 procent problem 0.06412722842118763
Ended: 2023-10-15 01:21:58.799947
Time elapsed (hh:mm:ss.ms) 0:18:26.073511
Process finished with exit code 0