Skip to content

Instantly share code, notes, and snippets.

@salgo60
Last active September 7, 2023 09:13
Show Gist options
  • Save salgo60/a4ebde4f0a279d5f9479aeaf7b846403 to your computer and use it in GitHub Desktop.
Save salgo60/a4ebde4f0a279d5f9479aeaf7b846403 to your computer and use it in GitHub Desktop.
# Check link rot
# See question RAÄ FB https://www.facebook.com/riksantikvarieambetet/posts/10158191730201970
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/
# https://gist.github.com/salgo60/a4ebde4f0a279d5f9479aeaf7b846403
from datetime import datetime
import urllib3
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
# SPARQL endpoint of the Wikidata Query Service that `query` is sent to.
endpoint_url = "https://query.wikidata.org/sparql"
# Shared urllib3 connection pool used for the per-URL GET requests.
http = urllib3.PoolManager()
# SPARQL used https://w.wiki/Vb8
# For every item that has a P1260 value (?Ksam), build ?oldURL by prefixing
# that value with "http://kulturarvsdata.se/". Labels are requested in
# Swedish first, then English.
# NOTE(review): P1260 is presumably the K-samsök (Swedish Open Cultural
# Heritage) URI property -- confirm against Wikidata.
query = """SELECT ?item ?itemLabel ?Ksam ?oldURL WHERE {
?item wdt:P1260 ?Ksam
BIND (URI(CONCAT("http://kulturarvsdata.se/",?Ksam)) AS ?oldURL)
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}"""
def get_results(endpoint_url, query):
    """Run `query` against the SPARQL endpoint and return the decoded JSON.

    The request identifies itself with a user agent made of the gist
    author's user name and the running Python major.minor version.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: SPARQL query text.

    Returns:
        The converted JSON result of the query.
    """
    # Slicing version_info yields the (major, minor) pair for the two %s slots.
    agent = "user:salgo60/%s.%s" % sys.version_info[:2]
    client = SPARQLWrapper(endpoint_url, agent=agent)
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()
def check(url):
    """Check whether `url` answers an HTTP GET request with status 200.

    Any other status is reported on stdout together with the URL. A request
    that fails at the transport level (connection error, exhausted retries,
    malformed URL, ...) is reported as well and counted as not OK, so that a
    single unreachable host does not abort a run over many URLs.

    Args:
        url: The URL to request.

    Returns:
        True if the response status is 200, otherwise False.
    """
    try:
        r = http.request('GET', url)
    except urllib3.exceptions.HTTPError as err:
        # Base class of urllib3's own errors; treat the link as broken.
        print("Error: ", err, " \t", url)
        return False
    if r.status != 200:
        print("Status: ",r.status, " \t", url)
        return False
    return True
# Run the link check: fetch every candidate URL from the SPARQL endpoint,
# request each one, and report how many answered 200 and how many did not.
start_time = datetime.now()
print("Last run: ", start_time)
results = get_results(endpoint_url, query)
ok = 0
notok = 0
for result in results["results"]["bindings"]:
    # Each binding carries the URL built by the query's BIND clause.
    currentURL = result["oldURL"]["value"]
    if check(currentURL):
        ok += 1
    else:
        notok += 1
print("OK: ",ok,"\t not ok",notok)
end = datetime.now()
print("Ended: ", end)
# Reuse `end` so the elapsed time agrees with the "Ended" timestamp above.
print('Time elapsed (hh:mm:ss.ms) {}'.format(end - start_time))
@salgo60
Copy link
Author

salgo60 commented Dec 29, 2020

Ny körning idag

OK: 155942 not ok 4608 = total 160550

==> skillnad 29 Jun dvs. 6 månader
OK: +16 not OK: +3

@salgo60
Copy link
Author

salgo60 commented Mar 25, 2021

Ny körning
OK: 159260 not ok 4606
Ended: 2021-03-25 13:00:49.400868
Time elapsed (hh:mm:ss.ms) 1:39:06.335304

==> skillnad 29 Dec 2020 dvs. 3 månader
OK: +3318 not OK: -2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment