Last active
September 7, 2023 09:13
-
-
Save salgo60/a4ebde4f0a279d5f9479aeaf7b846403 to your computer and use it in GitHub Desktop.
Test run from 2021; the same test was repeated in 2023: https://gist.github.com/salgo60/49c52e1f7009f0ef318e9fadd94addc5
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Check for link rot
# See the question on the RAÄ (Riksantikvarieämbetet) Facebook page: https://www.facebook.com/riksantikvarieambetet/posts/10158191730201970
# pip install sparqlwrapper | |
# https://rdflib.github.io/sparqlwrapper/ | |
# https://gist.github.com/salgo60/a4ebde4f0a279d5f9479aeaf7b846403 | |
from datetime import datetime | |
import urllib3 | |
import sys | |
from SPARQLWrapper import SPARQLWrapper, JSON | |
# SPARQL used https://w.wiki/Vb8
# Selects every item that has a P1260 value and builds the
# kulturarvsdata.se URL to be tested from that value.
query = """SELECT ?item ?itemLabel ?Ksam ?oldURL WHERE {
?item wdt:P1260 ?Ksam
BIND (URI(CONCAT("http://kulturarvsdata.se/",?Ksam)) AS ?oldURL)
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}"""

# SPARQL endpoint the query above is sent to.
endpoint_url = "https://query.wikidata.org/sparql"

# Single urllib3 pool manager shared by every URL check.
http = urllib3.PoolManager()
def get_results(endpoint_url, query):
    """Send ``query`` to the SPARQL endpoint and return the converted result.

    The request asks for JSON and identifies itself with a user agent built
    from the author's user name and the running Python major.minor version.
    """
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(
        endpoint_url,
        agent="user:salgo60/%s.%s" % (major, minor),
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()
def check(url, timeout=30.0):
    """Check whether ``url`` answers an HTTP GET with status 200.

    Args:
        url: The URL to request.
        timeout: Seconds to wait for the server before giving up; passed
            through to urllib3. Without it a single stalled host would
            block the whole run indefinitely.

    Returns:
        True when the response status is 200. False when the status is
        anything else, or when the request itself fails (connection error,
        timeout, retries exhausted, unparsable URL). The reason is printed
        in both failure cases.
    """
    try:
        r = http.request('GET', url, timeout=timeout)
    except urllib3.exceptions.HTTPError as err:
        # A dead host is just another broken link: report it and keep going
        # instead of aborting a run that covers many thousands of URLs.
        print("Error: ", type(err).__name__, " \t", url)
        return False
    if r.status != 200:
        print("Status: ", r.status, " \t", url)
        return False
    return True
# Fetch every URL produced by the SPARQL query, test each one with check(),
# and print the totals together with start/end timestamps.
start_time = datetime.now()
print("Last run: ", start_time)
results = get_results(endpoint_url, query)
ok = 0
notok = 0
for result in results["results"]["bindings"]:
    currentURL = result["oldURL"]["value"]
    if check(currentURL):
        ok += 1
    else:
        notok += 1
print("OK: ",ok,"\t not ok",notok)
end = datetime.now()
print("Ended: ", end)
# Reuse the timestamp printed above so that the "Ended" stamp minus the
# "Last run" stamp equals the elapsed time that is reported.
print('Time elapsed (hh:mm:ss.ms) {}'.format(end - start_time))
Ny körning
OK: 159260 not ok 4606
Ended: 2021-03-25 13:00:49.400868
Time elapsed (hh:mm:ss.ms) 1:39:06.335304
==> skillnad 29 Dec 2020 dvs. 3 månader
OK: +3318 not OK: -2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ny körning idag
OK: 155942 not ok 4608 = total 160550
==> skillnad 29 Jun dvs. 6 månader
OK: +16 not OK: +3