Skip to content

Instantly share code, notes, and snippets.

@salgo60
Created June 29, 2020 12:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save salgo60/110cc4eb2541b025de92f96c4546cdc2 to your computer and use it in GitHub Desktop.
Save salgo60/110cc4eb2541b025de92f96c4546cdc2 to your computer and use it in GitHub Desktop.
Check link rot
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/
import urllib3
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
# SPARQL endpoint of the Wikidata Query Service that the query below is sent to.
endpoint_url = "https://query.wikidata.org/sparql"
# urllib3 connection pool used by check() for its HTTP requests.
http = urllib3.PoolManager()
# SPARQL used https://w.wiki/VVk
# The query selects items whose P1260 value has the form "raa/bbr/<rest>"
# (the two prefix segments are compared case-insensitively) and binds three
# URLs per item:
#   ?oldURL     - "http://kulturarvsdata.se/" followed by the P1260 value
#   ?oldhtmlURL - the same address with "/html/" inserted before <rest>
#   ?newURL     - the bebyggelseregistret.raa.se visaHistorik page for <rest>
# Labels are requested in Swedish first, then English.
# The Swedish comment lines inside the literal are part of the query text and
# are sent as-is; in English they read: "...visaHistorik.raa?anlaggningId=
# 21300000008136 is published instead of raa/bbr/21300000008136. That would
# probably work in general for all of these."
query = """# www.bebyggelseregistret.raa.se/bbr2/anlaggning/visaHistorik.raa?anlaggningId=21300000008136
# läggs upp istället för raa/bbr/21300000008136.
# Det skulle förmodligen funka generellt för alla sådana
SELECT ?item ?itemLabel ?Ksam ?oldURL ?oldhtmlURL ?newURL ?before ?before2 ?before3 WHERE {
?item wdt:P1260 ?Ksam
BIND (strbefore(?Ksam,"/") AS ?before)
BIND (strafter(?Ksam,"/") AS ?after)
BIND (strbefore(?after,"/") AS ?before2)
BIND (strafter(?after,"/") AS ?before3)
FILTER((LCASE(?before) = "raa") && (LCASE(?before2) = "bbr"))
# FILTER((LCASE(?before3) = "21300000008136"))
BIND (URI(CONCAT("http://www.bebyggelseregistret.raa.se/bbr2/anlaggning/visaHistorik.raa?anlaggningId=",?before3)) AS ?newURL)
BIND (URI(CONCAT("http://kulturarvsdata.se/",?Ksam)) AS ?oldURL)
BIND (URI(CONCAT("http://kulturarvsdata.se/",?before,"/",?before2,"/html/",?before3)) AS ?oldhtmlURL)
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
}"""
def get_results(endpoint_url, query):
    """Send ``query`` to the SPARQL endpoint and return the converted JSON result.

    The request carries a user agent of the form
    ``user:salgo60/<major>.<minor>``, where the two numbers are taken from
    the version of the running Python interpreter.
    """
    major_minor = tuple(sys.version_info[:2])
    client = SPARQLWrapper(
        endpoint_url,
        agent="user:salgo60/%s.%s" % major_minor,
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()
# Run the query once when the script is loaded; the converted JSON result is
# read further down via results["results"]["bindings"].
results = get_results(endpoint_url, query)
def check(url, timeout=10.0):
    """Check whether a URL is OK, i.e. a GET request answers with status 200.

    Any other status is printed together with the URL and reported as False.
    Transport-level failures raised by urllib3 (unreachable host, timeout,
    retries exhausted, ...) are printed and reported as False as well, so a
    single dead link cannot abort a run over many URLs.

    Args:
        url: Address to request.
        timeout: Seconds urllib3 may wait on the connection before the
            attempt counts as failed; without it a silent host could block
            the run indefinitely.

    Returns:
        True when the response status is 200, otherwise False.
    """
    try:
        response = http.request('GET', url, timeout=timeout)
    except urllib3.exceptions.HTTPError as error:
        # Base class of urllib3's own errors: treat the link as broken
        # instead of letting the exception end the whole check.
        print("Error: ", error, " \t", url)
        return False
    if response.status != 200:
        print("Status: ",response.status, " \t", url)
        return False
    return True
# Check the old kulturarvsdata.se URL of every result row, in result order,
# then report how many answered OK and how many did not.
outcomes = [
    check(binding["oldURL"]["value"])
    for binding in results["results"]["bindings"]
]
ok = sum(1 for passed in outcomes if passed)
notok = len(outcomes) - ok
print("OK: ",ok,"\t not ok",notok)
@salgo60
Copy link
Author

salgo60 commented Jun 1, 2022

OK: 6553 not ok 92

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment