Skip to content

Instantly share code, notes, and snippets.

@salgo60
Last active September 5, 2020 07:19
Show Gist options
  • Save salgo60/61523c1ca62bae248945468784c3d23c to your computer and use it in GitHub Desktop.
Save salgo60/61523c1ca62bae248945468784c3d23c to your computer and use it in GitHub Desktop.
Riksarkivet NAD kollas
# pip install sparqlwrapper
# https://rdflib.github.io/sparqlwrapper/
import sys
import urllib3
from SPARQLWrapper import SPARQLWrapper, JSON
# SPARQL endpoint that both queries below are sent to.
endpoint_url = "https://query.wikidata.org/sparql"
# Shared urllib3 connection pool; check() issues its GET requests through it.
http = urllib3.PoolManager()
#
# Both queries return the same four columns:
#   ?qid             - the item IRI with everything up to the final "Q" stripped
#   ?itemLabel       - label, requested in Swedish with English as fallback
#   ?itemDescription - description from the same label service
#   ?NADvalue        - the item's wdt:P5324 value (used to build the URL that
#                      checkResult() verifies)
# Rows are ordered by ?itemLabel.
#
# Query 1: items that have a P5324 value AND the statement wdt:P31 wd:Q615980
# (presumably the "parish" class, judging by the variable name - confirm).
# SPARQL https://w.wiki/bJV
queryParish = """SELECT (REPLACE(STR(?item), ".*Q", "Q") AS ?qid) ?itemLabel ?itemDescription ?NADvalue WHERE {
?item wdt:P5324 ?NADvalue.
?item wdt:P31 wd:Q615980
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
} order by ?itemLabel """
# Query 2: the complement of query 1 - items that have a P5324 value but do
# NOT have the statement wdt:P31 wd:Q615980 (removed with MINUS).
# SPARQL https://w.wiki/bJX
query = """SELECT (REPLACE(STR(?item), ".*Q", "Q") AS ?qid) ?itemLabel ?itemDescription ?NADvalue WHERE {
?item wdt:P5324 ?NADvalue.
minus {?item wdt:P31 wd:Q615980}
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
} order by ?itemLabel """
def get_results(endpoint_url, query):
    """Run *query* against the SPARQL endpoint and return the converted result.

    Args:
        endpoint_url: URL of the SPARQL endpoint to send the query to.
        query: SPARQL query text.

    Returns:
        Whatever ``SPARQLWrapper`` produces from ``query().convert()`` after
        the JSON return format has been requested.
    """
    # The User-Agent carries the running interpreter's major.minor version.
    agent_header = "user Salgo50/%s.%s" % sys.version_info[:2]

    client = SPARQLWrapper(endpoint_url, agent=agent_header)
    client.setQuery(query)
    client.setReturnFormat(JSON)

    response = client.query()
    return response.convert()
def check(url):
    """Check whether *url* is OK.

    The URL is fetched with a GET request through the module-level ``http``
    pool.

    Args:
        url: Absolute URL to fetch.

    Returns:
        ``False`` when the server answers 404 or when the request cannot be
        completed at all; ``True`` for every other response status.
    """
    try:
        r = http.request('GET', url)
    except urllib3.exceptions.HTTPError:
        # A transport failure (DNS, timeout, retries exhausted, ...) would
        # otherwise abort the whole run and lose the counts gathered so far.
        # An unreachable URL is reported as "not ok" so the caller lists it.
        return False
    # Only 404 counts as a broken link; other statuses are accepted as before.
    return r.status != 404
def checkResult(results):
    """Verify the URL built from every result row and tally the outcome.

    For each binding the ``NADvalue`` is appended to the sok.riksarkivet.se
    ``postid=ArkisRef`` search URL and passed to ``check()``.  The
    module-level counters ``ok`` and ``notok`` are updated in place; each
    failing row is printed, preceded by an "Errors" header on the very first
    failure.

    Args:
        results: Converted SPARQL JSON result; rows are read from
            ``results["results"]["bindings"]`` and must provide ``NADvalue``
            (plus ``qid`` and ``itemLabel`` for rows that fail).
    """
    # NOTE(review): the source this was restored from had lost its
    # indentation; the header/detail prints are assumed to belong to the
    # failure branch - confirm.
    global ok, notok

    for row in results["results"]["bindings"]:
        link = "https://sok.riksarkivet.se/?postid=ArkisRef%20" + row["NADvalue"]["value"]

        if check(link):
            ok += 1
            continue

        notok += 1
        if notok == 1:
            # Header goes out once, right before the first failing row.
            print("Errors")
        print(
            "\t",
            row["qid"]["value"],
            "|",
            row["itemLabel"]["value"],
            "|",
            row["NADvalue"]["value"],
            "|",
            link,
        )
# Running totals; checkResult() updates both through its `global` statement.
ok = notok = 0

# Same order as before: the parish query first, then everything else.
for sparql_text in (queryParish, query):
    results = get_results(endpoint_url, sparql_text)
    checkResult(results)

print("OK: ", ok, "\t not ok", notok)
@salgo60
Copy link
Author

salgo60 commented Sep 5, 2020

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment