Last active
September 5, 2020 07:19
-
-
Save salgo60/61523c1ca62bae248945468784c3d23c to your computer and use it in GitHub Desktop.
Riksarkivet NAD kollas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install sparqlwrapper | |
# https://rdflib.github.io/sparqlwrapper/ | |
import sys | |
import urllib3 | |
from SPARQLWrapper import SPARQLWrapper, JSON | |
# SPARQL endpoint of the Wikidata Query Service; both queries below are sent here.
endpoint_url = "https://query.wikidata.org/sparql"
# Shared urllib3 connection pool used for the URL checks.
http = urllib3.PoolManager()

# SPARQL https://w.wiki/bJV
# Items that carry a P5324 value (bound to ?NADvalue, later used to build a
# Riksarkivet search URL) AND are an instance of (P31) wd:Q615980.
# NOTE(review): going by the variable name Q615980 is presumably the
# "parish" class -- confirm on Wikidata.
queryParish = """SELECT (REPLACE(STR(?item), ".*Q", "Q") AS ?qid) ?itemLabel ?itemDescription ?NADvalue WHERE {
?item wdt:P5324 ?NADvalue.
?item wdt:P31 wd:Q615980
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
} order by ?itemLabel """

# SPARQL https://w.wiki/bJX
# The complement of the query above: items with a P5324 value that are NOT
# an instance of wd:Q615980 (excluded through MINUS).
query = """SELECT (REPLACE(STR(?item), ".*Q", "Q") AS ?qid) ?itemLabel ?itemDescription ?NADvalue WHERE {
?item wdt:P5324 ?NADvalue.
minus {?item wdt:P31 wd:Q615980}
SERVICE wikibase:label { bd:serviceParam wikibase:language "sv,en". }
} order by ?itemLabel """
def get_results(endpoint_url, query):
    """Send a SPARQL query to an endpoint and return the converted response.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: SPARQL query text.

    Returns:
        Whatever ``SPARQLWrapper`` produces when converting the response with
        the JSON return format requested here.
    """
    # The user agent advertises the running interpreter's major.minor version.
    major_minor = sys.version_info[:2]
    client = SPARQLWrapper(endpoint_url, agent="user Salgo50/%s.%s" % major_minor)
    client.setReturnFormat(JSON)
    client.setQuery(query)
    response = client.query()
    return response.convert()
def check(url):
    """Check whether a URL is OK.

    Issues a GET request for *url* through the module-level ``http`` pool.

    Args:
        url: The URL to request.

    Returns:
        False when the server answers with status 404, or when the request
        fails at the transport level (any ``urllib3.exceptions.HTTPError``,
        e.g. connection errors or exhausted retries). True for every other
        status code.
    """
    try:
        response = http.request('GET', url)
    except urllib3.exceptions.HTTPError:
        # One unreachable URL must not abort a run over the whole result set;
        # report it as a failed check so it shows up in the error listing.
        return False
    # Only an explicit "not found" counts as a broken link; other statuses
    # are deliberately still treated as OK.
    return response.status != 404
def checkResult(results):
    """Check the Riksarkivet URL of every binding and update the global tallies.

    For each row in ``results["results"]["bindings"]`` a search URL is built
    from the row's ``NADvalue`` and passed to ``check``. Successful checks
    increment the module-level ``ok`` counter; failures increment ``notok``
    and are printed as one line each, preceded by a single "Errors" header
    when the very first failure is seen.

    Args:
        results: Query result mapping with a ``results`` -> ``bindings`` list
            whose rows provide ``qid``, ``itemLabel`` and ``NADvalue`` values.
    """
    global ok, notok
    for row in results["results"]["bindings"]:
        nad_value = row["NADvalue"]["value"]
        currentURL = "https://sok.riksarkivet.se/?postid=ArkisRef%20" + nad_value
        if check(currentURL):
            ok += 1
            continue
        notok += 1
        # The header is printed once, right before the first failing row.
        if notok == 1:
            print("Errors")
        print("\t", row["qid"]["value"], "|", row["itemLabel"]["value"], "|", nad_value,
              "|", currentURL)
# Running tallies, updated by checkResult() through its ``global`` declaration.
ok = 0
notok = 0

# Run the instance-of-Q615980 query first, then its complement, checking every
# returned value.
for sparql_query in (queryParish, query):
    results = get_results(endpoint_url, sparql_query)
    checkResult(results)

# Final summary over both queries.
print("OK: ",ok,"\t not ok",notok)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
2020 sep 5