Skip to content

Instantly share code, notes, and snippets.

@VDK
Last active January 3, 2021 20:23
Show Gist options
  • Save VDK/5f9e63fa7cb17d7e0f74909c611601c2 to your computer and use it in GitHub Desktop.
Save VDK/5f9e63fa7cb17d7e0f74909c611601c2 to your computer and use it in GitHub Desktop.
Builds QuickStatements that assign a family name as a property in Wikidata
# Created by Vera de Kok - aka 1Veertje 01.01.2021
# Builds QuickStatements commands that assign a family name (P734) to items in Wikidata
# Reads a list of family names from dict.csv
#
# Please carefully check for false positives!
#
from SPARQLWrapper import SPARQLWrapper, JSON
import csv
import time
import requests
import urllib
import re
# Shared requests session used for the MediaWiki API calls in get_search_results.
S = requests.Session()
def get_wikidata(query):
    """Execute a SPARQL query on the Wikidata Query Service.

    Args:
        query: The SPARQL query text to send.

    Returns:
        The response converted from JSON by SPARQLWrapper.
    """
    endpoint = SPARQLWrapper("https://query.wikidata.org/sparql")
    # Ask for JSON so that convert() yields plain Python containers.
    endpoint.setReturnFormat(JSON)
    endpoint.setQuery(query)
    response = endpoint.query()
    return response.convert()
def get_search_results(qid, name):
    """Search Wikidata for humans mentioning *name* that lack family name *qid*.

    Uses the MediaWiki ``list=search`` API with a query that requires the
    statement ``P31=Q5``, excludes items that already carry ``P734=<qid>``,
    and matches *name* as a quoted phrase.

    Only the first response batch (up to ``srlimit`` hits) is read; API
    continuation is not followed.

    Args:
        qid: QID of the family-name item, e.g. ``"Q104618551"``.
        name: Family name to look for as an exact phrase.

    Returns:
        A list with the ``title`` of every returned search hit (the caller
        uses these as item QIDs), or ``False`` when the API reports zero
        total hits.
    """
    search_query = 'haswbstatement:"P31=Q5" -haswbstatement:"P734={}" "{}"'.format(
        qid, name
    )
    params = {
        "action": "query",
        "format": "json",
        "list": "search",
        "srlimit": "500",
        "srprop": "sectiontitle",
        "srsearch": search_query,
    }
    response = S.get(url="https://wikidata.org/w/api.php", params=params)
    results = response.json()["query"]
    if results['searchinfo']['totalhits'] == 0:
        return False
    # Iterate the hits directly instead of indexing through range(len(...)).
    return [hit['title'] for hit in results['search']]
def get_label(qid):
    """Return the label of the Wikidata item *qid*.

    The label is resolved by the ``wikibase:label`` service using the
    language list ``[AUTO_LANGUAGE],en,nl,de``.

    Args:
        qid: Item identifier, e.g. ``"Q42"``; it is inserted after the
            ``wd:`` prefix in the query.

    Returns:
        The ``itemLabel`` value of the first result binding.

    Raises:
        IndexError: If the query returns no bindings.
    """
    query = (
        "SELECT ?itemLabel WHERE\n"
        "{\n"
        "BIND(wd:" + qid + " AS ?item)\n"
        'SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en,nl,de" }\n'
        "}"
    )
    bindings = get_wikidata(query)["results"]["bindings"]
    # Only the first binding is needed; no need to materialise every label.
    return bindings[0]['itemLabel']['value']
# Name particles ("tussenvoegsels" in Dutch, e.g. "van", "de", "von") that, when
# they directly precede the family name in a label, mean the person carries a
# longer surname and must not be matched. Followed by a single space.
TUSSENVOEGSEL_PREFIX = r'\b(van|de[rsn]?|van de[srn]?|el|(in)? [\'’]t|tot|te[rn]?|op|tot|uij?t|bij|aan|voor|von|Mac|Ó) '


def has_tussenvoegsel(name, label):
    """Return True if *label* contains *name* directly preceded by a name particle.

    The match is case-insensitive. *name* is escaped so that regex
    metacharacters in a family name are matched literally.
    """
    pattern = TUSSENVOEGSEL_PREFIX + re.escape(name)
    return re.search(pattern, label, flags=re.IGNORECASE) is not None


def build_quickstatement(item, family_qid, family_name, label):
    """Build one QuickStatements line assigning P734=*family_qid* to *item*.

    The trailing comment shows the item label and the family name for manual
    review; it is flagged with ``!!!`` when *family_name* does not occur
    (case-sensitively) in *label*.
    """
    flag = "" if family_name in label else "!!!"
    return (
        item + "|P734|" + family_qid
        + " /* " + label + " + " + family_name + " " + flag + "*/\n"
    )


def main():
    """Read family names from dict.csv and append statements to quickstatements.txt.

    For every CSV row (columns ``qid`` and ``name``) the candidate items are
    searched, their labels fetched, and a QuickStatements line is printed and
    appended for each item whose label does not show a name particle in
    front of the family name.
    """
    # newline='' is what the csv module expects for files it reads.
    with open('dict.csv', 'r', encoding='UTF8', newline='') as names_file:
        for row in csv.DictReader(names_file):
            family_qid = row['qid']
            family_name = row['name']
            items = get_search_results(family_qid, family_name)
            if items:
                # The with-statement closes the file; no explicit close() needed.
                with open("quickstatements.txt", "a", encoding='UTF8') as out_file:
                    for item in items:
                        label = get_label(item)
                        if has_tussenvoegsel(family_name, label):
                            continue
                        qs = build_quickstatement(
                            item, family_qid, family_name, label
                        )
                        print(qs)
                        out_file.write(qs)
            # NOTE(review): pause between family names to go easy on the
            # Wikidata APIs; the original nesting level of this sleep was lost
            # with the indentation -- confirm it is meant to run once per row.
            time.sleep(15)


if __name__ == "__main__":
    main()
qid name
Q104618551 Stronck
Q104618549 Repelaer van Driel
Q104618548 van Helvoirt Pel
Q104618547 van den Sigtenhorst
Q104618545 Noordewier
Q104614419 Urlus
Q104614418 Rutters
Q104614417 Wierts
Q104614416 Dopper
Q104614414 Sistermans
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment