Skip to content

Instantly share code, notes, and snippets.

@ettorerizza
Created July 2, 2017 17:05
Show Gist options
  • Save ettorerizza/9c6f3995b4d2ba8eb13e4bff87d4792f to your computer and use it in GitHub Desktop.
Save ettorerizza/9c6f3995b4d2ba8eb13e4bff87d4792f to your computer and use it in GitHub Desktop.
OpenRefine/Jython sparql query (find possible locations and persons in tokens)
import sys
sys.path.append(r'D:\jython2.7.0\Lib\site-packages')
from SPARQLWrapper import SPARQLWrapper, JSON
from langdetect import detect
# Endpoint URL of the dbpedia.org SPARQL service.
dbpedia_version = "http://dbpedia.org/sparql"

# TEST value (hard-coded token).
value = "comptoir"

# Detect the language of the token. Per the original author's note, this is
# useless with short tokens.
lang_query = detect(value)

# Pick the SPARQL endpoint matching the detected language: dedicated
# endpoints for "fr" and "nl", the dbpedia.org endpoint for anything else.
dbpedia = {
    "fr": "http://fr.dbpedia.org/sparql",
    "nl": "http://nl.dbpedia.org/sparql",
}.get(lang_query, "http://dbpedia.org/sparql")
def get_sparql_label(value, dbpedia_version, lang=None):
    """Query a SPARQL endpoint for places and agents whose label contains *value*.

    The query selects entities that have an rdfs:label or skos:prefLabel
    matching *value* through the ``bif:contains`` full-text predicate, that
    also have a dbo:abstract, and whose type is dbo:Place or dbo:Agent.
    Label and abstract are both filtered on the same language tag.
    At most 5 rows are returned, ordered by descending text score.

    Args:
        value: Token to search for in entity labels.
        dbpedia_version: URL of the SPARQL endpoint to query. This argument
            is honoured as given (it used to be overwritten by the
            module-level ``dbpedia`` variable, which made it impossible for
            a caller to target another endpoint).
        lang: Language tag used in the ``langMatches`` filters. When left to
            ``None`` the module-level ``lang_query`` is used, which is what
            existing two-argument callers rely on.

    Returns:
        The query result converted from JSON by SPARQLWrapper; callers in
        this file read ``results["results"]["bindings"]``, each binding
        exposing ``entity``, ``score1`` and ``type``.
    """
    if lang is None:
        # Backward compatible default: read the module-level language tag at
        # call time, so a caller that reassigns it before calling is honoured.
        lang = lang_query

    # Escape backslashes and double quotes so the token cannot terminate the
    # double-quoted SPARQL string literal it is interpolated into.
    # NOTE(review): a single quote inside the token would still end the
    # single-quoted phrase of the bif:contains expression -- confirm the
    # escaping rules of the endpoint's full-text syntax before relying on it.
    safe_value = value.replace("\\", "\\\\").replace('"', '\\"')

    sparql = SPARQLWrapper(dbpedia_version)
    sparql.setQuery("""
SELECT DISTINCT ?entity ?score1 ?type
WHERE{
?entity ?p ?label.
?entity ?q ?abstract.
Filter langMatches(lang(?label),"%s").
Filter langMatches(lang(?abstract),"%s").
?label <bif:contains> "'%s'" OPTION(score ?score1).
FILTER (?p=<http://www.w3.org/2000/01/rdf-schema#label> ||
?p=<http://www.w3.org/2004/02/skos/core#prefLabel>).
FILTER (?q=<http://dbpedia.org/ontology/abstract>).
?entity a ?type.
FILTER (?type IN (<http://dbpedia.org/ontology/Place>,
<http://dbpedia.org/ontology/Agent>)).
FILTER isIRI(?entity).
} ORDER BY desc(?score1) LIMIT 5
""" % (lang, lang, safe_value))
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()
def _format_bindings(sparql_json):
    """Return one "type||entity" string per binding of a SPARQL JSON result."""
    formatted = []
    for row in sparql_json["results"]["bindings"]:
        formatted.append(row["type"]["value"] + "||" + row["entity"]["value"])
    return formatted


# First attempt: query with the endpoint chosen from the detected language.
results = get_sparql_label(value, dbpedia)
liste = _format_bindings(results)

if not liste:
    # Nothing matched: query again, this time passing the Dutch endpoint URL
    # and with the module-level language tag switched to "NL".
    dbpedia_version = "http://nl.dbpedia.org/sparql"
    lang_query = "NL"
    results = get_sparql_label(value, dbpedia_version)
    liste.extend(_format_bindings(results))

print(liste)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment