Skip to content

Instantly share code, notes, and snippets.

@psychemedia
Last active October 30, 2019 11:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save psychemedia/7ce6c59199f4d7cbfd7aee0451cf074a to your computer and use it in GitHub Desktop.
Save psychemedia/7ce6c59199f4d7cbfd7aee0451cf074a to your computer and use it in GitHub Desktop.
Example of finding relationships between band genres using SPARQL queries over DBPedia Linked Data
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# Import the necessary packages
from SPARQLWrapper import SPARQLWrapper, JSON
# Add some helper functions
# A function that will return the results of running a SPARQL query with
# a defined set of prefixes over a specified endpoint.
# It follows the same five-step process apart from creating the query, which
# is provided as an argument to the function.
def runQuery(endpoint, prefix, q):
    ''' Run a SPARQL query with a declared prefix over a specified endpoint.

    endpoint -- passed to the SPARQLWrapper constructor to select the
                endpoint that will be queried
    prefix   -- string of prefix declarations, prepended verbatim to q
    q        -- string holding the body of the query

    Returns the converted response of the query, with the return format
    set to JSON.
    '''
    sparql = SPARQLWrapper(endpoint)
    # The prefixes and the query are joined by plain string concatenation;
    # no separator is inserted, so any whitespace needed between them must
    # already be present in one of the two strings.
    sparql.setQuery(prefix + q)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()
# Import pandas to provide facilities for creating a DataFrame to hold results
import pandas as pd
# Function to convert query results into a DataFrame
# The results are assumed to be in JSON format and therefore the Python dictionary will have
# the results indexed by 'results' and then 'bindings'.
def dict2df(results):
    ''' Flatten SPARQL query results into a DataFrame.

    results -- dictionary holding the query results; the individual result
               bindings are read from results["results"]["bindings"]

    Each binding becomes one row. For every variable in a binding only its
    'value' entry is kept; anything else stored alongside it is dropped.
    A variable that is absent from a particular binding is left as a
    missing value in that row. An empty list of bindings gives an empty
    DataFrame.
    '''
    rows = [
        # One flat {variable: value} mapping per result binding.
        {var: binding[var]['value'] for var in binding}
        for binding in results["results"]["bindings"]
    ]
    return pd.DataFrame(rows)
# Function to run a query and return results in a DataFrame
def dfResults(endpoint, prefix, q):
    ''' Generate a data frame containing the results of running
        a SPARQL query with a declared prefix over a specified endpoint.

    endpoint, prefix and q are handed unchanged to runQuery; whatever it
    returns is converted by dict2df, and that DataFrame is returned.
    '''
    return dict2df(runQuery(endpoint, prefix, q))
# Print a limited number of results of a query
def printQuery(results, limit=''):
    ''' Print the results from the SPARQL query.

    results -- dictionary holding the query results; the individual result
               bindings are read from results["results"]["bindings"]
    limit   -- number of leading results to print; the default '' is a
               sentinel meaning "print every result"

    Every variable of a result is printed on its own line as
    "name: value", and each result is followed by a blank line.
    '''
    resdata = results["results"]["bindings"]
    if limit != '':
        # Keep only the first `limit` results.
        resdata = resdata[:limit]
    for result in resdata:
        for ans in result:
            print('{0}: {1}'.format(ans, result[ans]['value']))
        # Blank line to separate one result from the next.
        print()
# Run a query and print out a limited number of results
def printRunQuery(endpoint, prefix, q, limit=''):
    ''' Run a SPARQL query and print its results.

    endpoint, prefix and q are handed unchanged to runQuery; the results
    it returns are then passed, together with limit, to printQuery.
    The default limit of '' is forwarded as-is.
    '''
    results = runQuery(endpoint, prefix, q)
    printQuery(results, limit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment