Last active
September 26, 2020 22:42
-
-
Save jvfe/9a8efde35650492f5f1afee4acef82fb to your computer and use it in GitHub Desktop.
Check the results of a Wikidata SPARQL query against a ShEx schema
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyshex.shex_evaluator import ShExEvaluator | |
from pyshex.user_agent import SlurpyGraphWithAgent | |
from pyshex.utils.sparql_query import SPARQLQuery | |
import pandas as pd | |
def test_query_against_shex(
    schema: str,
    sparql: str,
    *,
    endpoint: str = "https://query.wikidata.org/sparql",
) -> pd.DataFrame:
    """Check the items returned by a SPARQL query against a ShEx schema.

    Every focus node produced by the query is evaluated with PyShEx against
    the given schema, and the per-item outcome is collected into a table.
    Both inputs must be strings; be careful with line breaks at the end of
    the schema string, as that tends to break things.

    Args:
        schema: A ShEx schema to check the items against.
        sparql: A SPARQL query from which to obtain the items.
        endpoint: URL of the SPARQL endpoint used both to run the query and
            to build the graph the items are evaluated in. Defaults to the
            Wikidata query service.

    Returns:
        A pandas DataFrame with one row per evaluated item and three
        columns: ``item`` (the focus node), ``conforms`` (whether it
        conforms to the schema) and ``reason`` (the reason reported by the
        evaluator).
    """
    graph = SlurpyGraphWithAgent(endpoint)
    focus_nodes = SPARQLQuery(endpoint, sparql).focus_nodes()
    evaluator = ShExEvaluator(graph, schema, focus_nodes)

    # Normalise the evaluator's result to a plain bool so the "conforms"
    # column always holds True/False.
    rows = [
        [outcome.focus, bool(outcome.result), outcome.reason]
        for outcome in evaluator.evaluate()
    ]
    return pd.DataFrame.from_records(
        rows, columns=["item", "conforms", "reason"]
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment