Skip to content

Instantly share code, notes, and snippets.

@jvfe
Last active August 6, 2020 21:32
Show Gist options
  • Save jvfe/fad021e45e8f93e2670e1d582b76a23c to your computer and use it in GitHub Desktop.
Save jvfe/fad021e45e8f93e2670e1d582b76a23c to your computer and use it in GitHub Desktop.
A few utilities to easily get data from Wikidata into a dataframe
from collections import defaultdict
import pandas as pd
import requests
def perform_query(query, timeout=90):
    """Performs a SPARQL query to the wikidata endpoint

    Args:
        query: A string containing a functional sparql query
        timeout: Seconds to wait for the server before giving up; passed
            unchanged to ``requests.get``. Without a timeout a stalled
            connection would block the caller indefinitely.

    Returns:
        The decoded JSON body of the response, or None when the server
        answered with an HTTP error status (the error is printed).

    Raises:
        requests.exceptions.RequestException: For failures that are not an
            HTTP error status (e.g. connection problems or a timeout).
            Only ``HTTPError`` is handled here; the rest propagate.
    """
    endpoint_url = "https://query.wikidata.org/sparql"

    # NOTE(review): no User-Agent header is sent, so requests' default is
    # used -- confirm this is acceptable for the endpoint's usage policy.
    try:
        response = requests.get(
            endpoint_url,
            params={"query": query},
            headers={"Accept": "application/sparql-results+json"},
            timeout=timeout,
        )
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        # Best-effort behaviour: report the HTTP failure and hand back an
        # explicit None instead of falling through to an unbound result.
        print(err)
        return None

    return response.json()
def parse_query_results(query_result, field_list):
    """Parse wikidata query results into a nice dataframe

    Args:
        query_result: A json dict with the results from the query; the rows
            are read from ``query_result["results"]["bindings"]``.
        field_list: A list of the fields from the response you want in your
            final dataframe. e.g. if your query selects "?item" and
            "?itemLabel" you may want ["item", "itemLabel"] as your list.

    Returns:
        A pandas dataframe with a column for each component from field_list,
        in that order. The column is present even when the query returned no
        rows. A field that is missing from a given row (for instance an
        unbound OPTIONAL variable) yields None in that cell. The
        "http://www.wikidata.org/entity/" prefix is stripped from values so
        entity URIs are reduced to their bare identifier.
    """
    bindings = query_result["results"]["bindings"]

    # Build every requested column up front so that an empty result set still
    # produces the documented columns, and use .get() because a row only
    # carries the variables that were actually bound for it.
    columns = {
        field: [row.get(field, {}).get("value") for row in bindings]
        for field in field_list
    }

    # Dots are escaped so the pattern matches the literal entity prefix only.
    return pd.DataFrame(columns).replace(
        {r"http://www\.wikidata\.org/entity/": ""}, regex=True
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment