Skip to content

Instantly share code, notes, and snippets.

@pebbie
Created October 9, 2019 14:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pebbie/a3c9795a20510f2765c3d707eaff4e5d to your computer and use it in GitHub Desktop.
Save pebbie/a3c9795a20510f2765c3d707eaff4e5d to your computer and use it in GitHub Desktop.
"""
file: rdf2pandas.py
auth: Peb Ruswono Aryan
desc: load data from an RDF Data Cube (assumed to follow a particular shape) held in an rdflib Graph into a pandas DataFrame
"""
from rdflib import Graph, Namespace, RDF, RDFS
import pandas as pd
# Namespace of the RDF Data Cube vocabulary (qb:DataSet, qb:structure,
# qb:component, qb:dimension, qb:measure, qb:dataSet).
QB = Namespace('http://purl.org/linked-data/cube#')
# Dublin Core Terms namespace; dct:title is consulted as a column label.
DCT = Namespace('http://purl.org/dc/terms/')
def short_name(uristr: str) -> str:
    """Return the local name of a URI: the text after its last '#' or '/'.

    Trailing '#' and '/' characters are ignored, so
    ``http://example.org/dim/`` yields ``dim`` rather than an empty
    string. A string without any separator is returned unchanged. If
    nothing is left once the separators are stripped (e.g. ``"/"``), the
    original string is returned so callers never get an empty name for a
    non-empty input.
    """
    trimmed = uristr.rstrip('#/')
    # rfind returns -1 when the separator is absent, so `cut + 1` is 0 and
    # the whole (trimmed) string is kept.
    cut = max(trimmed.rfind('#'), trimmed.rfind('/'))
    return trimmed[cut + 1:] or uristr
def _column_label(g: Graph, component) -> str:
    """Choose a column header for a cube component.

    Prefers an rdfs:label annotation, then dct:title, and finally falls
    back to the local name of the component URI.
    """
    for prop in (RDFS.label, DCT.title):
        lbl = g.value(component, prop)
        if lbl is not None:
            # Convert to a plain str: rdflib Literal objects do not compare
            # equal to Python strings, which would break df["label"] lookups.
            return str(lbl)
    return short_name(str(component))


def from_graph(g: Graph) -> pd.DataFrame:
    """
    Build a DataFrame from an RDF Data Cube stored in an rdflib.Graph.

    expects shape :
    ?ds RDF.type QB.DataSet
    ?ds QB.structure ?dsd
    ?dsd QB.component [QB.dimension ?dim] || ?dsd QB.component [QB.measure ?mea]
    ?obs QB.dataSet ?ds
    ?obs ?dim ?dimval || ?obs ?mea ?meaval

    Only the first qb:DataSet found (and its first qb:structure) is used.
    Columns are ordered with all dimensions first, then all measures, and
    are named (as plain ``str``) after each component's rdfs:label,
    dct:title or URI local name, in that order of preference. Each
    observation becomes one row; cells hold whatever ``g.value`` returns
    for that observation and component (``None`` when no value is present).

    Returns an empty DataFrame when the graph has no data set or the data
    set has no structure definition.
    """
    ds = next(iter(g.subjects(RDF.type, QB.DataSet)), None)
    if ds is None:
        return pd.DataFrame([], columns=[])

    dsd = next(iter(g.objects(ds, QB.structure)), None)
    if dsd is None:
        return pd.DataFrame([], columns=[])

    dims = []
    meas = []
    for comp in g.objects(dsd, QB.component):
        dim = g.value(comp, QB.dimension)
        mea = g.value(comp, QB.measure)
        # A component is treated as a dimension first; qb:measure is only
        # considered when no qb:dimension is attached.
        if dim is not None:
            dims.append(dim)
        elif mea is not None:
            meas.append(mea)

    comps = dims + meas
    columns = [_column_label(g, c) for c in comps]
    data = [
        [g.value(obs, c) for c in comps]
        for obs in g.subjects(QB.dataSet, ds)
    ]
    return pd.DataFrame(data, columns=columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment