Skip to content

Instantly share code, notes, and snippets.

Created October 9, 2019 14:59
Show Gist options
  • Save pebbie/a3c9795a20510f2765c3d707eaff4e5d to your computer and use it in GitHub Desktop.
Save pebbie/a3c9795a20510f2765c3d707eaff4e5d to your computer and use it in GitHub Desktop.
"""
auth: Peb Ruswono Aryan
desc: import data from an RDF Data Cube (assumed to have a particular shape) held in an rdflib Graph into a pandas DataFrame
"""
from rdflib import Graph, Namespace, RDF, RDFS
import pandas as pd
# W3C RDF Data Cube vocabulary (qb:DataSet, qb:structure, qb:component, ...).
# An empty base URI would make every QB.<term> a bare relative URI that
# matches nothing in real Data Cube data.
QB = Namespace('http://purl.org/linked-data/cube#')
# DCMI Metadata Terms (dct:title is used as a fallback column label).
DCT = Namespace('http://purl.org/dc/terms/')
def short_name(uristr: str) -> str:
    """Return the local part of a URI string.

    The local part is everything after the last ``#`` or ``/``, whichever
    occurs later in the string. If neither separator is present the whole
    string is returned; if the string ends with a separator the result is
    the empty string.
    """
    # str.rfind yields -1 when the separator is absent, so the slice below
    # starts at 0 (the whole string) in that case.
    cut = max(uristr.rfind('#'), uristr.rfind('/'))
    return uristr[cut + 1:]
def from_graph(g : Graph) -> pd.DataFrame :
    """
    Import an RDF Data Cube stored in an rdflib Graph into a pandas DataFrame.

    Expects shape:
        ?ds  RDF.type     QB.DataSet
        ?ds  QB.structure ?dsd
        ?dsd QB.component [QB.dimension ?dim] || ?dsd QB.component [QB.measure ?mea]
        ?obs QB.dataSet   ?ds
        ?obs ?dim ?dimval || ?obs ?mea ?meaval

    Only the first dataset yielded by the graph, and the first structure
    definition attached to it, are used.

    Columns are the dimensions followed by the measures, each group in the
    order the graph yields the components. A column is labelled with the
    component's RDFS.label, else its DCT.title, else the local part of its
    URI. Each observation of the dataset becomes one row; a cell holds the
    value returned by ``g.value(obs, component)`` (``None`` when the
    observation has no value for that component).

    :param g: graph containing the data cube.
    :return: DataFrame with one row per observation; an empty DataFrame
        when the graph has no dataset or the dataset has no structure.
    """
    data = []
    columns = []

    # Guard clauses: without a dataset and its structure definition there
    # is nothing to import, so return an empty frame.
    ds = next(iter(g.subjects(RDF.type, QB.DataSet)), None)
    if ds is None:
        return pd.DataFrame(data, columns=columns)
    dsd = next(iter(g.objects(ds, QB.structure)), None)
    if dsd is None:
        return pd.DataFrame(data, columns=columns)

    # Each component specification node points to either a dimension or a
    # measure property; collect them separately so dimensions come first.
    dims = []
    meas = []
    for spec in g.objects(dsd, QB.component):
        dim = g.value(spec, QB.dimension)
        mea = g.value(spec, QB.measure)
        if dim is not None:
            dims.append(dim)
        elif mea is not None:
            meas.append(mea)
    comps = dims + meas

    for comp in comps:
        # Try label annotations in the graph first; fall back to the local
        # name of the component URI when none is present.
        lbl = g.value(comp, RDFS.label)
        if lbl is None:
            lbl = g.value(comp, DCT.title)
        if lbl is None:
            lbl = short_name(str(comp))
        # Store plain strings so columns can be addressed with ordinary
        # str keys rather than rdflib term objects.
        columns.append(str(lbl))

    # One row per observation, cells ordered like `columns`.
    data = [
        [g.value(obs, comp) for comp in comps]
        for obs in g.subjects(QB.dataSet, ds)
    ]

    return pd.DataFrame(data, columns=columns)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment