Skip to content

Instantly share code, notes, and snippets.

@gdbassett
Last active February 11, 2022 19:58
Show Gist options
  • Save gdbassett/a9e0e0c25c1a4951e143806dd1d13909 to your computer and use it in GitHub Desktop.
Save gdbassett/a9e0e0c25c1a4951e143806dd1d13909 to your computer and use it in GitHub Desktop.
Convert JSON to JSON-LD. I couldn't find any examples in Python, so I produced one. This is very basic, as I'm still learning JSON-LD. It does not use blank nodes (bnodes).
import json
import logging
from collections import defaultdict
from urllib.parse import quote, unquote
from uuid import uuid4
def flatten(l):
    """Lazily yield the leaves of an arbitrarily nested list/tuple structure.

    Only objects whose exact type is ``list`` or ``tuple`` are descended into;
    every other element (strings, dicts, numbers, None, ...) is yielded as-is,
    in depth-first, left-to-right order.

    The traversal keeps an explicit stack of iterators instead of recursing,
    so nesting depth is not bounded by the interpreter's recursion limit.
    """
    pending = [iter(l)]  # innermost container currently being walked is on top
    while pending:
        for el in pending[-1]:
            if type(el) in (list, tuple):
                # Descend; the parent iterator keeps its position on the stack.
                pending.append(iter(el))
                break
            yield el
        else:
            # Top iterator is exhausted: resume the enclosing container.
            pending.pop()
def json2linked(j, name, ns, unique_id="", properties=False, objects=False, simple_ids=True):
    """Convert parsed JSON into a very basic JSON-LD document.

    Args:
        j: Parsed JSON data to convert.
        name: Label of the root; it prefixes every generated "@id".
        ns: Namespace IRI used for both "@base" and "@vocab" in the context.
        unique_id: Suffix forwarded to ``json2linked_r`` for the root object's
            "@id". It is only visible in the output when ``simple_ids`` is
            false, because simplified ids drop everything after "_obj".
        properties: Forwarded to ``json2linked_r``.
        objects: Forwarded to ``json2linked_r``.
        simple_ids: When true, replace the UUID-suffixed ids of
            "owl:NamedIndividual" nodes with short sequential ids.

    Returns:
        A dict with an "@context" and an "@graph" holding the generated nodes.
    """
    # Recurse the data to build the JSON-LD node objects.
    data = json2linked_r(j, name, unique_id, properties=properties, objects=objects)
    # Add the data to a very basic context.
    linked = {
        "@context": {
            "@base": ns,
            "@vocab": ns,
            "owl": "http://www.w3.org/2002/07/owl#",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
        },
        "@graph": data,
    }
    # UUIDs identify unique objects in the initial pass; optionally replace
    # them with sequence numbers for more legible ids.
    if simple_ids:
        _simplify_ids(linked["@graph"])
    return linked


def _simplify_ids(graph):
    """Rewrite UUID-suffixed individual ids in *graph* to short ids, in place.

    Nodes typed "owl:NamedIndividual" are grouped by the part of their "@id"
    before "_obj". A group holding one id is renamed to the bare prefix; larger
    groups become "<prefix>_1", "<prefix>_2", ... in graph order. Values of the
    form {"@id": ...} (alone or inside a list) that point at a renamed node are
    rewritten to match.
    """
    groups = defaultdict(list)
    for node in graph:
        if "owl:NamedIndividual" in node["@type"]:
            groups[node["@id"].split("_obj")[0]].append(node["@id"])

    # Precompute old id -> new id once instead of scanning the group per lookup.
    renamed = {}
    for prefix, ids in groups.items():
        for position, old_id in enumerate(ids, start=1):
            new_id = prefix if len(ids) <= 1 else f"{prefix}_{position}"
            # setdefault keeps the first position should an id occur twice.
            renamed.setdefault(old_id, new_id)

    for node in graph:
        if node["@id"] in renamed:
            node["@id"] = renamed[node["@id"]]
        for value in node.values():
            candidates = value if isinstance(value, list) else [value]
            for ref in candidates:
                if isinstance(ref, dict) and ref.get("@id") in renamed:
                    ref["@id"] = renamed[ref["@id"]]
def json2linked_r(d, name, unique_id="", properties=False, objects=False):
    """Recursively convert parsed JSON into a flat list of JSON-LD node objects.

    Args:
        d: Parsed JSON value. A dict becomes one "owl:NamedIndividual" node; a
            non-empty list/tuple is flattened and each non-empty dict in it is
            converted under the same ``name``. Anything else yields no nodes.
        name: Dotted path of ``d``. A dict's node id is "<name>_obj<unique_id>"
            and each of its keys ``k`` becomes the property "<name>.<k>".
        unique_id: Suffix that keeps the node id for ``d`` unique. For a
            top-level list, a fresh random suffix is appended per item so
            sibling records do not share one "@id".
        properties: When true, also emit one declaration node per key, typed
            "owl:DatatypeProperty" and/or "owl:ObjectProperty" with a matching
            "rdfs:range".
        objects: When true, type each individual with ``name`` instead of
            "owl:Thing", and make sure a node "<name>.<k>" typed "owl:Thing"
            exists for every key.

    Returns:
        List of node dicts; nested nodes precede the node that references them.
    """
    type_map = {
        int: "xsd:integer",
        float: "xsd:float",
        str: "xsd:string",
        bool: "xsd:boolean",
    }
    ret = []
    try:
        if type(d) is dict:
            # Create the individual representing this dict.
            obj = {
                "@id": quote(f"{name}_obj{unique_id}"),
                "@type": ["owl:NamedIndividual", quote(name) if objects else "owl:Thing"],
            }
            for k, v in d.items():
                prop_name = f"{name}.{k}"
                prop_id = quote(prop_name)
                prop_types = set()
                prop_ranges = set()

                # Treat a lone scalar/dict like a one-item list so that single
                # values and list members share one code path.
                if type(v) in type_map or type(v) is dict:
                    items = [v]
                elif type(v) in (list, tuple) and len(v) > 0:
                    items = flatten(v)
                else:
                    items = None  # None, empty list or unsupported type: emit nothing

                if items is not None:
                    values = obj[prop_id] = []
                    for item in items:
                        if type(item) in type_map:
                            prop_types.add("owl:DatatypeProperty")
                            prop_ranges.add("rdfs:Literal")
                            values.append({"@value": item, "@type": type_map[type(item)]})
                        elif type(item) is dict:
                            prop_types.add("owl:ObjectProperty")
                            prop_ranges.add("owl:Thing")
                            u = "_" + str(uuid4())[:8]
                            values.append({"@id": quote(f"{prop_name}_obj{u}")})
                            ret += json2linked_r(item, prop_name, u, properties=properties, objects=objects)
                        # Any other item (e.g. None) is skipped; flatten() has
                        # already removed nested lists/tuples.

                if properties:
                    # Sorted so the output does not depend on set iteration order.
                    type_list = sorted(prop_types)
                    if len(type_list) > 1:
                        logging.warning("object property {0}, ({1}) has @type longer than 1. That may cause problems.".format(prop_id, type_list))
                    ret.append({
                        "@id": prop_id,
                        "@type": type_list,
                        "rdfs:range": [{"@id": rng} for rng in sorted(prop_ranges)],
                    })

                # Add in objects (or add the type to an existing property node).
                if objects:
                    existing = next((node for node in ret if node["@id"] == prop_id), None)
                    if existing is None:
                        ret.append({"@id": prop_id, "@type": ["owl:Thing"]})
                    else:
                        existing["@type"] = sorted(set(existing["@type"]) | {"owl:Thing"})
            ret.append(obj)
        elif type(d) in [bool, int, float, str]:
            logging.warning("Shouldn't get here.")
        elif type(d) in (list, tuple) and len(d) > 0:
            # Iterate the flattened view without rebinding ``d`` so the log
            # messages below show the data rather than a generator object.
            for item in flatten(d):
                if type(item) is dict and len(item) > 0:
                    # Each record needs its own id; sharing ``unique_id`` would
                    # give every item in the list the same "@id".
                    item_id = f"{unique_id}_{str(uuid4())[:8]}"
                    ret += json2linked_r(item, name, item_id, properties=properties, objects=objects)
                elif type(item) in (list, tuple):
                    logging.warning("Flatten should prevent you getting a list of lists. name: {0}, d: {1}".format(name, d))
                elif type(item) in [bool, int, float, str]:
                    logging.warning("I don't think we should get a list of values. name: {0}, d: {1}".format(name, d))
                # None items and empty dicts are skipped.
        # Any other value (None, empty list, ...) produces no nodes.
    except Exception:
        # Record which value failed at this level, then let the error propagate.
        logging.error("d: {0}, name: {1}".format(d, name))
        raise
    return ret
# Build JSONLD Graph
# NOTE: `records_dict` is not defined in this file and must be provided before
# this section runs: a mapping of record name -> JSON text encoding an object.
graph = {"@context": {}, "@graph": []}
# We convert the records separately so that the property paths are the same for each record
for name, record in records_dict.items():
    u = "_" + str(uuid4())[:8]
    j = json.loads(record)
    j['record'] = name
    # Set a unique_id so root records are unique.
    # We set objects = True so we can identify the root by its object type.
    # We set simple_ids = False to prevent collisions in IDs between the separately converted records.
    # We _could_ set properties = True so that we could query paths of subProperties of the top properties.
    jld = json2linked(j, "root", "https://example.org/ns#", unique_id=u, objects=True, simple_ids=False)
    graph['@context'] = jld['@context']
    graph['@graph'] += jld['@graph']
    # Release the per-record intermediates inside the loop; deleting them after
    # the loop would raise NameError when records_dict is empty.
    del j
    del jld
# Because we joined records with the same structure, objects and properties will be duplicated.
# Here we delete duplicates, keeping the first node seen for each "@id".
objs = set()
graph2 = list()
for obj in graph['@graph']:
    if obj['@id'] not in objs:
        objs.add(obj['@id'])
        graph2.append(obj)
graph['@graph'] = graph2
del graph2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment