Skip to content

Instantly share code, notes, and snippets.

@mwatts15
Last active November 22, 2015 05:07
Show Gist options
  • Save mwatts15/11d029fd9c295746602e to your computer and use it in GitHub Desktop.
Save mwatts15/11d029fd9c295746602e to your computer and use it in GitHub Desktop.
Experiment with showing object differences in YAROM
from __future__ import print_function
# Prefer the C-accelerated StringIO on Python 2 and fall back to the standard
# library on Python 3, where cStringIO no longer exists.
# NOTE(review): `io` is bound to the cStringIO *module* on Python 2 but to the
# StringIO *class* on Python 3 -- confirm which one users are expected to get.
try:
    import cStringIO as io
except ImportError:
    # Only a missing module should trigger the fallback; a bare `except`
    # would also hide unrelated failures such as KeyboardInterrupt.
    from io import StringIO as io
import sys
import rdflib
from rdflib.graph import ConjunctiveGraph
from uuid import uuid4
import random
import datetime
from hashlib import sha224
# rdf:type values used to tag the named graphs that make up a diff.
# Tags a named graph holding the triples a diff adds.
ADD_TYPE = rdflib.URIRef("urn:yarom:diff:add")
# Tags a named graph holding the triples a diff removes.
REMOVE_TYPE = rdflib.URIRef("urn:yarom:diff:remove")
# Tags a stored SPARQL update query (see insert_query).
QUERY_TYPE = rdflib.URIRef("urn:yarom:diff:query")
# Names exported by `from diff import *`.
__all__ = ["calc_diff", "invert_diff", "revert_diff", "apply_diff"]
def calc_diff(label, g1, g2):
    """Build a diff graph describing how to turn `g1` into `g2`.

    Only valid for graphs without BNodes.

    The result is a ConjunctiveGraph identified by `label`. It holds one
    freshly named context with the triples found only in `g2` (typed
    ADD_TYPE) and one with the triples found only in `g1` (typed
    REMOVE_TYPE).
    """
    removed = g1 - g2
    added = g2 - g1
    diff_graph = ConjunctiveGraph(identifier=label)
    adds_name = rdflib.URIRef("urn:yarom:diff:adds:" + str(uuid4()))
    removes_name = rdflib.URIRef("urn:yarom:diff:removes:" + str(uuid4()))
    adds_ctx = diff_graph.get_context(adds_name)
    removes_ctx = diff_graph.get_context(removes_name)
    # Fill the add context first, then the remove context.
    for context, triples in ((adds_ctx, added), (removes_ctx, removed)):
        for triple in triples:
            context.add(triple)
    # Tag both contexts so get_graphs_of_type() can find them again.
    diff_graph.add((adds_name, rdflib.RDF['type'], ADD_TYPE))
    diff_graph.add((removes_name, rdflib.RDF['type'], REMOVE_TYPE))
    return diff_graph
def pt(a, t, out=None):
    """Write one RDF-patch style line for the triple `t`.

    Parameters
    ----------
    a : str
        The line prefix (to_rdf_patch passes 'A' for adds, 'D' for removes)
    t : iterable
        The terms of the statement; each must provide an ``n3()`` method
    out : file-like, optional
        Stream to write to. Defaults to ``sys.stdout``.
    """
    # Only fall back to stdout when the caller did not supply a stream;
    # previously the argument was overwritten unconditionally.
    if out is None:
        out = sys.stdout
    print(a, ' '.join(x.n3() for x in t), '.', file=out)
def diff_adds(data_set, label):
    """Yield the triples stored in the ADD_TYPE graphs of the diff `label`."""
    added_triples = get_graphs_of_type(data_set, rdf_type=ADD_TYPE, label=label)
    return added_triples
def diff_removes(data_set, label):
    """Yield the triples stored in the REMOVE_TYPE graphs of the diff `label`."""
    removed_triples = get_graphs_of_type(data_set, rdf_type=REMOVE_TYPE, label=label)
    return removed_triples
def get_graphs_of_type(data_set, rdf_type, label):
    """Lazily yield every triple held in the graphs of a given type.

    Looks in the context `label` for subjects whose rdf:type is `rdf_type`,
    treats each such subject as a graph name, and yields the statements
    found in that graph.
    """
    type_pattern = (None, rdflib.RDF['type'], rdf_type, label)
    for typed_statement in data_set.triples(type_pattern):
        graph_name = typed_statement[0]
        contents_pattern = (None, None, None, graph_name)
        for statement in data_set.triples(contents_pattern):
            yield statement
def to_rdf_patch(data_set, label, out=None):
    """Emit the diff `label` as patch lines: 'A' for adds, then 'D' for removes.

    `out` is handed through to pt() unchanged.
    """
    sections = (('A', diff_adds), ('D', diff_removes))
    for marker, collect in sections:
        for statement in collect(data_set, label):
            pt(marker, statement, out)
def gen_graph(size, num_lim=None, namespace=None):
    """Generate a random set of `size` distinct (s, p, o) triples.

    Parameters
    ----------
    size : int
        Number of triples to generate
    num_lim : int, optional
        Exclusive upper bound for the numeric suffixes that are sampled.
        Defaults to ``size * 2``.
    namespace : optional
        Object indexed with the names ``'s<N>'``, ``'p<N>'`` and ``'o<N>'`` to
        produce the terms (e.g. an ``rdflib.Namespace``). When omitted, the
        module-level ``ns`` is used, which is only bound when this file runs
        as a script.

    Returns
    -------
    set of 3-tuples
    """
    if num_lim is None:
        num_lim = size * 2
    if namespace is None:
        # Backward-compatible fallback to the global set up under __main__.
        namespace = ns
    suffixes = (str(x) for x in random.sample(range(num_lim), size))
    return set((namespace['s' + n], namespace['p' + n], namespace['o' + n])
               for n in suffixes)
def parent(data_set, parent, child, dest=None):
    """ Record a parent/child relationship between two commits.

    NOTE: not implemented yet -- the function body consists of this docstring
    only, so calling it does nothing and returns None.

    Parameters
    ----------
    data_set : rdflib.ConjunctiveGraph
        The RDF graph object to store the relationship in
    parent : rdflib.URIRef
        The name of the parent commit
    child : rdflib.URIRef
        The name of the child commit
    dest : rdflib.URIRef, optional
        The name of the graph in which this relationship will be stored. If not
        provided, the default graph will be used.
    """
def insert_query(data_set, query_string, label=None):
    """ Insert a SPARQL update query. Adds a SPARQL update query to the graph
    with a name based on the query content. Returns the name of the query in
    the graph.

    Parameters
    ----------
    data_set : rdflib.ConjunctiveGraph
        The graph to store the query in
    query_string : str or bytes
        The text of the SPARQL update query (bytes are taken to be UTF-8)
    label : rdflib.URIRef, optional
        Fourth element of the quads passed to ``data_set.add``

    Returns
    -------
    rdflib.URIRef
        The name under which the query was stored
    """
    # hashlib works on bytes, while the stored literal should be text.
    if isinstance(query_string, bytes):
        raw = query_string
        text = query_string.decode('utf-8')
    else:
        raw = query_string.encode('utf-8')
        text = query_string
    # Use the hex digest: str() of a hash object is its repr (which embeds a
    # memory address), so the name would not depend on the query content.
    digest = sha224(raw).hexdigest()
    qn = rdflib.URIRef("urn:yarom:diff:queries:" + digest)
    data_set.add((qn, rdflib.RDF['type'], QUERY_TYPE, label))
    # Wrap the text in a Literal so the object is a proper RDF term.
    data_set.add((qn, rdflib.URIRef("urn:yarom:diff:query:value"),
                  rdflib.Literal(text), label))
    return qn
def make_commit(data_set, dgs_and_queries, author, committer, date_time, message):
    """ Make a commit with the given diff graphs and SPARQL update queries.
    Returns the name of the commit

    NOTE: not implemented yet -- the function body consists of this docstring
    only, so calling it currently does nothing and returns None.

    Parameters
    ----------
    data_set : rdflib.ConjunctiveGraph
        The RDF graph object to store the commit in
    dgs_and_queries : set of rdflib.URIRef
        Labels for the diff graphs and SPARQL queries that make up the commit
    author : rdflib.URIRef
        The author of the commit
    committer : rdflib.URIRef
        The committer
    date_time : datetime.datetime
        The time to record for the commit
    message : str
        A message to attach to the commit
    """
def apply_diff(data_set, dg, label):
    """ Apply the diff named `label`, stored in `dg`, to `data_set`.

    All added triples are inserted first; the removed triples are deleted
    afterwards.

    Parameters
    ----------
    data_set : rdflib.ConjunctiveGraph
        The data set to apply the diff to
    dg : rdflib.ConjunctiveGraph
        The data set containing the diff graph
    label : rdflib.URIRef
        The label for the diff in dg
    """
    for added in diff_adds(dg, label):
        data_set.add(added)
    for removed in diff_removes(dg, label):
        data_set.remove(removed)
def revert_diff(data_set, dg, label):
    """Undo the diff `label` from `dg` on `data_set`.

    The diff is inverted under a freshly generated label, and that inverted
    diff is then applied to `data_set`.
    """
    inverse_label = rdflib.URIRef("urn:yarom:diff:man:" + str(uuid4()))
    inverse = invert_diff(dg, label, inverse_label)
    apply_diff(data_set, inverse, inverse_label)
def invert_diff(dg, src, dest):
    """ Return a new diff graph in which the adds and removes of `src` swap roles.

    Parameters
    ----------
    dg : rdflib.ConjunctiveGraph
        The graph containing the diff
    src : rdflib.URIRef
        The name for the diff
    dest : rdflib.URIRef
        The name for the new diff
    """
    inverted = ConjunctiveGraph(identifier=dest)
    adds_name = rdflib.URIRef("urn:yarom:diff:adds:" + str(uuid4()))
    removes_name = rdflib.URIRef("urn:yarom:diff:removes:" + str(uuid4()))
    adds_ctx = inverted.get_context(adds_name)
    removes_ctx = inverted.get_context(removes_name)
    # The source's adds become removes, then its removes become adds.
    swaps = ((removes_ctx, diff_adds), (adds_ctx, diff_removes))
    for target_ctx, collect in swaps:
        for statement in collect(dg, src):
            target_ctx.add(statement)
    inverted.add((adds_name, rdflib.RDF['type'], ADD_TYPE))
    inverted.add((removes_name, rdflib.RDF['type'], REMOVE_TYPE))
    return inverted
def print_rdf_patch(d, dg):
    """Write the diff named `dg`, stored in data set `d`, as patch lines with stdout as the target."""
    to_rdf_patch(d, dg, out=sys.stdout)
def print_graph(data_set):
    """Print `data_set` serialized in the 'nquads' format."""
    serialized = data_set.serialize(format='nquads')
    print(serialized)
if __name__ == '__main__':
    # Demo: diff two random graphs, print the patch, then print its inverse.
    # gen_graph() reads `ns` as a module-level global, so bind it first.
    ns = rdflib.Namespace("http://example.com#")
    g1, g2 = gen_graph(25), gen_graph(25)

    dg = rdflib.URIRef("urn:yarom:diff:man:" + str(uuid4()))
    d = calc_diff(dg, g1, g2)
    print_rdf_patch(d, dg)

    eg = rdflib.URIRef("urn:yarom:diff:man:" + str(uuid4()))
    inversion = invert_diff(d, dg, eg)
    print("INVERTED")
    print_rdf_patch(inversion, eg)
from __future__ import print_function
import sys
import uuid
import rdflib
import yarom
from yarom import connect, disconnect
from yarom.dataObject import DataObject, TypeDataObject
from yarom.configure import Configureable, Configuration
from yarom.mapper import Mapper
from diff import (calc_diff, apply_diff, revert_diff, print_rdf_patch)
class PropertyValueChange(tuple):
    """A ``(prop, added, removed)`` tuple describing changed values of one property.

    The three items are also readable as the attributes ``prop``, ``added``
    and ``removed``.
    """

    # Attribute name -> position in the tuple.
    _map = dict(prop=0, added=1, removed=2)

    def __new__(cls, prop, added, removed):
        return super(PropertyValueChange, cls).__new__(cls, (prop, added, removed))

    def __getattr__(self, n):
        # Only called when normal attribute lookup fails. Unknown names must
        # raise AttributeError (not KeyError) so that hasattr() and
        # getattr(obj, name, default) keep working.
        index = PropertyValueChange._map.get(n)
        if index is None:
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(type(self).__name__, n))
        return self[index]

    def __repr__(self):
        return "PropertyValueChange({},{},{})".format(repr(self.prop), repr(self.added), repr(self.removed))
class PropertyDifference(tuple):
    """An ``(added, removed)`` tuple describing properties gained and lost.

    Both items are also readable as the attributes ``added`` and ``removed``.
    """

    # Attribute name -> position in the tuple.
    _map = dict(added=0, removed=1)

    def __new__(cls, added, removed):
        return super(PropertyDifference, cls).__new__(cls, (added, removed))

    def __getattr__(self, n):
        # Only called when normal attribute lookup fails. Unknown names must
        # raise AttributeError (not KeyError) so that hasattr() and
        # getattr(obj, name, default) keep working.
        index = PropertyDifference._map.get(n)
        if index is None:
            raise AttributeError(
                "{!r} object has no attribute {!r}".format(type(self).__name__, n))
        return self[index]

    def __repr__(self):
        return "PropertyDifference({},{})".format(repr(self.added), repr(self.removed))
def object_diff(o1, o2, value_extractor):
    """Return the list of changes needed to get from `o1` to `o2`.

    Parameters
    ----------
    o1, o2 : objects exposing a ``properties`` iterable
        Each property must provide ``link`` and ``linkName``
    value_extractor : callable
        Called as ``value_extractor(obj, prop)``; returns an iterable of the
        values of `prop`

    Returns
    -------
    list
        At most one PropertyDifference (when the sets of property links
        differ), followed by a PropertyValueChange for each property of `o1`
        whose values differ on `o2`. Progress is printed to stdout.
    """
    props_1 = set(o1.properties)
    props_2 = set(o2.properties)
    by_link_1 = {prop.link: prop for prop in props_1}
    by_link_2 = {prop.link: prop for prop in props_2}
    link_set_1 = set(by_link_1)
    link_set_2 = set(by_link_2)

    changes = []
    if link_set_1 != link_set_2:
        removed = {by_link_1[link] for link in link_set_1 - link_set_2}
        added = {by_link_2[link] for link in link_set_2 - link_set_1}
        changes.append(PropertyDifference(added, removed))

    for prop in props_1:
        print('property', prop)
        # Properties that o2 lacks are covered by the PropertyDifference above.
        if not hasattr(o2, prop.linkName):
            continue
        values_1 = set(value_extractor(o1, prop))
        values_2 = set(value_extractor(o2, getattr(o2, prop.linkName)))
        print('values', values_1, values_2)
        if values_1 == values_2:
            continue
        changes.append(
            PropertyValueChange(prop, values_2 - values_1, values_1 - values_2))
    return changes
def graph_diff_to_object_diff(cls, ident, dg, label=None):
    """ Get the object diffs on one object from the graph diff.

    The base graph is the configured RDF graph for the object
    (``cls.conf['rdf.graph']``). The diff is applied to that graph while the
    "after" object is resolved and is reverted again before returning, even
    if resolving raises.

    Parameters
    ----------
    cls : type
        The class used to build the "before" and "after" objects; it is
        called as ``cls(ident=ident)`` and must expose ``conf``
    ident : identifier
        The identifier of the object which is to get its object diff from the
        graph diff (through_graph passes an rdflib.URIRef)
    dg : rdflib.ConjunctiveGraph
        The diff graph containing the add and remove graphs
    label : rdflib.URIRef
        The label for the named graph containing the add/remove graphs

    Returns
    -------
    a list of changes
    """
    g = cls.conf['rdf.graph']
    print(g.serialize(format='n3').decode('UTF-8'), file=sys.stdout)
    # State of the object before the diff.
    a = cls(ident=ident)
    a.resolve()
    apply_diff(g, dg, label)
    try:
        print_rdf_patch(dg, label)
        print(g.serialize(format='n3').decode('UTF-8'), file=sys.stdout)
        # State of the object with the diff applied.
        b = cls(ident=ident)
        b.resolve()
        # TODO: get the adds for this object
        # TODO: get the removes for this object
        # TODO: calculate renames/changes from adds/rems
        # TODO: produce PropertyDifference and PropertyValueChange
    finally:
        # Always restore the configured graph, including on failure.
        revert_diff(g, dg, label)
    return object_diff(a, b, lambda o, p: p.values)
def basic():
    """Diff two in-memory DataObjects directly and print the result."""
    connect('query-guy.conf')
    try:
        o1 = DataObject(key="o1")
        o2 = DataObject(key="o2")
        o3 = DataObject(key="o3")
        o1.relate('has_brother', o2)
        o2.relate('has_brother', o3)
        o1.relate('size', 22)
        # object_diff() requires a value extractor and print_object_diff()
        # takes the resulting list of changes; the extractor is the same one
        # graph_diff_to_object_diff() uses.
        od = object_diff(o1, o2, lambda o, p: p.values)
        print_object_diff(od)
    finally:
        # Release the connection even if building or diffing the objects fails.
        disconnect()
def _owner_of(props):
    """Return the ``owner`` of an arbitrary property in `props`, or None if it has none."""
    return getattr(next(iter(props)), 'owner', None)


def print_object_diff(od):
    """Print a human-readable line for every change in `od`.

    Parameters
    ----------
    od : iterable
        PropertyDifference and PropertyValueChange items, as returned by
        object_diff(). Items of any other type are skipped.
    """
    for x in od:
        if isinstance(x, PropertyDifference):
            # The owner is taken from the properties themselves; the name
            # `o1` that was used here before is not defined in this scope.
            if len(x.added) > 0:
                print("{} addded properties [{}] ".format(_owner_of(x.added), ", ".join(str(z) for z in x.added)))
            if len(x.removed) > 0:
                print("{} removed properties [{}] ".format(_owner_of(x.removed), ", ".join(str(z) for z in x.removed)))
        elif isinstance(x, PropertyValueChange):
            if len(x.added) > 0:
                print( "{} added values [{}] for property '{}'".format(x.prop.owner, ", ".join(str(z) for z in x.added), x.prop.linkName))
            if len(x.removed) > 0:
                print( "{} removed values [{}] from property '{}'".format(x.prop.owner, ", ".join(str(z) for z in x.removed), x.prop.linkName))
def through_graph():
    """Save two versions of one object, diff their graphs, and print the object diff.

    The connect / class definition / remap / disconnect sequence is repeated
    for each phase, so the order of the statements matters.
    """
    # Both versions of the object share this identifier.
    o_ident = rdflib.URIRef("http://example.com/diff/{}".format(uuid.uuid4()))

    # Version 1: the object has 'has_brother' and 'size'.
    connect('query-guy.conf')
    class A(yarom.DataObject):
        _ = ['has_brother', 'size']
    Mapper.get_instance().remap()
    o1 = A(ident=o_ident)
    o3 = A(key="o3")
    o1.relate('has_brother', o3)
    o1.relate('size', 22)
    o1.save()
    g1 = o1.rdf
    disconnect()

    # Version 2: same identifier, only 'has_brother', pointing at another object.
    connect('query-guy.conf')
    class A(yarom.DataObject):
        _ = ['has_brother']
    Mapper.get_instance().remap()
    o2 = A(ident=o_ident)
    o4 = A(key="o4")
    o2.relate('has_brother', o4)
    o2.save()
    g2 = o2.rdf
    disconnect()

    # Graph-level diff from version 1 to version 2, stored under a fresh label.
    e = rdflib.URIRef("http://example.com/diff/{}".format(uuid.uuid4()))
    dg = calc_diff(e, g1, g2)

    # Reconnect with g1 passed as the data and turn the graph diff into an object diff.
    connect('query-guy.conf', data=g1)
    class A(yarom.DataObject):
        _ = ['has_brother', 'size']
    Mapper.get_instance().remap()
    print_object_diff(graph_diff_to_object_diff(A, o_ident, dg, e))
    disconnect()
    #print(g1.serialize(format='turtle').decode('UTF-8'))
    #print(g2.serialize(format='turtle').decode('UTF-8'))
# Run the graph-based demo when executed as a script.
if __name__ == '__main__':
    through_graph()
{
"rdf.source" : "default",
"rdf.store" : "default",
"user.email" : "jerry@cn.com",
"rdf.upload_block_statement_count" : 50
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment