Skip to content

Instantly share code, notes, and snippets.

@mwatts15
Created February 26, 2015 21:47
Show Gist options
  • Save mwatts15/6dfdaf06b0e672e1ddd7 to your computer and use it in GitHub Desktop.
A thing for doing graph queries
import yarom as Y
import rdflib as R
from rdflib.tools.rdf2dot import rdf2dot
from pprint import pprint as PP
from time import time
from object_name_dict import onames as ONAMES
from object_name_dict import pnames as PNAMES
import sys
# Number of data objects created by qq() and boh().
NOBJS = 10
# Number of random triples generated for the graph built in boh().
NTRIPS = 20
# Seed used by numbers(); when left as None, numbers() seeds with 23 instead.
# Overwritten from the first command-line argument when run as a script.
SEED = None
def objects(count):
    """Build ``count`` keyed data objects.

    The i-th object is ``Y.DataObject(key=ONAMES[i])``.

    Returns:
        A list of the created objects, in index order.
    """
    return [Y.DataObject(key=ONAMES[index]) for index in range(count)]
def query_objects(count):
    """Build ``count`` data objects identified by variable name.

    The i-th object is ``Y.DataObject(var=ONAMES[i])``; unlike ``objects``,
    the name is passed as ``var`` rather than ``key``.

    Returns:
        A list of the created objects, in index order.
    """
    return [Y.DataObject(var=ONAMES[index]) for index in range(count)]
def predicates(count):
    """Return the first ``count`` predicate names.

    Names are looked up one by one as ``PNAMES[0]`` .. ``PNAMES[count - 1]``.
    """
    return [PNAMES[index] for index in range(count)]
def numbers(count, *maxes):
    """Return ``count`` reproducible pseudo-random integer tuples.

    Args:
        count: Number of tuples to generate.
        *maxes: Inclusive upper bounds; element ``k`` of every tuple is drawn
            from ``0..maxes[k]``.

    Returns:
        A list of ``count`` tuples, each of length ``len(maxes)``.

    Raises:
        ValueError: If any bound is negative (raised by ``random.randint``).
    """
    import random

    # The module-level generator is reseeded on every call, so two calls with
    # the same arguments (and the same SEED) produce the same list.
    random.seed(23 if SEED is None else SEED)
    return [tuple(random.randint(0, m) for m in maxes) for _ in range(count)]
def triples(count, objects, preds):
    """Link randomly chosen objects with randomly chosen predicates.

    ``numbers`` supplies ``count`` index triples; for each one the first
    object gets ``set_parent(predicate, second_object)`` called on it and the
    statement is echoed to stdout.

    Args:
        count: Number of statements to create.
        objects: Indexable collection of objects exposing ``set_parent`` and
            ``idl``.
        preds: Indexable collection of predicates.

    Returns:
        A list of ``(first.idl, predicate, second.idl)`` tuples.
    """
    last_object = len(objects) - 1
    last_pred = len(preds) - 1
    statements = []
    for first_i, second_i, pred_i in numbers(
            count, last_object, last_object, last_pred):
        first, second, predicate = objects[first_i], objects[second_i], preds[pred_i]
        first.set_parent(predicate, second)
        print(first, predicate, second)
        statements.append((first.idl, predicate, second.idl))
    return statements
def pred_avg(graph, s, p):
    """Tally the triples yielded by ``graph.triples((None, p, s))``.

    Every yielded triple is printed, and the tally is keyed by the triple's
    third element.

    NOTE(review): if ``graph.triples`` only yields triples matching the
    pattern, the third element is always ``s`` and the result has at most one
    key -- confirm whether keying on the first element was intended.

    Returns:
        A dict mapping third elements to the number of triples seen.
    """
    tally = {}
    for triple in graph.triples((None, p, s)):
        print(triple)
        target = triple[2]
        if target in tally:
            tally[target] += 1
        else:
            tally[target] = 1
    return tally
class L(object):
    """Callable walker that collects "leaf" nodes reachable from a start node.

    Calling the instance walks outward from ``start``: through ``x.value``
    for every ``x`` in ``node.o`` and through ``x.owner`` for every ``x`` in
    ``node.p``. A node other than ``start`` is recorded as a leaf when its
    ``p`` or its ``o`` collection is empty.
    """

    def __init__(self, start):
        self.start = start
        # Nodes on the current walk path (pushed on entry, popped on exit);
        # used to avoid re-entering a node that is already being visited.
        self.seen = []
        self.leaves = set()

    def __call__(self, node=False):
        """Visit ``node`` (or ``start`` when ``node`` is falsy).

        Returns:
            The accumulated set of leaves, or ``None`` when ``node`` is
            already on the current path.
        """
        current = node if node else self.start
        if current in self.seen:
            return

        self.seen.append(current)
        print(current, current.p, current.o)

        missing_side = len(current.p) == 0 or len(current.o) == 0
        if missing_side and current != self.start:
            self.leaves.add(current)

        for link in current.o:
            self(link.value)
        for link in current.p:
            self(link.owner)

        self.seen.pop()
        return self.leaves
def hoc(l):
    """Fold sequences into a nested dict keyed by successive elements.

    Sequences are grouped by their first element; the remainders of each
    group are folded the same way recursively. Empty sequences contribute
    nothing, so a fully consumed sequence ends in an empty dict.

    Example: ``[(1, 2), (1, 3), (2,)]`` -> ``{1: {2: {}, 3: {}}, 2: {}}``.
    """
    grouped = {}
    for seq in l:
        if len(seq) == 0:
            continue
        grouped.setdefault(seq[0], []).append(seq[1:])
    return {head: hoc(tails) for head, tails in grouped.items()}
def pr(h, i, d=False):
    """Print the nested dict ``h`` as an indented tree, numbering its keys.

    The first time a key is encountered it is assigned the next number
    (largest number handed out so far, plus one), printed as ``"<key> <n>"``
    and its sub-dict is printed one level deeper. A key that already has a
    number is printed as the number alone and is not descended into.

    Args:
        h: Nested dict, e.g. as produced by ``hoc``.
        i: Indentation level; each level is four spaces.
        d: Optional dict mapping keys to their numbers. It is updated in
            place, so a caller may pass its own dict (even an empty one) to
            receive the numbering.
    """
    # Only a missing/placeholder argument is replaced. A caller-supplied empty
    # dict must be kept, otherwise the caller never sees the numbering.
    if not d and not isinstance(d, dict):
        d = {}
    indent = " " * 4 * i
    for x in h:
        if x in d:
            print(indent + str(d[x]))
            continue
        d[x] = (max(d.values()) if d else 0) + 1
        print(indent + str(x) + " " + str(d[x]))
        pr(h[x], i + 1, d)
def print_graph(g):
    """Print graph ``g`` serialized in N3 format.

    Args:
        g: Object exposing ``serialize(format=...)``.
    """
    data = g.serialize(format="n3")
    # serialize() yields bytes on older rdflib releases and str on newer ones;
    # decode only when there is something to decode.
    if isinstance(data, bytes):
        data = data.decode("UTF-8")
    print(data)
def render_graph_dot(g):
    """Render graph ``g`` to a PNG with ``dot`` and display it with ``feh``.

    The graph is written in DOT form to a temporary file by ``rdf2dot``,
    piped into ``dot -T png``, and the resulting image is opened in ``feh``.
    The image file is removed once the viewer returns.

    Raises:
        OSError: If the ``dot`` or ``feh`` executable cannot be started.
    """
    import os
    import tempfile
    from subprocess import call

    fd, image_path = tempfile.mkstemp()
    # dot writes to the path itself, so the descriptor returned by mkstemp is
    # not needed; close it right away instead of leaking it.
    os.close(fd)
    try:
        with tempfile.TemporaryFile() as dot_source:
            rdf2dot(g, dot_source)
            dot_source.seek(0, 0)  # rewind so dot reads from the start
            call(["dot", "-T", "png", "-o", image_path], stdin=dot_source)
        call(["feh", image_path])
    finally:
        os.remove(image_path)
def fixName(n):
    """Shorten ``R.URIRef`` values for display; pass anything else through.

    A ``URIRef`` has its first 53 characters dropped.
    NOTE(review): 53 is presumably the length of the namespace prefix used
    by the generated objects -- confirm.
    """
    return n[53:] if isinstance(n, R.URIRef) else n
def qpr(g, h, toset=False, i=0):
    """Evaluate the nested query tree ``h`` against graph ``g``.

    Each key of ``h`` is a triple pattern containing one ``None`` slot (the
    value being asked for); ``h[key]`` is the sub-tree of further patterns.
    When the pattern's opposite end slot (index 0 or 2) is an ``R.Variable``,
    the sub-tree is evaluated first and each of its answers is substituted
    into that slot before querying; otherwise the pattern is queried as-is.
    The answer sets of all patterns at this level are intersected.

    Progress is traced to stdout, indented by recursion depth.

    Args:
        g: Graph exposing ``triples(pattern)``.
        h: Nested dict of patterns, e.g. as produced by ``hoc``.
        toset: Unused; kept so existing callers remain valid.
        i: Recursion depth, used only for trace indentation.

    Returns:
        The set of values common to every pattern at this level, or an empty
        set when ``h`` has no patterns.
    """
    def prl(*x):
        # Trace helper: one line, indented four spaces per recursion level.
        print(" " * 4 * i + " ".join(map(str, x)))

    def collect(query, idx, answers):
        # Run one triple query and gather the values found in slot ``idx``.
        prl("QUERYING", tuple(fixName(t) for t in query))
        for triple in g.triples(query):
            prl("QUERY GOT", fixName(triple[idx]))
            answers.add(triple[idx])

    prl(i)
    join_args = []
    for x in h:
        sub_answers = set()
        idx = x.index(None)
        other_idx = 0 if idx == 2 else 2
        if isinstance(x[other_idx], R.Variable):
            # The far end is itself unknown: resolve it through the sub-tree,
            # then query once per resolved value.
            for z in qpr(g, h[x], False, i + 1):
                qx = (z, x[1], None) if idx == 2 else (None, x[1], z)
                collect(qx, idx, sub_answers)
        else:
            collect(x, idx, sub_answers)
        join_args.append(sub_answers)

    if not join_args:
        return set()
    prl("Got", list(set(fixName(v) for v in answers) for answers in join_args))
    return set.intersection(*join_args)
def qq():
    """Run one end-to-end query experiment and print the answers.

    Steps, in order: create the query objects and link them with 12 random
    statements; build the stored graph via ``boh``; give every leaf found by
    ``L`` a key taken from its own ``idl``; collect query paths with
    ``Y.dataObject.QU``; fold them with ``hoc``; print the tree with ``pr``;
    and finally evaluate it against the graph with ``qpr``.
    """
    query_objs = query_objects(NOBJS)
    triples(12, query_objs, predicates(10))
    graph = boh()

    root = query_objs[0]
    for leaf in L(root)():
        leaf.setKey(leaf.idl)
    print("SET", [obj for obj in query_objs if obj.defined])

    query = Y.dataObject.QU()
    print("QUERYING", root)
    query(root)
    PP([
        (tuple(fixName(t) for t in path[0]),
         tuple(fixName(t) for t in path[-1]))
        for path in query.paths
    ])

    tree = hoc(query.paths)
    pr(tree, 0)
    for answer in qpr(graph, tree):
        print(answer)
    # render_graph_dot(graph)  # uncomment to view the stored graph
def boh():
    """Build the graph that queries are run against.

    Creates ``NOBJS`` keyed objects, links them with ``NTRIPS`` random
    statements, then passes the first object to ``Y.dataObject.SV()`` and
    attaches the configured namespace manager to the result.

    Returns:
        The object returned by ``Y.dataObject.SV()(first_object)``.
    """
    nodes = objects(NOBJS)
    triples(NTRIPS, nodes, predicates(10))
    root = nodes[0]
    print("GRAPH START", root)
    graph = Y.dataObject.SV()(root)
    graph.namespace_manager = Y.config("rdf.namespace_manager")
    return graph
def timed(f, *args, **kwargs):
    """Call ``f(*args, **kwargs)``, print how long it took, return its result.

    Args:
        f: Callable to run.
        *args: Positional arguments forwarded to ``f``.
        **kwargs: Keyword arguments forwarded to ``f``.

    Returns:
        Whatever ``f`` returns.
    """
    # perf_counter is monotonic and high-resolution, so the measured interval
    # is not distorted by system clock adjustments.
    from time import perf_counter

    started = perf_counter()
    result = f(*args, **kwargs)
    elapsed = perf_counter() - started
    print(f, "took", elapsed)
    return result
if __name__ == "__main__":
    # Script entry point: connect, optionally take the random seed from the
    # first command-line argument, then time one full run of qq().
    Y.connect()
    # Earlier hand-built example, kept for reference:
    #A = Y.DataObject
    #z = A(key="z")
    #a = A(key="a")
    #b = A(key="b")
    #c = A(key="c")
    #z.set_parent('saw_on_TV', a)
    #c.set_parent('saw_on_TV', a)
    #a.set_parent('voted_for', b)
    #result = Y.dataObject.SV()(b)
    #result.namespace_manager=Y.config("rdf.namespace_manager")
    #print(result.serialize(format="n3").decode("UTF-8"))
    #PP.pprint(Y.dataObject.QU()(z))
    cli_args = sys.argv[1:]
    if cli_args:
        SEED = int(cli_args[0])
    print("NOBJS", NOBJS, "NTRIPS", NTRIPS)
    timed(qq)
    #print(s.getvalue())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment