Skip to content

Instantly share code, notes, and snippets.

@adamnagel
Last active December 4, 2018 23:38
Show Gist options
  • Save adamnagel/e4ef28de5ebe4016a325b2469f5807f0 to your computer and use it in GitHub Desktop.
Save adamnagel/e4ef28de5ebe4016a325b2469f5807f0 to your computer and use it in GitHub Desktop.
RDFLib performance testing
from timeit import default_timer as timer
from rdflib import Namespace, Graph, Literal, ConjunctiveGraph
from rdflib.namespace import RDF, RDFS, DCTERMS
import xml.dom.minidom
import json
import xml.etree.cElementTree as ET
import re
from rdflib.store import NO_STORE, VALID_STORE
def time_msg(msg, start):
    """Print the time elapsed since ``start`` next to a label.

    Args:
        msg: Label describing the step that was timed.
        start: Reading of ``timer()`` taken just before the step began.

    The elapsed time is printed in milliseconds with four decimal places,
    followed by a tab, a pipe and the label.
    """
    elapsed_ms = (timer() - start) * 1000
    print('{:.4f} ms\t| {}'.format(elapsed_ms, msg))
# Time the creation of an in-memory graph.
start = timer()
mg = Graph()
time_msg('creating graph in mem', start)

# Time opening (or first creating) the Sleepycat-backed graph. The timer is
# restarted here so this figure does not also include the in-memory step.
start = timer()
scg = ConjunctiveGraph(store='Sleepycat')
rt = scg.open('cat', create=False)
if rt == NO_STORE:
    # There is no underlying Sleepycat infrastructure, create it
    scg.open('cat', create=True)
else:
    assert rt == VALID_STORE, 'The underlying store is corrupt'
time_msg('creating graph in Sleepycat', start)

# Use the in-memory version
g = mg
# Use the Sleepycat version instead
# g = scg
def ResultDictViaXml(res):
    """Convert a query result into a list of dicts by way of its XML form.

    Args:
        res: Query result object whose ``serialize()`` (called with no
            arguments) returns a SPARQL Query Results XML document.

    Returns:
        A list with one dict per ``<result>`` element, mapping each
        binding's ``name`` attribute to the text of the element it contains.
        A binding that contains no child element is skipped.
    """
    ns = '{http://www.w3.org/2005/sparql-results#}'
    root = ET.fromstring(res.serialize())

    new_res = []
    for result in root.findall('.//' + ns + 'result'):
        row = {}
        for binding in result.findall('.//' + ns + 'binding'):
            # The value is the binding's first child element, whatever its tag.
            value = binding.find('*')
            if value is not None:
                row[binding.attrib['name']] = value.text
        new_res.append(row)
    return new_res
ns_temp = Namespace('https://rdf.someplace.com/test-data#')
num_triples = 10000

# Three timed rounds of inserts, each under its own subject/predicate pair.
# The loop variable is not called ``iter`` so the builtin stays usable.
# NOTE(review): the original indentation was lost; the loop is assumed to end
# with the timing message below, the only one that reports the round number.
for round_no in range(3):
    uri_subject = ns_temp['subject{}'.format(round_no)]
    uri_predicate = ns_temp['verb{}'.format(round_no)]

    start = timer()
    for i in range(num_triples):
        g.add((uri_subject, uri_predicate, Literal(i)))
    time_msg('add {:,d} triples individually {}'.format(num_triples, round_no), start)
# Query used by every measurement in this section.
DISTINCT_SP_QUERY = """SELECT DISTINCT ?s ?p
WHERE {
?s ?p ?o
}"""

# NOTE(review): the original indentation was lost; these measurements are
# assumed to run once at module level -- confirm against the intended layout.

# Time issuing the query.
start = timer()
qres = g.query(DISTINCT_SP_QUERY)
time_msg('query', start)

# Time reaching the first row, then iterating the complete result.
start = timer()
first = True
for row in qres:
    if first:
        first = False
        # len(qres) is deliberately not called here: presumably counting the
        # result forces every row to be produced, which would distort the
        # time-to-first-row figure.
        time_msg('iterate first result', start)
num_results = len(qres)
time_msg('iterate {} results'.format(num_results), start)

# Time iterating while converting each row to a dict.
qres = g.query(DISTINCT_SP_QUERY)
start = timer()
for row in qres:
    row.asdict()
time_msg('iterate {} results (with .asdict())'.format(num_results), start)

# Time converting the result to dicts via its XML serialization.
qres = g.query(DISTINCT_SP_QUERY)
start = timer()
new_res = ResultDictViaXml(qres)
time_msg('results via XML', start)

# Time serializing the result as JSON.
qres = g.query(DISTINCT_SP_QUERY)
start = timer()
qres.serialize(format='json')
time_msg('results serialize JSON', start)

# Time serializing the result as CSV.
qres = g.query(DISTINCT_SP_QUERY)
start = timer()
qres.serialize(format='csv')
time_msg('results serialize CSV', start)

# Time writing the whole graph to disk. The timer is started before the call
# and reported afterwards so the measurement is actually printed.
start = timer()
g.serialize('test.ttl', format='ttl')
time_msg('serialize graph to test.ttl', start)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment