Last active
December 4, 2018 23:38
-
-
Save adamnagel/e4ef28de5ebe4016a325b2469f5807f0 to your computer and use it in GitHub Desktop.
RDFLib performance testing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json
import re
import xml.dom.minidom
from timeit import default_timer as timer

try:
    # The C accelerator alias was removed in Python 3.9; keep it for older
    # interpreters and fall back to the standard module otherwise.
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

from rdflib import Namespace, Graph, Literal, ConjunctiveGraph
from rdflib.namespace import RDF, RDFS, DCTERMS
from rdflib.store import NO_STORE, VALID_STORE
def time_msg(msg, start):
    """Print the time elapsed since ``start`` in milliseconds, followed by ``msg``.

    Args:
        msg: Label printed after the elapsed time.
        start: Reference timestamp previously obtained from ``timer()``.
    """
    elapsed_ms = (timer() - start) * 1000
    line = '{:.4f} ms\t| {}'.format(elapsed_ms, msg)
    print(line)
# --- Graph set-up -----------------------------------------------------------
# Build both candidate back-ends and report how long each one takes to create.

# In-memory graph.
start = timer()
mg = Graph()
time_msg('creating graph in mem', start)

# Graph backed by the Sleepycat store opened at the path 'cat'.
# Restart the clock first so this measurement does not also include the
# in-memory set-up (and its print) above.
start = timer()
scg = ConjunctiveGraph(store='Sleepycat')
rt = scg.open('cat', create=False)
if rt == NO_STORE:
    # There is no underlying Sleepycat infrastructure, create it
    scg.open('cat', create=True)
else:
    assert rt == VALID_STORE, 'The underlying store is corrupt'
time_msg('creating graph in Sleepycat', start)

# `g` is the graph exercised by the rest of the script.
# Use the in-memory version
g = mg
# Use the Sleepycat version instead
# g = scg
def ResultDictViaXml(res):
    """Convert a SPARQL query result into a list of plain dicts via its XML form.

    The result is serialized as XML, parsed with ElementTree, and every
    ``result`` element in the SPARQL results namespace becomes one dict.

    Args:
        res: Object whose ``serialize(format='xml')`` returns a SPARQL results
            XML document (for example the value returned by ``g.query``).

    Returns:
        A list with one dict per ``result`` element. Each dict maps a
        binding's ``name`` attribute to the text of the binding's first child
        element; the value is ``None`` when that child has no text or the
        binding has no child element at all.
    """
    ns = '{http://www.w3.org/2005/sparql-results#}'
    # Ask for XML explicitly rather than relying on the serializer's default.
    root = ET.fromstring(res.serialize(format='xml'))

    rows = []
    for result in root.findall('.//' + ns + 'result'):
        row = {}
        for binding in result.findall('.//' + ns + 'binding'):
            # '*' selects the binding's first child element, whatever its tag.
            term = binding.find('*')
            # Guard against a childless binding instead of failing on None.
            row[binding.attrib['name']] = term.text if term is not None else None
        rows.append(row)
    return rows
# --- Graph population -------------------------------------------------------
# Namespace used to build the subject and predicate URIs of the test data.
ns_temp = Namespace('https://rdf.someplace.com/test-data#')

# Number of triples added per batch; constant, so set once outside the loop.
num_triples = 10000

# Add three batches of triples. Each batch has its own subject and predicate
# and uses the integers 0..num_triples-1 as literal objects. The loop variable
# is named `batch` rather than `iter` so the builtin is not shadowed.
for batch in range(3):
    uri_subject = ns_temp['subject{}'.format(batch)]
    uri_predicate = ns_temp['verb{}'.format(batch)]

    start = timer()
    for i in range(num_triples):
        uri_object = Literal(i)
        g.add((uri_subject, uri_predicate, uri_object))
    time_msg('add {:,d} triples individually {}'.format(num_triples, batch), start)
# --- Query and serialization benchmarks -------------------------------------
# NOTE(review): this section is assumed to run once, after all triples have
# been added; none of its messages mention the batch number — confirm.

# Every benchmark re-runs this query so that it starts from a fresh result.
DISTINCT_SP_QUERY = """SELECT DISTINCT ?s ?p
WHERE {
?s ?p ?o
}"""

# Time the query call itself.
start = timer()
qres = g.query(DISTINCT_SP_QUERY)
time_msg('query', start)

# Time plain iteration: once at the first row, once after the last row.
start = timer()
first = True
for row in qres:
    if first:
        first = False
        # The label has no placeholder, so nothing is formatted into it.
        # Calling len(qres) here would add the cost of sizing the result to
        # the first-row measurement (presumably it materializes the rows
        # while they are still being iterated — verify against rdflib).
        time_msg('iterate first result', start)
time_msg('iterate {} results'.format(len(qres)), start)
num_results = len(qres)

# Time iteration when each row is also converted to a dict.
qres = g.query(DISTINCT_SP_QUERY)
start = timer()
for row in qres:
    row.asdict()
time_msg('iterate {} results (with .asdict())'.format(num_results), start)

# Time the conversion to dicts through the XML serialization.
qres = g.query(DISTINCT_SP_QUERY)
start = timer()
new_res = ResultDictViaXml(qres)
time_msg('results via XML', start)

# Time JSON serialization of the result.
qres = g.query(DISTINCT_SP_QUERY)
start = timer()
qres.serialize(format='json')
time_msg('results serialize JSON', start)

# Time CSV serialization of the result.
qres = g.query(DISTINCT_SP_QUERY)
start = timer()
qres.serialize(format='csv')
time_msg('results serialize CSV', start)

# Write the graph to test.ttl; start the clock before the call so the
# measurement covers it, and report the result like the other benchmarks.
start = timer()
g.serialize('test.ttl', format='ttl')
time_msg('serialize graph to test.ttl', start)

# Release the Sleepycat store opened during set-up.
scg.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment