Skip to content

Instantly share code, notes, and snippets.

@pebbie
Created February 25, 2015 13:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pebbie/1ceca1168785722c264f to your computer and use it in GitHub Desktop.
Save pebbie/1ceca1168785722c264f to your computer and use it in GitHub Desktop.
A wrapper that exposes a Linked Data Fragments endpoint as a Graph Store in RDFLib
from rdflib.store import Store
from rdflib import Graph, RDF, URIRef, Namespace
import urllib.parse as urlparse
def add_params(origUrl, paramsDict):
    """Return *origUrl* with the entries of *paramsDict* merged into its query.

    Existing query parameters are kept (including ones with an empty value,
    such as ``subject=``); a key present in both places takes the value from
    *paramsDict*. Entries of *paramsDict* whose value is ``None`` are ignored,
    while empty strings are emitted as ``key=``.

    Repeated keys in the original query collapse to their last occurrence
    because the query is handled as a plain dict.
    """
    url_parts = list(urlparse.urlparse(origUrl))
    # Index 4 of the urlparse() result is the query string.
    # keep_blank_values=True: without it parse_qsl drops "key=" pairs, which
    # would strip blank parameters that were already part of the URL.
    query = dict(urlparse.parse_qsl(url_parts[4], keep_blank_values=True))
    query.update({key: value for key, value in paramsDict.items()
                  if value is not None})
    url_parts[4] = urlparse.urlencode(query)
    return urlparse.urlunparse(url_parts)
# Namespace used below for the hydra:Collection, hydra:totalItems and
# hydra:nextPage terms found in fragment responses.
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')
# VoID vocabulary namespace.
VOID = Namespace('http://rdfs.org/ns/void#')
class LDFStore(Store):
    """
    Linked Data Fragment Store

    Read-only RDFLib store that answers triple-pattern lookups by fetching
    pages from a Linked Data Fragments endpoint and following the Hydra
    paging links found in each response.
    """

    def __init__(self, endpoint, configuration=None, fetch_limit=100):
        """Create a store for the fragments server at *endpoint*.

        :param endpoint: base URL of the fragments endpoint.
        :param configuration: passed through unchanged to ``Store.__init__``.
        :param fetch_limit: paging budget for :meth:`triples`. Once more than
            this many triples have been yielded for one pattern, no further
            page is requested (pages are never cut short, so slightly more
            may be yielded). ``None`` disables the budget. The default of
            100 matches the limit that used to be hard-coded.
        """
        super(LDFStore, self).__init__(configuration)
        self.endpoint = endpoint
        self.fetch_limit = fetch_limit
        self.__namespace = {}   # prefix -> namespace
        self.__prefix = {}      # namespace -> prefix
        self.__len = None       # cached hydra:totalItems of the whole dataset

    def add(self, triple_pattern, context, quoted=False):
        """Ignore the request: a Linked Data Fragment is immutable.

        Kept as a silent no-op (rather than raising NotImplementedError) so
        existing callers that add to the graph keep working.
        """
        pass

    def remove(self, triple_pattern, context=None):
        """Ignore the request: a Linked Data Fragment is immutable.

        Kept as a silent no-op (rather than raising NotImplementedError) so
        existing callers that remove from the graph keep working.
        """
        pass

    @staticmethod
    def _total_items(page, fragment_uri):
        """Return hydra:totalItems of *fragment_uri* in *page* as an int.

        Returns ``None`` when the property is absent or is not an integer,
        so callers can treat the total as unknown instead of crashing.
        """
        value = page.value(fragment_uri, HYDRA.totalItems)
        if value is None:
            return None
        try:
            return int(value.toPython())
        except (AttributeError, TypeError, ValueError):
            return None

    def triples(self, triple_pattern, context=None):
        """Yield ``((s, p, o), contexts)`` for triples served for the pattern.

        The pattern terms are sent as the ``subject``, ``predicate`` and
        ``object`` query parameters (empty string for ``None``). Triples
        whose subject is typed hydra:Collection, or whose subject contains
        the first page URL, are treated as response metadata and skipped.

        Paging stops when the advertised total has been reached, when the
        ``fetch_limit`` budget is exceeded, or when the current page offers
        no (new) next-page link.
        """
        subj, pred, obj = triple_pattern
        # NOTE(review): toPython() on a Literal yields the bare Python value,
        # so datatype/language are not sent to the server - confirm this is
        # what the endpoint expects for literal objects.
        first_page_url = add_params(self.endpoint, {
            'subject': subj.toPython() if subj is not None else '',
            'predicate': pred.toPython() if pred is not None else '',
            'object': obj.toPython() if obj is not None else '',
        })
        page_uri = URIRef(first_page_url)
        page = Graph()
        page.parse(first_page_url)
        # None means the server did not advertise a usable total.
        total = self._total_items(page, page_uri)

        metadata_subjects = set()
        visited_pages = {page_uri}
        yielded = 0
        while True:
            # Collection resources seen on earlier pages stay excluded.
            metadata_subjects.update(page.subjects(RDF.type, HYDRA.Collection))
            for s, p, o in page:
                if s in metadata_subjects or first_page_url in s:
                    continue
                yielded += 1
                yield (s, p, o), self.__contexts()

            if total is not None and yielded >= total:
                break
            if self.fetch_limit is not None and yielded > self.fetch_limit:
                break

            next_uri = page.value(page_uri, HYDRA.nextPage)
            if next_uri is None:
                # Fall back to hydra:next, which servers may use instead of
                # hydra:nextPage.
                next_uri = page.value(page_uri, HYDRA['next'])
            # Stop on the last page, and never loop on cyclic page links.
            if next_uri is None or next_uri in visited_pages:
                break
            visited_pages.add(next_uri)
            page_uri = next_uri
            page = Graph()
            page.parse(str(page_uri))

    def __len__(self, context=None):
        """Return the dataset size advertised by the endpoint.

        The value is hydra:totalItems of the unconstrained fragment and is
        cached after the first successful lookup. When the endpoint does not
        advertise a usable total, 0 is returned (``len()`` requires an int)
        and the lookup is retried on the next call.
        """
        if self.__len is None:
            target_url = add_params(
                self.endpoint, {'subject': '', 'predicate': '', 'object': ''})
            page = Graph()
            page.parse(target_url)
            self.__len = self._total_items(page, URIRef(target_url))
        return self.__len if self.__len is not None else 0

    def bind(self, prefix, namespace, override=True):
        """Associate *prefix* with *namespace* (both directions).

        :param override: when false, an existing binding of either the
            prefix or the namespace is left untouched. Accepted so callers
            that pass ``override`` do not fail; the default keeps the
            previous always-rebind behaviour.
        """
        if not override and (prefix in self.__namespace
                             or namespace in self.__prefix):
            return
        # Drop reverse entries that the new binding makes stale, so that
        # prefix() and namespace() never disagree.
        old_namespace = self.__namespace.get(prefix)
        if old_namespace is not None and old_namespace != namespace:
            self.__prefix.pop(old_namespace, None)
        old_prefix = self.__prefix.get(namespace)
        if old_prefix is not None and old_prefix != prefix:
            self.__namespace.pop(old_prefix, None)
        self.__prefix[namespace] = prefix
        self.__namespace[prefix] = namespace

    def namespace(self, prefix):
        """Return the namespace bound to *prefix*, or ``None``."""
        return self.__namespace.get(prefix, None)

    def prefix(self, namespace):
        """Return the prefix bound to *namespace*, or ``None``."""
        return self.__prefix.get(namespace, None)

    def namespaces(self):
        """Yield every ``(prefix, namespace)`` binding."""
        # dict.iteritems() does not exist on Python 3; items() is the
        # equivalent view.
        for prefix, namespace in self.__namespace.items():
            yield prefix, namespace

    def __contexts(self):
        """Return an empty iterator: this store has no named contexts."""
        return iter(())
@pebbie
Copy link
Author

pebbie commented Feb 25, 2015

# Example: plugging LDFStore into an RDFLib Graph as its backing store.
from ldfstore import LDFStore
from rdflib import Graph,Namespace, RDF

if __name__ == "__main__":
    VOID = Namespace('http://rdfs.org/ns/void#')

    # Wrap the remote fragments endpoint in a store, then hand it to Graph.
    store = LDFStore('http://ldf.lodlaundromat.org')
    graph = Graph(store=store)

    # Size reported by the store.
    size = len(graph)
    print(size)

    # Every subject typed as void:Dataset.
    datasets = list(graph.subjects(RDF.type, VOID.Dataset))
    print(datasets)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment