Skip to content

Instantly share code, notes, and snippets.

@arademaker
Created November 28, 2018 02:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arademaker/dcedb56952f5aa014c6729211cdb2540 to your computer and use it in GitHub Desktop.
Save arademaker/dcedb56952f5aa014c6729211cdb2540 to your computer and use it in GitHub Desktop.
Wilbur vs rdflib
import sys
import jsonlines
import rdflib
from rdflib.namespace import RDFS, SKOS, RDF
from rdflib import Namespace
import json
# Usage: <script> ONTOLOGY.nt OUTPUT.jsonl
#
# Validate the command line before doing any work: the output path
# (sys.argv[2]) is only read at the bottom of the script, so without this
# check a missing argument would surface as an IndexError only after the
# whole ontology had been parsed.
if len(sys.argv) < 3:
    sys.exit("usage: %s ONTOLOGY.nt OUTPUT.jsonl" % sys.argv[0])

# Path of the N-Triples file to load.
ont = sys.argv[1]

# Module-level graph queried by the helper functions in this file.
g = rdflib.Graph()
g.parse(ont, format="nt")

# Namespace used to build the DU.* predicate and class terms.
DU = Namespace("http://br.ibm.com/document-understanding/")
def get_relations(sent):
    """Return the relations attached to the sentence node *sent*.

    Each relation is rendered as a dict with the keys ``origin``,
    ``subject``, ``predicate`` and ``object``, each looked up with
    ``g.value`` on the relation node using the matching ``DU`` predicate
    (``None`` is stored when the lookup finds nothing).

    Args:
        sent: Graph node identifying the sentence.

    Returns:
        A list of dicts, one per relation node found.
    """
    # Graph.subjects() takes (predicate, object), so the previous call
    # g.subjects(sent, DU.hasRelation) searched for triples whose *predicate*
    # was the sentence node.  The argument order matches
    # Graph.objects(subject, predicate), which is what is used here.
    # NOTE(review): assumes triples are shaped `sent DU.hasRelation rel`;
    # if the data stores `rel DU.hasRelation sent` instead, use
    # g.subjects(DU.hasRelation, sent) -- confirm against the ontology.
    return [
        dict(
            origin=g.value(rel, DU.origin),
            subject=g.value(rel, DU.subject),
            predicate=g.value(rel, DU.predicate),
            object=g.value(rel, DU.object),
        )
        for rel in g.objects(sent, DU.hasRelation)
    ]
def get_tokens(sent):
    """Return dict descriptions of the nodes linked to the sentence *sent*.

    Every node ``tk`` with a ``tk DU.sentence sent`` triple is inspected:

    * nodes typed ``DU.Token`` are emitted only when they carry a
      ``DU.wn30_sense`` or ``DU.geoname_sense`` value, as a dict with the
      keys ``txt``, ``type``, ``wn`` and ``geo``;
    * nodes of any other type are always emitted, as a dict with the keys
      ``txt`` and ``type``.

    NOTE(review): the block nesting was reconstructed from a copy of the
    source without indentation; the ``else`` branch is assumed to pair with
    the ``DU.Token`` type check rather than with the sense check -- confirm.

    Args:
        sent: Graph node identifying the sentence.

    Returns:
        A list of dicts in the order the graph yields the nodes.
    """
    objs = []
    for tk in g.subjects(DU.sentence, sent):
        # Named node_type (not `type`) to avoid shadowing the builtin.
        node_type = g.value(tk, RDF.type)
        lbl = g.value(tk, RDFS.label)
        if node_type == DU.Token:
            wn = g.value(tk, DU.wn30_sense)
            geo = g.value(tk, DU.geoname_sense)
            # Tokens with neither sense annotation are skipped.
            if wn is not None or geo is not None:
                objs.append(dict(txt=lbl, type=node_type, wn=wn, geo=geo))
        else:
            objs.append(dict(txt=lbl, type=node_type))
    return objs
def sentence(sent):
    """Build the exportable record for the sentence node *sent*.

    Returns:
        A dict with the keys ``text`` (the node's ``RDFS.label`` value),
        ``status`` (its ``DU.complete`` value), ``relations`` (the result of
        ``get_relations``) and ``tokens`` (the result of ``get_tokens``).
    """
    label = g.value(sent, RDFS.label)
    completeness = g.value(sent, DU.complete)
    return {
        "text": label,
        "status": completeness,
        "relations": get_relations(sent),
        "tokens": get_tokens(sent),
    }
# Write one JSON object per node typed DU.Sentence to the output file
# named by the second command-line argument.
with jsonlines.open(sys.argv[2], mode='w') as out:
    for sent_node in g.subjects(RDF.type, DU.Sentence):
        out.write(sentence(sent_node))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment