Skip to content

Instantly share code, notes, and snippets.

@appachan
Last active May 8, 2019 10:46
Show Gist options
  • Save appachan/f1f9ab1a5661d996241dd2b2e33298cb to your computer and use it in GitHub Desktop.
Save appachan/f1f9ab1a5661d996241dd2b2e33298cb to your computer and use it in GitHub Desktop.
from predpatt import PredPatt, PredPattOpts
from predpatt.util.ud import dep_v2, dep_v1
from predpatt.UDParse import UDParse, DepTriple
import spacy
import stanfordnlp
import argparse
# Parser pipelines used by main(). They stay None at import time and are
# filled in by the __main__ block, which loads only the parsers that were
# not disabled on the command line.
STANFORDNLP = None  # callable StanfordNLP pipeline (stanfordnlp.Pipeline), or None
GINZA = None  # callable spaCy/GiNZA pipeline (spacy.load result), or None
def _report_predpatt(tokens, tags, triples, options):
    """Run PredPatt on one parsed sentence and print the results.

    Prints, in order: the pretty-printed PredPatt output, the tokens, the
    tags and the dependency triples. If at least one predicate instance was
    extracted, also prints the first instance's arguments, subject and object.
    """
    parse = UDParse(tokens=tokens, tags=tags, triples=triples)
    pp = PredPatt(parse, opts=options)
    print(pp.pprint())
    print(tokens)
    print(tags)
    print(triples)
    if not pp.instances:
        # No predicate was extracted, so there is nothing more to show.
        return
    first = pp.instances[0]
    print(first.arguments)
    print(first.subj())
    print(first.obj())


def main(text, without_stanfordnlp, without_ginza):
    """Parse ``text`` with the enabled parsers and print PredPatt output.

    For every sentence produced by each enabled parser, the tokens, tags and
    dependency triples are handed to PredPatt (configured for UD v2) and the
    extraction is printed to stdout.

    Args:
        text: Raw input text to analyse.
        without_stanfordnlp: If true, skip the StanfordNLP pass.
        without_ginza: If true, skip the GiNZA pass.

    Returns:
        None. All results are printed.

    Note:
        The module-level ``STANFORDNLP`` / ``GINZA`` pipelines must already
        be loaded for whichever parser is enabled; they are called directly.
    """
    if without_stanfordnlp and without_ginza:
        # Both parsers disabled: nothing to run, so skip building options.
        return

    # Use the UD v2.0 rule set (dep_v1.VERSION would select UD v1.0).
    options = PredPattOpts(ud=dep_v2.VERSION)

    # StanfordNLP
    if not without_stanfordnlp:
        doc = STANFORDNLP(text)
        for sent in doc.sentences:
            words = sent.words
            tokens = [word.text for word in words]
            tags = [word.upos for word in words]
            # Parser indexing starts at one, but we want indexing to start
            # at zero. Hence the -1 on both governor and dependent.
            triples = [
                DepTriple(
                    rel=word.dependency_relation,
                    gov=word.governor - 1,
                    dep=int(word.index) - 1,
                )
                for word in words
            ]
            _report_predpatt(tokens, tags, triples, options)

    # GiNZA
    if not without_ginza:
        doc = GINZA(text)
        for sent in doc.sents:
            tokens = [token.orth_ for token in sent]
            tags = [token.pos_ for token in sent]
            triples = []
            for token in sent:
                # Subtract sent.start so indices are relative to this
                # sentence; the root token is given governor -1.
                if token.dep_ in ("root", "ROOT"):
                    gov = -1
                else:
                    gov = token.head.i - sent.start
                dep = token.i - sent.start
                triples.append(DepTriple(rel=token.dep_, gov=gov, dep=dep))
            _report_predpatt(tokens, tags, triples, options)
if __name__ == "__main__":
    # Command-line entry point: load the requested parsers once, then read
    # sentences from stdin in a loop and print the PredPatt analysis of each.
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--without-stanfordnlp', action='store_true',
                           help='do not load or run the StanfordNLP parser')
    argparser.add_argument('--without-ginza', action='store_true',
                           help='do not load or run the GiNZA parser')
    args = argparser.parse_args()

    # Only load the pipelines that will actually be used; a disabled parser
    # keeps its module-level placeholder set to None.
    STANFORDNLP = stanfordnlp.Pipeline(lang='ja') if not args.without_stanfordnlp else None
    GINZA = spacy.load('ja_ginza_nopn') if not args.without_ginza else None

    while True:
        print("\n>> input text:")
        try:
            text = input().strip()
        except (EOFError, KeyboardInterrupt):
            # End of input (Ctrl-D) or Ctrl-C at the prompt: leave the loop
            # quietly instead of dying with a traceback.
            print()
            break
        if not text:
            # Blank line: nothing to parse, prompt again.
            continue
        main(text, args.without_stanfordnlp, args.without_ginza)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment