# appachan/predpatt-ginza.py

## predpatt-ginza.py
from predpatt import PredPatt, PredPattOpts
from predpatt.util.ud import dep_v2, dep_v1
from predpatt.UDParse import UDParse, DepTriple

import spacy
import stanfordnlp

import argparse

# Parser pipelines read by main(). Both are None at import time; the
# __main__ block assigns a loaded pipeline to each one unless the matching
# --without-* flag was given.
STANFORDNLP = None  # called as STANFORDNLP(sentence) in main()
GINZA = None  # called as GINZA(sentence) in main()

def _print_extraction(tokens, tags, triples, options):
    """Run PredPatt over one parsed sentence and print what it extracted.

    Prints, in order: the pretty-printed extraction, the tokens, the tags and
    the dependency triples. If at least one predicate instance was found, the
    first instance's arguments, subject and object are printed as well.

    Args:
        tokens: Surface forms of the sentence's words, in sentence order.
        tags: Part-of-speech tags aligned with ``tokens``.
        triples: One ``DepTriple`` per word, using sentence-local zero-based
            indices, with ``gov == -1`` marking the root.
        options: ``PredPattOpts`` instance forwarded to ``PredPatt``.
    """
    parse = UDParse(tokens=tokens, tags=tags, triples=triples)
    pp = PredPatt(parse, opts=options)
    print(pp.pprint())
    print(tokens)
    print(tags)
    print(triples)
    if not pp.instances:
        return
    first = pp.instances[0]
    print(first.arguments)
    print(first.subj())
    print(first.obj())


def main(text, without_stanfordnlp, without_ginza):
    """Parse ``text`` with each enabled backend and print the PredPatt output.

    The module-level ``STANFORDNLP`` and ``GINZA`` pipelines must already be
    loaded for every backend that is not disabled.

    Example input: "太郎は本を買った．" ("Taro bought a book.")

    Args:
        text: Raw text to analyse; it may contain several sentences.
        without_stanfordnlp: If true, skip the stanfordnlp backend.
        without_ginza: If true, skip the GiNZA backend.
    """
    options = PredPattOpts(ud=dep_v2.VERSION)  # UD v2; dep_v1.VERSION selects v1

    # stanfordnlp
    if not without_stanfordnlp:
        for sent in STANFORDNLP(text).sentences:
            words = sent.words
            # The parser's indices start at one, but PredPatt wants them to
            # start at zero. Hence the -1 on both ends of every triple.
            triples = [
                DepTriple(
                    rel=word.dependency_relation,
                    gov=word.governor - 1,
                    dep=int(word.index) - 1,
                )
                for word in words
            ]
            _print_extraction(
                [word.text for word in words],
                [word.upos for word in words],
                triples,
                options,
            )

    # GiNZA
    if not without_ginza:
        for sent in GINZA(text).sents:
            triples = []
            for token in sent:
                # Indices are shifted by sent.start so they are relative to
                # the sentence; the root's governor is forced to -1.
                if token.dep_ in ("root", "ROOT"):
                    gov = -1
                else:
                    gov = token.head.i - sent.start
                triples.append(
                    DepTriple(rel=token.dep_, gov=gov, dep=token.i - sent.start)
                )
            _print_extraction(
                [token.orth_ for token in sent],
                [token.pos_ for token in sent],
                triples,
                options,
            )

if __name__ == "__main__":
    # Interactive driver: load the requested pipelines once, then analyse
    # every line typed on stdin until input ends.
    argparser = argparse.ArgumentParser()
    argparser.add_argument('--without-stanfordnlp', action='store_true')
    argparser.add_argument('--without-ginza', action='store_true')
    args = argparser.parse_args()

    # main() reads these module-level names; a disabled backend stays None.
    STANFORDNLP = stanfordnlp.Pipeline(lang='ja') if not args.without_stanfordnlp else None
    GINZA = spacy.load('ja_ginza_nopn') if not args.without_ginza else None

    while True:
        print("\n>> input text:")
        try:
            text = input().strip()
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C at the prompt: leave the loop instead of
            # terminating with a traceback.
            break
        if not text:
            # Nothing to parse; prompt again.
            continue
        main(text, args.without_stanfordnlp, args.without_ginza)
	from predpatt import PredPatt, PredPattOpts
	from predpatt.util.ud import dep_v2, dep_v1
	from predpatt.UDParse import UDParse, DepTriple

	import spacy
	import stanfordnlp

	import argparse

	# Parser pipelines read by main(). Both are None until the __main__ block
	# assigns a loaded pipeline to each one (unless the matching --without-*
	# flag was given).
	STANFORDNLP = None
	GINZA = None

	def _print_extraction(tokens, tags, triples, options):
		"""Run PredPatt over one parsed sentence and print what it extracted.

		Prints, in order: the pretty-printed extraction, the tokens, the tags
		and the dependency triples. If at least one predicate instance was
		found, the first instance's arguments, subject and object are printed
		as well.

		Args:
			tokens: Surface forms of the sentence's words, in sentence order.
			tags: Part-of-speech tags aligned with ``tokens``.
			triples: One ``DepTriple`` per word, using sentence-local
				zero-based indices, with ``gov == -1`` marking the root.
			options: ``PredPattOpts`` instance forwarded to ``PredPatt``.
		"""
		parse = UDParse(tokens=tokens, tags=tags, triples=triples)
		pp = PredPatt(parse, opts=options)
		print(pp.pprint())
		print(tokens)
		print(tags)
		print(triples)
		if not pp.instances:
			return
		first = pp.instances[0]
		print(first.arguments)
		print(first.subj())
		print(first.obj())


	def main(text, without_stanfordnlp, without_ginza):
		"""Parse ``text`` with each enabled backend and print the PredPatt output.

		The module-level ``STANFORDNLP`` and ``GINZA`` pipelines must already
		be loaded for every backend that is not disabled.

		Args:
			text: Raw text to analyse; it may contain several sentences.
			without_stanfordnlp: If true, skip the stanfordnlp backend.
			without_ginza: If true, skip the GiNZA backend.
		"""
		options = PredPattOpts(ud=dep_v2.VERSION)  # UD v2; dep_v1.VERSION selects v1

		# stanfordnlp
		if not without_stanfordnlp:
			for sent in STANFORDNLP(text).sentences:
				words = sent.words
				# The parser's indices start at one, but PredPatt wants them
				# to start at zero. Hence the -1 on both ends of every triple.
				triples = [
					DepTriple(
						rel=word.dependency_relation,
						gov=word.governor - 1,
						dep=int(word.index) - 1,
					)
					for word in words
				]
				_print_extraction(
					[word.text for word in words],
					[word.upos for word in words],
					triples,
					options,
				)

		# GiNZA
		if not without_ginza:
			for sent in GINZA(text).sents:
				triples = []
				for token in sent:
					# Indices are shifted by sent.start so they are relative
					# to the sentence; the root's governor is forced to -1.
					if token.dep_ in ("root", "ROOT"):
						gov = -1
					else:
						gov = token.head.i - sent.start
					triples.append(
						DepTriple(rel=token.dep_, gov=gov, dep=token.i - sent.start)
					)
				_print_extraction(
					[token.orth_ for token in sent],
					[token.pos_ for token in sent],
					triples,
					options,
				)

	if __name__ == "__main__":
		# Interactive driver: load the requested pipelines once, then analyse
		# every line typed on stdin until input ends.
		argparser = argparse.ArgumentParser()
		argparser.add_argument('--without-stanfordnlp', action='store_true')
		argparser.add_argument('--without-ginza', action='store_true')
		args = argparser.parse_args()

		# main() reads these module-level names; a disabled backend stays None.
		STANFORDNLP = stanfordnlp.Pipeline(lang='ja') if not args.without_stanfordnlp else None
		GINZA = spacy.load('ja_ginza_nopn') if not args.without_ginza else None

		while True:
			print("\n>> input text:")
			try:
				text = input().strip()
			except (EOFError, KeyboardInterrupt):
				# End of input or Ctrl-C at the prompt: leave the loop instead
				# of terminating with a traceback.
				break
			if not text:
				# Nothing to parse; prompt again.
				continue
			main(text, args.without_stanfordnlp, args.without_ginza)