Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Stanford CoreNLPをPythonから使う ref: http://qiita.com/yubessy/items/1869ac2c66f4e76cd6c5
import pprint
import json
import corenlp
# Create the parser, pointing it at the local Stanford CoreNLP directory.
corenlp_dir = "/usr/local/lib/stanford-corenlp-full-2013-06-20/"
parser = corenlp.StanfordCoreNLP(
    corenlp_path=corenlp_dir,
)
# Parse one sentence, decode the returned JSON text, and pretty-print it.
result_json = json.loads(
    parser.parse("I am Alice.")
)
pprint.pprint(result_json)
import pprint
import json
import corenlp
# Create the parser, this time also passing a properties file.
corenlp_dir = "/usr/local/lib/stanford-corenlp-full-2013-06-20/"
properties_file = "./user.properties"
parser = corenlp.StanfordCoreNLP(
    corenlp_path=corenlp_dir,
    properties=properties_file,  # supply the properties setting
)
# Parse one sentence, decode the returned JSON text, and pretty-print it.
result_json = json.loads(
    parser.parse("I am Alice.")
)
pprint.pprint(result_json)
$ curl -L -O http://nlp.stanford.edu/software/stanford-corenlp-full-2013-06-20.zip
$ unzip ./stanford-corenlp-full-2013-06-20.zip -d /usr/local/lib/
$ pip install corenlp-python
{u'coref': [[[[u'I', 0, 0, 0, 1], [u'Alice', 0, 2, 2, 3]]]],
u'sentences': [{u'dependencies': [[u'nsubj', u'Alice', u'I'],
[u'cop', u'Alice', u'am'],
[u'root', u'ROOT', u'Alice']],
u'parsetree': u'(ROOT (S (NP (PRP I)) (VP (VBP am) (NP (NNP Alice))) (. .)))',
u'text': u'I am Alice.',
u'words': [[u'I',
{u'CharacterOffsetBegin': u'0',
u'CharacterOffsetEnd': u'1',
u'Lemma': u'I',
u'NamedEntityTag': u'O',
u'PartOfSpeech': u'PRP'}],
[u'am',
{u'CharacterOffsetBegin': u'2',
u'CharacterOffsetEnd': u'4',
u'Lemma': u'be',
u'NamedEntityTag': u'O',
u'PartOfSpeech': u'VBP'}],
[u'Alice',
{u'CharacterOffsetBegin': u'5',
u'CharacterOffsetEnd': u'10',
u'Lemma': u'Alice',
u'NamedEntityTag': u'PERSON',
u'PartOfSpeech': u'NNP'}],
[u'.',
{u'CharacterOffsetBegin': u'10',
u'CharacterOffsetEnd': u'11',
u'Lemma': u'.',
u'NamedEntityTag': u'O',
u'PartOfSpeech': u'.'}]]}]}
{u'sentences': [{u'dependencies': [],
u'parsetree': [],
u'text': u'I am Alice.',
u'words': [[u'I',
{u'CharacterOffsetBegin': u'0',
u'CharacterOffsetEnd': u'1'}],
[u'am',
{u'CharacterOffsetBegin': u'2',
u'CharacterOffsetEnd': u'4'}],
[u'Alice',
{u'CharacterOffsetBegin': u'5',
u'CharacterOffsetEnd': u'10'}],
[u'.',
{u'CharacterOffsetBegin': u'10',
u'CharacterOffsetEnd': u'11'}]]}]}
annotators = tokenize, ssplit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment