Skip to content

Instantly share code, notes, and snippets.

@ainoya
Last active August 14, 2023 09:04
Show Gist options
  • Save ainoya/b7b15dd7fb7de10fbac9 to your computer and use it in GitHub Desktop.
Save ainoya/b7b15dd7fb7de10fbac9 to your computer and use it in GitHub Desktop.
extract phrasal verbs
#!/usr/bin/env python
import pprint
import json
import corenlp
# Directory passed to the wrapper as its CoreNLP location (hard-coded for
# this demo).
corenlp_dir = "/tmp/stanford-corenlp-full-2013-06-20"

# Build the parser object from the `corenlp` package, pointing it at the
# directory above.
parser = corenlp.StanfordCoreNLP(corenlp_path=corenlp_dir)

# The value returned by parse() is fed to json.loads, so it is treated as
# JSON text here; decode it into Python objects before printing.
raw_parse = parser.parse("hey shut down the station")
result_json = json.loads(raw_parse)

# Dump the decoded structure in a readable, indented layout.
pprint.pprint(result_json)
➜ /tmp ./test.py
{u'sentences': [{u'dependencies': [[u'nsubj', u'shut', u'hey'],
[u'root', u'ROOT', u'shut'],
[u'prt', u'shut', u'down'],
[u'det', u'station', u'the'],
[u'dobj', u'shut', u'station']],
u'indexeddependencies': [[u'nsubj', u'shut-2', u'hey-1'],
[u'root', u'ROOT-0', u'shut-2'],
[u'prt', u'shut-2', u'down-3'],
[u'det', u'station-5', u'the-4'],
[u'dobj',
u'shut-2',
u'station-5']],
u'parsetree': u'(ROOT (S (NP (NN hey)) (VP (VBD shut) (PRT (RP down)) (NP (DT the) (NN station)))))',
u'text': u'hey shut down the station',
u'words': [[u'hey',
{u'CharacterOffsetBegin': u'0',
u'CharacterOffsetEnd': u'3',
u'Lemma': u'hey',
u'NamedEntityTag': u'O',
u'PartOfSpeech': u'NN'}],
[u'shut',
{u'CharacterOffsetBegin': u'4',
u'CharacterOffsetEnd': u'8',
u'Lemma': u'shut',
u'NamedEntityTag': u'O',
u'PartOfSpeech': u'VBD'}],
[u'down',
{u'CharacterOffsetBegin': u'9',
u'CharacterOffsetEnd': u'13',
u'Lemma': u'down',
u'NamedEntityTag': u'O',
u'PartOfSpeech': u'RP'}],
[u'the',
{u'CharacterOffsetBegin': u'14',
u'CharacterOffsetEnd': u'17',
u'Lemma': u'the',
u'NamedEntityTag': u'O',
u'PartOfSpeech': u'DT'}],
[u'station',
{u'CharacterOffsetBegin': u'18',
u'CharacterOffsetEnd': u'25',
u'Lemma': u'station',
u'NamedEntityTag': u'O',
u'PartOfSpeech': u'NN'}]]}]}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment