Last active
January 19, 2020 06:06
-
-
Save victoriastuart/c4e3230ade5266c9330a618be560b849 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# https://gist.github.com/victoriastuart/c4e3230ade5266c9330a618be560b849
# https://www.reddit.com/r/LanguageTechnology/comments/b8xdb1/find_independent_clause_from_a_statement/
import nltk | |
from nltk.tag import pos_tag | |
import re | |
def getSentenceType(statement):
    """Split *statement* on whitespace and part-of-speech tag the tokens.

    Despite the name, this does not classify the sentence: it returns
    whatever ``pos_tag`` produces for the whitespace-separated tokens.
    Because the split is purely on whitespace, punctuation stays attached to
    the neighbouring word (e.g. ``"smart,"`` is tagged as a single token).
    """
    tokens = statement.split()
    return pos_tag(tokens)
def sentSegs(taggedSent, grammar, loops):
    """Chunk a POS-tagged sentence with a regular-expression grammar.

    Args:
        taggedSent: Tagged tokens, e.g. the output of ``getSentenceType``.
        grammar: Chunk grammar string handed to ``nltk.RegexpParser``.
        loops: Forwarded to the parser as its ``loop`` argument.

    Returns:
        Whatever ``RegexpParser.parse`` returns for *taggedSent*.
    """
    chunker = nltk.RegexpParser(grammar, loop=loops)
    return chunker.parse(taggedSent)
# Cascaded chunk grammar for nltk.RegexpParser, one rule per line:
#   NP     - one or more determiner / adjective / noun tags
#   PP     - a preposition (IN) followed by an NP chunk
#   VP     - a verb tag followed by one or more NP/PP/CLAUSE chunks; the
#            trailing `$` anchors the match at the end of the tag sequence
#   CLAUSE - an NP chunk followed by a VP chunk
# NOTE(review): PRP is not part of NP, so a pronoun subject such as "He" is
# left unchunked and no CLAUSE is built around it -- confirm this is intended.
grammar = r"""
  NP: {<DT|JJ|NN.*>+}
  PP: {<IN><NP>}
  VP: {<VB.*><NP|PP|CLAUSE>+$}
  CLAUSE: {<NP><VP>}
"""
# Demo: tag a two-clause sentence, run a single pass of the chunk grammar over
# it, and show the resulting tree in bracketed and in ASCII-art form.
sentence = getSentenceType("He is smart, and he is punctual")
result = sentSegs(sentence, grammar, loops=1)

print(result)
'''
(S
  He/PRP
  is/VBZ
  (NP smart,/JJ)
  and/CC
  he/PRP
  (VP is/VBZ (NP punctual/JJ)))
'''

# pretty_print() draws the tree itself and returns None, so it is called
# directly; wrapping it in print() emitted a stray "None" line after the tree.
result.pretty_print()
# Sample output (column alignment is approximate):
'''
                         S
    _____________________|_____________________
    |      |      |      |       |            VP
    |      |      |      |       |        ____|_____
    |      |      |      |      NP        |       NP
    |      |      |      |       |        |        |
 He/PRP is/VBZ and/CC he/PRP smart,/JJ is/VBZ punctual/JJ
'''
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment