Skip to content

Instantly share code, notes, and snippets.

@pbexe
Created December 12, 2015 16:07
Show Gist options
  • Save pbexe/7262a1082c6f13d230fd to your computer and use it in GitHub Desktop.
Save pbexe/7262a1082c6f13d230fd to your computer and use it in GitHub Desktop.
An example of NLP chunking in Python
import nltk
def prepareForNLP(text):
    """Sentence-split, word-tokenize and POS-tag *text* with NLTK.

    Returns a list with one entry per sentence found by
    ``nltk.sent_tokenize``; each entry is the output of ``nltk.pos_tag``
    applied to that sentence's word tokens.
    """
    tagged_sentences = []
    for raw_sentence in nltk.sent_tokenize(text):
        tokens = nltk.word_tokenize(raw_sentence)
        tagged_sentences.append(nltk.pos_tag(tokens))
    return tagged_sentences
def chunk(sentence):
    """Print each NP chunk found in a POS-tagged *sentence*, one per line.

    *sentence* is passed straight to ``nltk.RegexpParser.parse``; the
    leaves of every subtree labelled ``NP`` are unpacked as
    ``(word, tag)`` pairs and the words are printed joined by spaces.
    Nothing is returned.
    """
    # Three alternative tag patterns, all producing chunks labelled NP.
    grammar = """
NP: {<NNP>*}
{<DT>?<JJ>?<NNS>}
{<NN><NN>}"""
    tree = nltk.RegexpParser(grammar).parse(sentence)
    for node in tree.subtrees():
        # Skip the root and any other non-NP subtree.
        if node.label() != 'NP':
            continue
        print(' '.join(token for token, _tag in node.leaves()))
# Demo: tag a sample headline, then print the NP chunks of each sentence.
sentences = prepareForNLP(
    "A prison riot left six members of staff needing hospital treatment "
    "earlier this month, the BBC learns"
)
for sentence in sentences:
    chunk(sentence)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment