# theaspect/nltk.07.15.py

## nltk.07.15.py
#Develop an NP chunker that converts POS-tagged text into a list of tuples, where each tuple
#consists of a verb followed by a sequence of noun phrases and prepositions, e.g. the little cat
#sat on the mat becomes ('sat', 'on', 'NP')...

import nltk
# Grammar from chapter 7
# NOTE(review): Brown's native tagset appears to mark articles as AT and
# past-tense verbs as VBD, so the example sentence from the exercise may not
# match this pattern as written -- confirm against the corpus before widening it.
grammar = r"""
    NOUNP: {<DT>?<JJ.*>*<NN.*>+} # Noun phrase
    CLAUSE: {<VB><IN><NOUNP>}    # Verb
    """


def _node_label(tree):
    """Return the label of *tree* on both NLTK 3 (``label()``) and NLTK 2 (``.node``)."""
    label = getattr(tree, "label", None)
    return label() if callable(label) else tree.node


def find_clause_tuples(parser, tagged_sents):
    """Collect the distinct (verb, preposition, "NP") tuples found in *tagged_sents*.

    parser       -- object whose ``parse(sent)`` returns a tree exposing
                    ``subtrees()`` (e.g. ``nltk.RegexpParser(grammar)``).
    tagged_sents -- iterable of POS-tagged sentences, each a sequence of
                    (word, tag) pairs.

    Returns a set with one tuple per distinct CLAUSE chunk: the verb word, the
    preposition word, and the literal string "NP" standing in for the noun
    phrase.
    """
    found = set()
    for sent in tagged_sents:
        for subtree in parser.parse(sent).subtrees():
            if _node_label(subtree) == 'CLAUSE':
                # A CLAUSE chunk is <VB><IN><NOUNP>: child 0 is the verb leaf and
                # child 1 the preposition leaf, each a (word, tag) pair.
                found.add((subtree[0][0], subtree[1][0], "NP"))
    return found


def main():
    """Chunk the Brown corpus and print every clause tuple in sorted order."""
    # Tagged corpus
    brown = nltk.corpus.brown
    cp = nltk.RegexpParser(grammar)

    # Output
    for t in sorted(find_clause_tuples(cp, brown.tagged_sents())):
        # Parenthesised form prints the same tuple repr on Python 2 and 3.
        print(t)


if __name__ == "__main__":
    main()