Created
March 26, 2015 14:27
-
-
Save haje01/88ab158e0728ed36f193 to your computer and use it in GitHub Desktop.
NLTK Book Ch7-Ex
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:cd3679987cb24ec1c4bd7fc9d70b6eb5c880b01bdab784b91811dd3652185cde" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Exercise 7-2\n", | |
"\n", | |
"Write a tag pattern to match noun phrases containing plural head nouns. \n", | |
"\n", | |
"*Examples:*\n", | |
"\n", | |
" many/JJ researchers/NNS\n", | |
" two/CD weeks/NNS\n", | |
" both/DT new/JJ positions/NNS\n", | |
"\n", | |
"Try to do this by generalizing the tag pattern that handled singular noun phrases. " | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"import nltk" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 1 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"grammar = r\"\"\"\n", | |
" NPS: {<CD|DT>?<JJ>*<NNS>}\n", | |
"\n", | |
" NP: {<DT|PP\\$>?<JJ>*<NN>}\n", | |
" {<NNP>+}\n", | |
" NPS: {<CD>?<NP>}\n", | |
"\"\"\"" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 9 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"grammar = r\"\"\"\n", | |
" NPH: {<DT|PP\\$>?<JJ>*}\n", | |
" NP: {<NPH><NN>}\n", | |
" {<NNP>+}\n", | |
" NPS: {<NPH><NNS>}\n", | |
"\"\"\"\n", | |
"cp = nltk.RegexpParser(grammar)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [], | |
"prompt_number": 138 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [ | |
"sentence = [(\"Rapunzel\", \"NNP\"), (\"let\", \"VBD\"), (\"down\", \"RP\"), \n", | |
" (\"her\", \"PP$\"), (\"long\", \"JJ\"), (\"golden\", \"JJ\"), \n", | |
" (\"hair\", \"NN\")]\n", | |
"print cp.parse(sentence)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(S\n", | |
" (NP Rapunzel/NNP)\n", | |
" let/VBD\n", | |
" down/RP\n", | |
" (NP (NPH her/PP$ long/JJ golden/JJ) hair/NN))\n" | |
] | |
} | |
], | |
"prompt_number": 139 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [ | |
"sentence = [(\"Many\", \"JJ\"), (\"researchers\", \"NNS\"), (\"saw\", \"VBD\"), \n", | |
" (\"it\", \"PPS\")]\n", | |
"print cp.parse(sentence)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"(S (NPS (NPH Many/JJ) researchers/NNS) saw/VBD it/PPS)\n" | |
] | |
} | |
], | |
"prompt_number": 140 | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Exercise 7-15\n", | |
"Develop an NP chunker that converts POS-tagged text into a list of tuples, where each tuple consists of a verb followed by a sequence of noun phrases and prepositions.\n", | |
"\n", | |
"*Example:*\n", | |
"\n", | |
" the little cat sat on the mat -> ('sat', 'on', 'NP')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": true, | |
"input": [ | |
"grammar = r\"\"\"\n", | |
" NP: {<DT|PP\\$>?<JJ>*<NN>}\n", | |
" {<NNP>+}\n", | |
" VFNP: {<VBD><IN><NP>} \n", | |
"\"\"\"\n", | |
"cp = nltk.RegexpParser(grammar)\n", | |
"\n", | |
"def print_vfnp(tree):\n", | |
" for subtree in tree:\n", | |
" if type(subtree) == nltk.tree.Tree:\n", | |
" if subtree.label() == 'VFNP':\n", | |
" for e in subtree:\n", | |
" if type(e) == nltk.tree.Tree:\n", | |
" print e.label()\n", | |
" else:\n", | |
" print e[0],\n", | |
" else:\n", | |
" print_vfnp(subtree)\n", | |
"\n", | |
"sent = [('the', 'DT'), ('little', 'JJ'), ('cat', 'NN'), ('sat', 'VBD'), \n", | |
" ('on', 'IN'), ('the', 'DT'), ('mat', 'NN')]\n", | |
"trees = cp.parse(sent)\n", | |
"print_vfnp(trees)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"sat on NP\n" | |
] | |
} | |
], | |
"prompt_number": 141 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"for i in range(1, 100):\n", | |
" sent = nltk.corpus.treebank.tagged_sents()[i]\n", | |
" trees = cp.parse(sent)\n", | |
" print_vfnp(trees)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"heard of NP\n", | |
"was" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" under NP\n", | |
"lengthened" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" by NP\n", | |
"dropped" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" at NP\n", | |
"was" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
" for NP\n" | |
] | |
} | |
], | |
"prompt_number": 142 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment