Skip to content

Instantly share code, notes, and snippets.

@oplatek
Created February 20, 2015 16:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oplatek/f5d5b5bc6a25c6a0085a to your computer and use it in GitHub Desktop.
Save oplatek/f5d5b5bc6a25c6a0085a to your computer and use it in GitHub Desktop.
nonprojective parser nltk develop
{
"metadata": {
"name": "",
"signature": "sha256:947955ff46129b4f2cf4026bf234e38dac3fc59722c394c2b136612fb5d7bbf0"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"from nltk.parse.dependencygraph import treebank_data, conll_data2\n",
"from nltk.parse.dependencygraph import DependencyGraph\n",
"from nltk.parse.nonprojectivedependencyparser import ProbabilisticNonprojectiveParser\n",
"from nltk.parse.nonprojectivedependencyparser import NaiveBayesDependencyScorer\n",
"from IPython.display import display\n",
"\n",
"def test(sentence, tags, conll_data_str):\n",
"    \"\"\"Train a non-projective parser on CoNLL text and show parses of one sentence.\n",
"\n",
"    sentence       -- list of word tokens handed to the parser.\n",
"    tags           -- list of tags handed to the parser alongside ``sentence``.\n",
"    conll_data_str -- training text; entries are separated by a blank line and\n",
"                      each non-empty entry becomes one DependencyGraph.\n",
"\n",
"    Every graph yielded by the parser is printed and then rendered with\n",
"    IPython's display(). Nothing is returned.\n",
"    \"\"\"\n",
"    training_graphs = []\n",
"    for block in conll_data_str.split('\\n\\n'):\n",
"        # Skip the empty chunks left by leading/trailing blank lines.\n",
"        if block:\n",
"            training_graphs.append(DependencyGraph(block))\n",
"\n",
"    parser = ProbabilisticNonprojectiveParser()\n",
"    scorer = NaiveBayesDependencyScorer()\n",
"    parser.train(training_graphs, scorer)\n",
"\n",
"    for result in parser.parse(sentence, tags):\n",
"        # Plain-text dump first, then the rich notebook rendering.\n",
"        print(result)\n",
"        display(result)\n",
" \n",
"# Each call passes one tag per token; the parser pairs them by position.\n",
"test(['Vinken', 'will', 'join', 'the', 'board'], ['NNP', 'MD', 'VB', 'DT', 'NN'], treebank_data)\n",
"# Six tokens for six tags: without 'wild' (Adj) every later word is paired with\n",
"# the wrong tag ('zwaaien' -> Adj, '.' -> N) and the final 'Punc' is never used.\n",
"test(['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc'], conll_data2)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"defaultdict(<function <lambda> at 0x1076b7398>, {0: {u'ctag': u'TOP', u'head': None, u'word': None, u'rel': u'TOP', u'lemma': None, u'tag': u'TOP', u'deps': {'NTOP': [1, 2, 3, 4, 5]}, u'address': 0, u'feats': None}, 1: {u'ctag': None, u'head': None, u'word': 'Vinken', 'rel': 'NTOP', u'lemma': None, u'tag': 'NNP', u'deps': {}, u'address': 1, u'feats': None}, 2: {u'ctag': None, u'head': None, u'word': 'will', 'rel': 'NTOP', u'lemma': None, u'tag': 'MD', u'deps': {}, u'address': 2, u'feats': None}, 3: {u'ctag': None, u'head': None, u'word': 'join', 'rel': 'NTOP', u'lemma': None, u'tag': 'VB', u'deps': {}, u'address': 3, u'feats': None}, 4: {u'ctag': None, u'head': None, u'word': 'the', 'rel': 'NTOP', u'lemma': None, u'tag': 'DT', u'deps': {}, u'address': 4, u'feats': None}, 5: {u'ctag': None, u'head': None, u'word': 'board', 'rel': 'NTOP', u'lemma': None, u'tag': 'NN', u'deps': {}, u'address': 5, u'feats': None}})\n"
]
},
{
"metadata": {},
"output_type": "display_data",
"svg": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
" -->\n",
"<!-- Title: G Pages: 1 -->\n",
"<svg width=\"394pt\" height=\"130pt\"\n",
" viewBox=\"0.00 0.00 393.95 130.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 126)\">\n",
"<title>G</title>\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-126 389.952,-126 389.952,4 -4,4\"/>\n",
"<!-- 0 -->\n",
"<g id=\"node1\" class=\"node\"><title>0</title>\n",
"<text text-anchor=\"middle\" x=\"121.102\" y=\"-99.8\" font-family=\"Times,serif\" font-size=\"14.00\">0 (None)</text>\n",
"</g>\n",
"<!-- 1 -->\n",
"<g id=\"node2\" class=\"node\"><title>1</title>\n",
"<text text-anchor=\"middle\" x=\"38.1021\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">1 (Vinken)</text>\n",
"</g>\n",
"<!-- None -->\n",
"<g id=\"node3\" class=\"node\"><title>None</title>\n",
"<text text-anchor=\"middle\" x=\"199.102\" y=\"-99.8\" font-family=\"Times,serif\" font-size=\"14.00\">None</text>\n",
"</g>\n",
"<!-- None&#45;&gt;1 -->\n",
"<g id=\"edge1\" class=\"edge\"><title>None&#45;&gt;1</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M171.742,-89.6425C168.847,-88.3607 165.929,-87.1221 163.102,-86 139.074,-76.4614 130.912,-79.5632 107.79,-68 93.7389,-60.9734 79.3508,-51.342 67.3242,-42.4771\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"69.1118,-39.441 59.023,-36.2058 64.8922,-45.0264 69.1118,-39.441\"/>\n",
"<text text-anchor=\"middle\" x=\"126.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"<!-- 2 -->\n",
"<g id=\"node4\" class=\"node\"><title>2</title>\n",
"<text text-anchor=\"middle\" x=\"123.102\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">2 (will)</text>\n",
"</g>\n",
"<!-- None&#45;&gt;2 -->\n",
"<g id=\"edge2\" class=\"edge\"><title>None&#45;&gt;2</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M177.349,-85.6875C171.088,-80.2953 164.431,-74.1451 158.79,-68 152.1,-60.7123 145.511,-52.17 139.851,-44.2771\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"142.707,-42.2538 134.114,-36.0573 136.968,-46.2605 142.707,-42.2538\"/>\n",
"<text text-anchor=\"middle\" x=\"177.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"<!-- 3 -->\n",
"<g id=\"node5\" class=\"node\"><title>3</title>\n",
"<text text-anchor=\"middle\" x=\"199.102\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">3 (join)</text>\n",
"</g>\n",
"<!-- None&#45;&gt;3 -->\n",
"<g id=\"edge3\" class=\"edge\"><title>None&#45;&gt;3</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M199.102,-85.5951C199.102,-74.2572 199.102,-59.2271 199.102,-46.3153\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"202.602,-46.0951 199.102,-36.0952 195.602,-46.0952 202.602,-46.0951\"/>\n",
"<text text-anchor=\"middle\" x=\"217.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"<!-- 4 -->\n",
"<g id=\"node6\" class=\"node\"><title>4</title>\n",
"<text text-anchor=\"middle\" x=\"273.102\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">4 (the)</text>\n",
"</g>\n",
"<!-- None&#45;&gt;4 -->\n",
"<g id=\"edge4\" class=\"edge\"><title>None&#45;&gt;4</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M220.766,-85.7473C226.982,-80.3567 233.568,-74.1925 239.102,-68 245.519,-60.8198 251.76,-52.3897 257.107,-44.5656\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"260.119,-46.3573 262.726,-36.0882 254.284,-42.4902 260.119,-46.3573\"/>\n",
"<text text-anchor=\"middle\" x=\"268.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"<!-- 5 -->\n",
"<g id=\"node7\" class=\"node\"><title>5</title>\n",
"<text text-anchor=\"middle\" x=\"352.102\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">5 (board)</text>\n",
"</g>\n",
"<!-- None&#45;&gt;5 -->\n",
"<g id=\"edge5\" class=\"edge\"><title>None&#45;&gt;5</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M226.345,-94.9186C244.949,-88.833 269.807,-79.5659 290.102,-68 302.64,-60.8545 315.342,-51.3083 325.957,-42.5407\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"328.262,-45.1751 333.629,-36.0405 323.737,-39.8341 328.262,-45.1751\"/>\n",
"<text text-anchor=\"middle\" x=\"327.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text": [
"<DependencyGraph with 6 nodes>"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"defaultdict(<function <lambda> at 0x1076b7398>, {0: {u'ctag': u'TOP', u'head': None, u'word': None, u'rel': u'TOP', u'lemma': None, u'tag': u'TOP', u'deps': {'NTOP': [1, 2, 3, 4, 5]}, u'address': 0, u'feats': None}, 1: {u'ctag': None, u'head': None, u'word': 'Cathy', 'rel': 'NTOP', u'lemma': None, u'tag': 'N', u'deps': {}, u'address': 1, u'feats': None}, 2: {u'ctag': None, u'head': None, u'word': 'zag', 'rel': 'NTOP', u'lemma': None, u'tag': 'V', u'deps': {}, u'address': 2, u'feats': None}, 3: {u'ctag': None, u'head': None, u'word': 'hen', 'rel': 'NTOP', u'lemma': None, u'tag': 'Pron', u'deps': {}, u'address': 3, u'feats': None}, 4: {u'ctag': None, u'head': None, u'word': 'zwaaien', 'rel': 'NTOP', u'lemma': None, u'tag': 'Adj', u'deps': {}, u'address': 4, u'feats': None}, 5: {u'ctag': None, u'head': None, u'word': '.', 'rel': 'NTOP', u'lemma': None, u'tag': 'N', u'deps': {}, u'address': 5, u'feats': None}})\n"
]
},
{
"metadata": {},
"output_type": "display_data",
"svg": [
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n",
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n",
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n",
"<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n",
" -->\n",
"<!-- Title: G Pages: 1 -->\n",
"<svg width=\"397pt\" height=\"130pt\"\n",
" viewBox=\"0.00 0.00 396.63 130.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n",
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 126)\">\n",
"<title>G</title>\n",
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-126 392.633,-126 392.633,4 -4,4\"/>\n",
"<!-- 0 -->\n",
"<g id=\"node1\" class=\"node\"><title>0</title>\n",
"<text text-anchor=\"middle\" x=\"110.633\" y=\"-99.8\" font-family=\"Times,serif\" font-size=\"14.00\">0 (None)</text>\n",
"</g>\n",
"<!-- 1 -->\n",
"<g id=\"node2\" class=\"node\"><title>1</title>\n",
"<text text-anchor=\"middle\" x=\"34.6328\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">1 (Cathy)</text>\n",
"</g>\n",
"<!-- None -->\n",
"<g id=\"node3\" class=\"node\"><title>None</title>\n",
"<text text-anchor=\"middle\" x=\"188.633\" y=\"-99.8\" font-family=\"Times,serif\" font-size=\"14.00\">None</text>\n",
"</g>\n",
"<!-- None&#45;&gt;1 -->\n",
"<g id=\"edge1\" class=\"edge\"><title>None&#45;&gt;1</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M161.273,-89.6425C158.378,-88.3607 155.459,-87.1221 152.633,-86 128.605,-76.4614 120.142,-80.1447 97.3203,-68 84.3962,-61.1224 71.4288,-51.541 60.6691,-42.6734\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"62.7981,-39.8897 52.9088,-36.088 58.2689,-45.227 62.7981,-39.8897\"/>\n",
"<text text-anchor=\"middle\" x=\"115.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"<!-- 2 -->\n",
"<g id=\"node4\" class=\"node\"><title>2</title>\n",
"<text text-anchor=\"middle\" x=\"114.633\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">2 (zag)</text>\n",
"</g>\n",
"<!-- None&#45;&gt;2 -->\n",
"<g id=\"edge2\" class=\"edge\"><title>None&#45;&gt;2</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M166.774,-85.7835C160.509,-80.394 153.877,-74.2212 148.32,-68 141.925,-60.8397 135.73,-52.4151 130.434,-44.5898\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"133.284,-42.553 124.875,-36.1087 127.43,-46.3908 133.284,-42.553\"/>\n",
"<text text-anchor=\"middle\" x=\"166.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"<!-- 3 -->\n",
"<g id=\"node5\" class=\"node\"><title>3</title>\n",
"<text text-anchor=\"middle\" x=\"188.633\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">3 (hen)</text>\n",
"</g>\n",
"<!-- None&#45;&gt;3 -->\n",
"<g id=\"edge3\" class=\"edge\"><title>None&#45;&gt;3</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M188.633,-85.5951C188.633,-74.2572 188.633,-59.2271 188.633,-46.3153\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"192.133,-46.0951 188.633,-36.0952 185.133,-46.0952 192.133,-46.0951\"/>\n",
"<text text-anchor=\"middle\" x=\"206.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"<!-- 4 -->\n",
"<g id=\"node6\" class=\"node\"><title>4</title>\n",
"<text text-anchor=\"middle\" x=\"275.633\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">4 (zwaaien)</text>\n",
"</g>\n",
"<!-- None&#45;&gt;4 -->\n",
"<g id=\"edge4\" class=\"edge\"><title>None&#45;&gt;4</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M208.879,-85.8893C215.302,-80.302 222.361,-73.9915 228.633,-68 236.688,-60.3055 245.2,-51.6147 252.735,-43.7117\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"255.567,-45.81 259.891,-36.1373 250.479,-41.0029 255.567,-45.81\"/>\n",
"<text text-anchor=\"middle\" x=\"260.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"<!-- 5 -->\n",
"<g id=\"node7\" class=\"node\"><title>5</title>\n",
"<text text-anchor=\"middle\" x=\"361.633\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">5 (.)</text>\n",
"</g>\n",
"<!-- None&#45;&gt;5 -->\n",
"<g id=\"edge5\" class=\"edge\"><title>None&#45;&gt;5</title>\n",
"<path fill=\"none\" stroke=\"black\" d=\"M215.97,-94.427C235.054,-88.0578 260.899,-78.6549 282.633,-68 298.087,-60.4239 314.374,-50.6569 328.119,-41.8389\"/>\n",
"<polygon fill=\"black\" stroke=\"black\" points=\"330.283,-44.6063 336.75,-36.2139 326.461,-38.7418 330.283,-44.6063\"/>\n",
"<text text-anchor=\"middle\" x=\"324.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n",
"</g>\n",
"</g>\n",
"</svg>\n"
],
"text": [
"<DependencyGraph with 6 nodes>"
]
}
],
"prompt_number": 7
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment