Created
February 20, 2015 16:22
-
-
Save oplatek/f5d5b5bc6a25c6a0085a to your computer and use it in GitHub Desktop.
nonprojective parser nltk develop
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"metadata": { | |
"name": "", | |
"signature": "sha256:947955ff46129b4f2cf4026bf234e38dac3fc59722c394c2b136612fb5d7bbf0" | |
}, | |
"nbformat": 3, | |
"nbformat_minor": 0, | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [ | |
"from nltk.parse.dependencygraph import treebank_data, conll_data2\n", | |
"from nltk.parse.dependencygraph import DependencyGraph\n", | |
"from nltk.parse.nonprojectivedependencyparser import ProbabilisticNonprojectiveParser\n", | |
"from nltk.parse.nonprojectivedependencyparser import NaiveBayesDependencyScorer\n", | |
"from IPython.display import display\n", | |
"\n", | |
"def test(sentence, tags, conll_data_str):\n", | |
"    \"\"\"Train a non-projective dependency parser and show its parses of one sentence.\n", | |
"\n", | |
"    Args:\n", | |
"        sentence: list of word tokens to parse.\n", | |
"        tags: list of POS tags, intended to hold one tag per token of ``sentence``.\n", | |
"        conll_data_str: training data as text; sentence blocks are separated by a blank line.\n", | |
"\n", | |
"    Each parse returned by the parser is printed and then shown with ``display``.\n", | |
"    \"\"\"\n", | |
"    # Build one DependencyGraph per non-empty, blank-line-separated block.\n", | |
"    graphs = [\n", | |
"        DependencyGraph(entry) for entry in conll_data_str.split('\\n\\n') if entry\n", | |
"    ]\n", | |
"    npp = ProbabilisticNonprojectiveParser()\n", | |
"    npp.train(graphs, NaiveBayesDependencyScorer())\n", | |
"    for parse_graph in npp.parse(sentence, tags):\n", | |
"        print(parse_graph)    # plain-text dump on stdout\n", | |
"        display(parse_graph)  # rich repr (saved as SVG in this notebook's output)\n", | |
" \n", | |
"# Token and tag lists must have equal length: a surplus tag is not rejected, it just\n", | |
"# shifts the remaining tags onto the wrong words.\n", | |
"test(['Vinken', 'will', 'join', 'the', 'board'], ['NNP', 'MD', 'VB', 'DT', 'NN'], treebank_data)\n", | |
"# Six tokens for six tags; 'wild' is the 'Adj' token and '.' takes 'Punc'.\n", | |
"test(['Cathy', 'zag', 'hen', 'wild', 'zwaaien', '.'], ['N', 'V', 'Pron', 'Adj', 'N', 'Punc'], conll_data2)" | |
], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"defaultdict(<function <lambda> at 0x1076b7398>, {0: {u'ctag': u'TOP', u'head': None, u'word': None, u'rel': u'TOP', u'lemma': None, u'tag': u'TOP', u'deps': {'NTOP': [1, 2, 3, 4, 5]}, u'address': 0, u'feats': None}, 1: {u'ctag': None, u'head': None, u'word': 'Vinken', 'rel': 'NTOP', u'lemma': None, u'tag': 'NNP', u'deps': {}, u'address': 1, u'feats': None}, 2: {u'ctag': None, u'head': None, u'word': 'will', 'rel': 'NTOP', u'lemma': None, u'tag': 'MD', u'deps': {}, u'address': 2, u'feats': None}, 3: {u'ctag': None, u'head': None, u'word': 'join', 'rel': 'NTOP', u'lemma': None, u'tag': 'VB', u'deps': {}, u'address': 3, u'feats': None}, 4: {u'ctag': None, u'head': None, u'word': 'the', 'rel': 'NTOP', u'lemma': None, u'tag': 'DT', u'deps': {}, u'address': 4, u'feats': None}, 5: {u'ctag': None, u'head': None, u'word': 'board', 'rel': 'NTOP', u'lemma': None, u'tag': 'NN', u'deps': {}, u'address': 5, u'feats': None}})\n" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "display_data", | |
"svg": [ | |
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", | |
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", | |
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", | |
"<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n", | |
" -->\n", | |
"<!-- Title: G Pages: 1 -->\n", | |
"<svg width=\"394pt\" height=\"130pt\"\n", | |
" viewBox=\"0.00 0.00 393.95 130.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", | |
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 126)\">\n", | |
"<title>G</title>\n", | |
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-126 389.952,-126 389.952,4 -4,4\"/>\n", | |
"<!-- 0 -->\n", | |
"<g id=\"node1\" class=\"node\"><title>0</title>\n", | |
"<text text-anchor=\"middle\" x=\"121.102\" y=\"-99.8\" font-family=\"Times,serif\" font-size=\"14.00\">0 (None)</text>\n", | |
"</g>\n", | |
"<!-- 1 -->\n", | |
"<g id=\"node2\" class=\"node\"><title>1</title>\n", | |
"<text text-anchor=\"middle\" x=\"38.1021\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">1 (Vinken)</text>\n", | |
"</g>\n", | |
"<!-- None -->\n", | |
"<g id=\"node3\" class=\"node\"><title>None</title>\n", | |
"<text text-anchor=\"middle\" x=\"199.102\" y=\"-99.8\" font-family=\"Times,serif\" font-size=\"14.00\">None</text>\n", | |
"</g>\n", | |
"<!-- None->1 -->\n", | |
"<g id=\"edge1\" class=\"edge\"><title>None->1</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M171.742,-89.6425C168.847,-88.3607 165.929,-87.1221 163.102,-86 139.074,-76.4614 130.912,-79.5632 107.79,-68 93.7389,-60.9734 79.3508,-51.342 67.3242,-42.4771\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"69.1118,-39.441 59.023,-36.2058 64.8922,-45.0264 69.1118,-39.441\"/>\n", | |
"<text text-anchor=\"middle\" x=\"126.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"<!-- 2 -->\n", | |
"<g id=\"node4\" class=\"node\"><title>2</title>\n", | |
"<text text-anchor=\"middle\" x=\"123.102\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">2 (will)</text>\n", | |
"</g>\n", | |
"<!-- None->2 -->\n", | |
"<g id=\"edge2\" class=\"edge\"><title>None->2</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M177.349,-85.6875C171.088,-80.2953 164.431,-74.1451 158.79,-68 152.1,-60.7123 145.511,-52.17 139.851,-44.2771\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"142.707,-42.2538 134.114,-36.0573 136.968,-46.2605 142.707,-42.2538\"/>\n", | |
"<text text-anchor=\"middle\" x=\"177.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"<!-- 3 -->\n", | |
"<g id=\"node5\" class=\"node\"><title>3</title>\n", | |
"<text text-anchor=\"middle\" x=\"199.102\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">3 (join)</text>\n", | |
"</g>\n", | |
"<!-- None->3 -->\n", | |
"<g id=\"edge3\" class=\"edge\"><title>None->3</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M199.102,-85.5951C199.102,-74.2572 199.102,-59.2271 199.102,-46.3153\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"202.602,-46.0951 199.102,-36.0952 195.602,-46.0952 202.602,-46.0951\"/>\n", | |
"<text text-anchor=\"middle\" x=\"217.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"<!-- 4 -->\n", | |
"<g id=\"node6\" class=\"node\"><title>4</title>\n", | |
"<text text-anchor=\"middle\" x=\"273.102\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">4 (the)</text>\n", | |
"</g>\n", | |
"<!-- None->4 -->\n", | |
"<g id=\"edge4\" class=\"edge\"><title>None->4</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M220.766,-85.7473C226.982,-80.3567 233.568,-74.1925 239.102,-68 245.519,-60.8198 251.76,-52.3897 257.107,-44.5656\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"260.119,-46.3573 262.726,-36.0882 254.284,-42.4902 260.119,-46.3573\"/>\n", | |
"<text text-anchor=\"middle\" x=\"268.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"<!-- 5 -->\n", | |
"<g id=\"node7\" class=\"node\"><title>5</title>\n", | |
"<text text-anchor=\"middle\" x=\"352.102\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">5 (board)</text>\n", | |
"</g>\n", | |
"<!-- None->5 -->\n", | |
"<g id=\"edge5\" class=\"edge\"><title>None->5</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M226.345,-94.9186C244.949,-88.833 269.807,-79.5659 290.102,-68 302.64,-60.8545 315.342,-51.3083 325.957,-42.5407\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"328.262,-45.1751 333.629,-36.0405 323.737,-39.8341 328.262,-45.1751\"/>\n", | |
"<text text-anchor=\"middle\" x=\"327.258\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"</g>\n", | |
"</svg>\n" | |
], | |
"text": [ | |
"<DependencyGraph with 6 nodes>" | |
] | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": [ | |
"defaultdict(<function <lambda> at 0x1076b7398>, {0: {u'ctag': u'TOP', u'head': None, u'word': None, u'rel': u'TOP', u'lemma': None, u'tag': u'TOP', u'deps': {'NTOP': [1, 2, 3, 4, 5]}, u'address': 0, u'feats': None}, 1: {u'ctag': None, u'head': None, u'word': 'Cathy', 'rel': 'NTOP', u'lemma': None, u'tag': 'N', u'deps': {}, u'address': 1, u'feats': None}, 2: {u'ctag': None, u'head': None, u'word': 'zag', 'rel': 'NTOP', u'lemma': None, u'tag': 'V', u'deps': {}, u'address': 2, u'feats': None}, 3: {u'ctag': None, u'head': None, u'word': 'hen', 'rel': 'NTOP', u'lemma': None, u'tag': 'Pron', u'deps': {}, u'address': 3, u'feats': None}, 4: {u'ctag': None, u'head': None, u'word': 'zwaaien', 'rel': 'NTOP', u'lemma': None, u'tag': 'Adj', u'deps': {}, u'address': 4, u'feats': None}, 5: {u'ctag': None, u'head': None, u'word': '.', 'rel': 'NTOP', u'lemma': None, u'tag': 'N', u'deps': {}, u'address': 5, u'feats': None}})\n" | |
] | |
}, | |
{ | |
"metadata": {}, | |
"output_type": "display_data", | |
"svg": [ | |
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n", | |
"<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n", | |
" \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n", | |
"<!-- Generated by graphviz version 2.38.0 (20140413.2041)\n", | |
" -->\n", | |
"<!-- Title: G Pages: 1 -->\n", | |
"<svg width=\"397pt\" height=\"130pt\"\n", | |
" viewBox=\"0.00 0.00 396.63 130.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n", | |
"<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 126)\">\n", | |
"<title>G</title>\n", | |
"<polygon fill=\"white\" stroke=\"none\" points=\"-4,4 -4,-126 392.633,-126 392.633,4 -4,4\"/>\n", | |
"<!-- 0 -->\n", | |
"<g id=\"node1\" class=\"node\"><title>0</title>\n", | |
"<text text-anchor=\"middle\" x=\"110.633\" y=\"-99.8\" font-family=\"Times,serif\" font-size=\"14.00\">0 (None)</text>\n", | |
"</g>\n", | |
"<!-- 1 -->\n", | |
"<g id=\"node2\" class=\"node\"><title>1</title>\n", | |
"<text text-anchor=\"middle\" x=\"34.6328\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">1 (Cathy)</text>\n", | |
"</g>\n", | |
"<!-- None -->\n", | |
"<g id=\"node3\" class=\"node\"><title>None</title>\n", | |
"<text text-anchor=\"middle\" x=\"188.633\" y=\"-99.8\" font-family=\"Times,serif\" font-size=\"14.00\">None</text>\n", | |
"</g>\n", | |
"<!-- None->1 -->\n", | |
"<g id=\"edge1\" class=\"edge\"><title>None->1</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M161.273,-89.6425C158.378,-88.3607 155.459,-87.1221 152.633,-86 128.605,-76.4614 120.142,-80.1447 97.3203,-68 84.3962,-61.1224 71.4288,-51.541 60.6691,-42.6734\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"62.7981,-39.8897 52.9088,-36.088 58.2689,-45.227 62.7981,-39.8897\"/>\n", | |
"<text text-anchor=\"middle\" x=\"115.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"<!-- 2 -->\n", | |
"<g id=\"node4\" class=\"node\"><title>2</title>\n", | |
"<text text-anchor=\"middle\" x=\"114.633\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">2 (zag)</text>\n", | |
"</g>\n", | |
"<!-- None->2 -->\n", | |
"<g id=\"edge2\" class=\"edge\"><title>None->2</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M166.774,-85.7835C160.509,-80.394 153.877,-74.2212 148.32,-68 141.925,-60.8397 135.73,-52.4151 130.434,-44.5898\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"133.284,-42.553 124.875,-36.1087 127.43,-46.3908 133.284,-42.553\"/>\n", | |
"<text text-anchor=\"middle\" x=\"166.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"<!-- 3 -->\n", | |
"<g id=\"node5\" class=\"node\"><title>3</title>\n", | |
"<text text-anchor=\"middle\" x=\"188.633\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">3 (hen)</text>\n", | |
"</g>\n", | |
"<!-- None->3 -->\n", | |
"<g id=\"edge3\" class=\"edge\"><title>None->3</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M188.633,-85.5951C188.633,-74.2572 188.633,-59.2271 188.633,-46.3153\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"192.133,-46.0951 188.633,-36.0952 185.133,-46.0952 192.133,-46.0951\"/>\n", | |
"<text text-anchor=\"middle\" x=\"206.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"<!-- 4 -->\n", | |
"<g id=\"node6\" class=\"node\"><title>4</title>\n", | |
"<text text-anchor=\"middle\" x=\"275.633\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">4 (zwaaien)</text>\n", | |
"</g>\n", | |
"<!-- None->4 -->\n", | |
"<g id=\"edge4\" class=\"edge\"><title>None->4</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M208.879,-85.8893C215.302,-80.302 222.361,-73.9915 228.633,-68 236.688,-60.3055 245.2,-51.6147 252.735,-43.7117\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"255.567,-45.81 259.891,-36.1373 250.479,-41.0029 255.567,-45.81\"/>\n", | |
"<text text-anchor=\"middle\" x=\"260.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"<!-- 5 -->\n", | |
"<g id=\"node7\" class=\"node\"><title>5</title>\n", | |
"<text text-anchor=\"middle\" x=\"361.633\" y=\"-13.8\" font-family=\"Times,serif\" font-size=\"14.00\">5 (.)</text>\n", | |
"</g>\n", | |
"<!-- None->5 -->\n", | |
"<g id=\"edge5\" class=\"edge\"><title>None->5</title>\n", | |
"<path fill=\"none\" stroke=\"black\" d=\"M215.97,-94.427C235.054,-88.0578 260.899,-78.6549 282.633,-68 298.087,-60.4239 314.374,-50.6569 328.119,-41.8389\"/>\n", | |
"<polygon fill=\"black\" stroke=\"black\" points=\"330.283,-44.6063 336.75,-36.2139 326.461,-38.7418 330.283,-44.6063\"/>\n", | |
"<text text-anchor=\"middle\" x=\"324.789\" y=\"-56.8\" font-family=\"Times,serif\" font-size=\"14.00\">NTOP</text>\n", | |
"</g>\n", | |
"</g>\n", | |
"</svg>\n" | |
], | |
"text": [ | |
"<DependencyGraph with 6 nodes>" | |
] | |
} | |
], | |
"prompt_number": 7 | |
}, | |
{ | |
"cell_type": "code", | |
"collapsed": false, | |
"input": [], | |
"language": "python", | |
"metadata": {}, | |
"outputs": [] | |
} | |
], | |
"metadata": {} | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment