andreasvc/Fragments in TSG derivations.ipynb

## Fragments in TSG derivations.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Getting tree fragments from TSG derivations\n",
    "-------------------------------------------\n",
    "\n",
    "Below we extract a simple Tree-Substitution Grammar (TSG) and parse sentences with it,\n",
    "and show which tree fragments were used in the derivations and how to extract them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import io\n",
    "from discodop import parser, runexp, tree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/tmp\n"
     ]
    }
   ],
   "source": [
    "# Go to a temporary directory where we will create a simple treebank\n",
    "# and store the extracted grammar\n",
    "%cd /tmp\n",
    "with io.open('treebankExample.mrg', 'w', encoding='utf8') as out:\n",
    "    out.write(u\"\"\"(S (NP (DT The) (NN cat)) (VP (VBP saw) (NP (DT the) (JJ hungry) (NN dog))))\n",
    "(S (NP (DT The) (NN cat)) (VP (VBP saw) (NP (DT the) (NN dog))))\n",
    "(S (NP (DT The) (NN mouse)) (VP (VBP saw) (NP (DT the) (NN cat))))\n",
    "(S (NP (DT The) (NN mouse)) (VP (VBP saw) (NP (DT the) (JJ yellow) (NN cat))))\n",
    "(S (NP (DT The) (JJ little) (NN mouse)) (VP (VBP saw) (NP (DT the) (NN cat))))\n",
    "(S (NP (DT The) (NN cat)) (VP (VBP ate) (NP (DT the) (NN dog))))\n",
    "(S (NP (DT The) (NN mouse)) (VP (VBP ate) (NP (DT the) (NN cat))))\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write a parameter file containing the specification for the grammar we will extract.\n",
    "with io.open('mygrammar.prm', 'w', encoding='utf8') as out:\n",
    "    out.write(u\"\"\"stages=[\n",
    "    dict(name='dop', mode='pcfg', dop='doubledop',\n",
    "        m=1000, estimator='rfe', objective = 'mpp')\n",
    "],\n",
    "corpusfmt='bracket',\n",
    "traincorpus=dict(\n",
    "    path='treebankExample.mrg', encoding='utf8',\n",
    "    numsents=7, maxwords=100),\n",
    "testcorpus=dict(\n",
    "    path='treebankExample.mrg', encoding='utf8',\n",
    "    numsents=7, maxwords=100, skiptrain=False),\n",
    "postagging=dict(\n",
    "    method='unknownword', model='4',\n",
    "    unknownthreshold=1, openclassthreshold=50,\n",
    "    simplelexsmooth=True),\n",
    "binarization=dict(\n",
    "    method='default', factor='right',\n",
    "    h=1, v=1),\n",
    "numproc=1, punct=None, functions=None, morphology=None, transformations=None, relationalrealizational=False, removeempty=False, ensureroot=False,\n",
    "\"\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "7 training sentences after length restriction <= 100\n",
      "known words: 10, signature types seen: 0\n",
      "open class tags: \n",
      "\n",
      "closed class tags: DT:1 JJ:3 NN:3 VBP:2\n",
      "treebank fan-out before binarization: 1 #6\n",
      "(S (NP (DT 0) (NN 1)) (VP (VBP 2) (NP (DT 3) (NN 4))))\n",
      "The mouse ate the cat\n",
      "binarization: default right h=1 v=1 ; cpu time elapsed: 0.001019s\n",
      "binarized treebank fan-out: 1 #6\n",
      "extracting recurring fragments\n",
      "finished 0--7\n",
      "getting exact counts for 25 fragments\n",
      "exact indices chunk 1 of 1\n",
      "merged 9 cover fragments up to depth 1 with max 999 frontier non-terminals.\n",
      "found 34 fragments\n",
      "DOP model based on 7 sentences, 69 nodes, 45 nonterminals\n",
      "labels: 45 of which preterminals: 11\n",
      "clauses: 74  lexical clauses: 21 non-lexical clauses: 53\n",
      "max fan-out: 1 in 7/7 01\tVP VBP NP mean: 1\n",
      "max variables: 2 in 7/7 01\tVP VBP NP\n",
      "max parsing complexity: 3 in 1/1 01\tS}<13> S}<8> VBP mean 2.43243\n",
      "All left hand sides sum to 1 +/- epsilon=1e-16\n",
      "equal number of nodes, but not equivalent:\n",
      "coarse labels without mapping: { DT, DT@The, DT@the, JJ, NN, NN@cat, NN@dog, NN@mouse, NP, NP|<JJ>, ... }\n",
      "wrote grammar to mygrammar/dop.{rules,lex,backtransform}.gz\n"
     ]
    }
   ],
   "source": [
    "# Extract the grammar using the command line interface;\n",
    "# the grammar will end up in several files under /tmp/mygrammar/\n",
    "!discodop grammar param mygrammar.prm mygrammar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(NP (DT 0=) (NN 1=))\t11\r\n",
      "(NP (DT 0=) (NP|<JJ> (JJ 1=) (NN 2=)))\t3\r\n",
      "(NP (DT 0=) (NN 1=cat))\t6\r\n",
      "(NP (DT 0=) (NP|<JJ> 1=))\t3\r\n",
      "(NP|<JJ> (JJ 0=) (NN 1=))\t3\r\n",
      "(S (NP (DT 0=The) (NN 1=mouse)) (VP (VBP 2=saw) (NP 3=)))\t2\r\n",
      "(S (NP (DT 0=The) (NN 1=)) (VP (VBP 2=saw) (NP (DT 3=the) (NP|<JJ> (JJ 4=) (NN 5=)))))\t2\r\n",
      "(S (NP 0=) (VP (VBP 1=) (NP (DT 2=the) (NN 3=))))\t5\r\n",
      "(S (NP 0=) (VP (VBP 1=saw) (NP (DT 2=the) (NN 3=))))\t3\r\n",
      "(S (NP (DT 0=The) (NN 1=)) (VP (VBP 2=) (NP (DT 3=the) (NN 4=))))\t4\r\n"
     ]
    }
   ],
   "source": [
    "# The fragments that this grammar is composed of are listed in mygrammar/dop.fragments.gz\n",
    "! zcat mygrammar/dop.fragments.gz | head"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the grammar and construct a Parser object for it\n",
    "top = 'S'  # the root label in the treebank\n",
    "directory = 'mygrammar'\n",
    "params = parser.readparam(directory + '/params.prm')\n",
    "parser.readgrammars(directory, params.stages, params.postagging, top=getattr(params, 'top', top))\n",
    "myparser = parser.Parser(params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[DictObj(msg='DOP:\\titems 20, edges 22, blocked 0\\n\\tdisambiguation: 4 derivations, 1 parsetrees, 0.000451s\\n\\tp=6.407e-05 0.00s cpu time elapsed\\n',\n",
       " \tname='dop',\n",
       " \tprob=6.406868578758892e-05,\n",
       " \tparsetrees=[('(S (NP (DT 0) (NP|<JJ> (JJ 1) (NN 2))) (VP (VBP 3) (NP (DT 4) (NN 5))))', 6.406868578758892e-05, ['(S (NP 0=) (VP (VBP 1=) (NP (DT 2=the) (NN 3=))))', '(NP (DT 0=) (NP|<JJ> (JJ 1=) (NN 2=)))', '(DT 0=The)', '(JJ 0=hungry)', '(NN 0=dog)', '(VBP 0=ate)', '(NN 0=dog)'])],\n",
       " \tnoparse=False,\n",
       " \tgolditems=0,\n",
       " \ttotalgolditems=0,\n",
       " \telapsedtime=0.0015919999999999268,\n",
       " \tparsetree=ParentedTree('S', [ParentedTree('NP', [ParentedTree('DT', [0]), ParentedTree('JJ', [1]), ParentedTree('NN', [2])]), ParentedTree('VP', [ParentedTree('VBP', [3]), ParentedTree('NP', [ParentedTree('DT', [4]), ParentedTree('NN', [5])])])]),\n",
       " \tnumitems=20,\n",
       " \tfragments=['(S (NP 0=) (VP (VBP 1=) (NP (DT 2=the) (NN 3=))))', '(NP (DT 0=) (NP|<JJ> (JJ 1=) (NN 2=)))', '(DT 0=The)', '(JJ 0=hungry)', '(NN 0=dog)', '(VBP 0=ate)', '(NN 0=dog)'])]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# We now parse a sentence with two different probability models.\n",
    "# The first is the relative frequency estimate (RFE),\n",
    "# the second is the shortest derivation criterion, with ties\n",
    "# broken by relative frequencies (the most probable shortest derivation, MPSD).\n",
    "# With this small treebank they give the same result, but with\n",
    "# a larger treebank these two disambiguation methods can select\n",
    "# a different best parse.\n",
    "sent = 'The hungry dog ate the dog'\n",
    "myparser.stages[-1].estimator = 'rfe'\n",
    "result = list(myparser.parse(sent.split()))\n",
    "result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['(S (NP 0=) (VP (VBP 1=) (NP (DT 2=the) (NN 3=))))',\n",
       " '(NP (DT 0=) (NP|<JJ> (JJ 1=) (NN 2=)))',\n",
       " '(DT 0=The)',\n",
       " '(JJ 0=hungry)',\n",
       " '(NN 0=dog)',\n",
       " '(VBP 0=ate)',\n",
       " '(NN 0=dog)']"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# These are the fragments used in the Most Probable Derivation\n",
    "result[0].fragments"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         S             \n",
      " ┌───────┴───┐          \n",
      " │           VP        \n",
      " │   ┌───────┴───┐      \n",
      " │   │           NP    \n",
      " │   │       ┌───┴───┐  \n",
      " NP VBP      DT      NN\n",
      " │   │       │       │  \n",
      "... ...     the     ...\n",
      "\n",
      "     NP            \n",
      " ┌───┴─────┐        \n",
      " │      NP|<JJ>    \n",
      " │   ┌─────┴─────┐  \n",
      " DT  JJ          NN\n",
      " │   │           │  \n",
      "... ...         ...\n",
      "\n",
      " DT\n",
      " │  \n",
      "The\n",
      "\n",
      "  JJ  \n",
      "  │    \n",
      "hungry\n",
      "\n",
      " NN\n",
      " │  \n",
      "dog\n",
      "\n",
      "VBP\n",
      " │  \n",
      "ate\n",
      "\n",
      " NN\n",
      " │  \n",
      "dog\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for a in result[0].fragments: print(tree.DrawTree(a))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "         S             \n",
      " ┌───────┴───┐          \n",
      " │           VP        \n",
      " │   ┌───────┴───┐      \n",
      " │   │           NP    \n",
      " │   │       ┌───┴───┐  \n",
      " NP VBP      DT      NN\n",
      " │   │       │       │  \n",
      "... ...     the     ...\n",
      "\n",
      "     NP            \n",
      " ┌───┴─────┐        \n",
      " │      NP|<JJ>    \n",
      " │   ┌─────┴─────┐  \n",
      " DT  JJ          NN\n",
      " │   │           │  \n",
      "... ...         ...\n",
      "\n",
      " DT\n",
      " │  \n",
      "The\n",
      "\n",
      "  JJ  \n",
      "  │    \n",
      "hungry\n",
      "\n",
      " NN\n",
      " │  \n",
      "dog\n",
      "\n",
      "VBP\n",
      " │  \n",
      "ate\n",
      "\n",
      " NN\n",
      " │  \n",
      "dog\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Now we switch to the Most Probable Shortest Derivation\n",
    "myparser.stages[-1].estimator = 'shortest'\n",
    "result = list(myparser.parse(sent.split()))\n",
    "for a in result[0].fragments: print(tree.DrawTree(a))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"Getting tree fragments from TSG derivations\n",
	"-------------------------------------------\n",
	"\n",
	"Below we extract a simple Tree-Substitution Grammar (TSG) and parse sentences with it,\n",
	"and show which tree fragments were used in the derivations and how to extract them."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import io\n",
	"from discodop import parser, runexp, tree"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"/tmp\n"
	]
	}
	],
	"source": [
	"# Go to a temporary directory where we will create a simple treebank\n",
	"# and store the extracted grammar\n",
	"%cd /tmp\n",
	"with io.open('treebankExample.mrg', 'w', encoding='utf8') as out:\n",
	" out.write(u\"\"\"(S (NP (DT The) (NN cat)) (VP (VBP saw) (NP (DT the) (JJ hungry) (NN dog))))\n",
	"(S (NP (DT The) (NN cat)) (VP (VBP saw) (NP (DT the) (NN dog))))\n",
	"(S (NP (DT The) (NN mouse)) (VP (VBP saw) (NP (DT the) (NN cat))))\n",
	"(S (NP (DT The) (NN mouse)) (VP (VBP saw) (NP (DT the) (JJ yellow) (NN cat))))\n",
	"(S (NP (DT The) (JJ little) (NN mouse)) (VP (VBP saw) (NP (DT the) (NN cat))))\n",
	"(S (NP (DT The) (NN cat)) (VP (VBP ate) (NP (DT the) (NN dog))))\n",
	"(S (NP (DT The) (NN mouse)) (VP (VBP ate) (NP (DT the) (NN cat))))\n",
	"\"\"\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Write a parameter file containing the specification for the grammar we will extract.\n",
	"with io.open('mygrammar.prm', 'w', encoding='utf8') as out:\n",
	" out.write(u\"\"\"stages=[\n",
	" dict(name='dop', mode='pcfg', dop='doubledop',\n",
	" m=1000, estimator='rfe', objective = 'mpp')\n",
	"],\n",
	"corpusfmt='bracket',\n",
	"traincorpus=dict(\n",
	" path='treebankExample.mrg', encoding='utf8',\n",
	" numsents=7, maxwords=100),\n",
	"testcorpus=dict(\n",
	" path='treebankExample.mrg', encoding='utf8',\n",
	" numsents=7, maxwords=100, skiptrain=False),\n",
	"postagging=dict(\n",
	" method='unknownword', model='4',\n",
	" unknownthreshold=1, openclassthreshold=50,\n",
	" simplelexsmooth=True),\n",
	"binarization=dict(\n",
	" method='default', factor='right',\n",
	" h=1, v=1),\n",
	"numproc=1, punct=None, functions=None, morphology=None, transformations=None, relationalrealizational=False, removeempty=False, ensureroot=False,\n",
	"\"\"\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"7 training sentences after length restriction <= 100\n",
	"known words: 10, signature types seen: 0\n",
	"open class tags: \n",
	"\n",
	"closed class tags: DT:1 JJ:3 NN:3 VBP:2\n",
	"treebank fan-out before binarization: 1 #6\n",
	"(S (NP (DT 0) (NN 1)) (VP (VBP 2) (NP (DT 3) (NN 4))))\n",
	"The mouse ate the cat\n",
	"binarization: default right h=1 v=1 ; cpu time elapsed: 0.001019s\n",
	"binarized treebank fan-out: 1 #6\n",
	"extracting recurring fragments\n",
	"finished 0--7\n",
	"getting exact counts for 25 fragments\n",
	"exact indices chunk 1 of 1\n",
	"merged 9 cover fragments up to depth 1 with max 999 frontier non-terminals.\n",
	"found 34 fragments\n",
	"DOP model based on 7 sentences, 69 nodes, 45 nonterminals\n",
	"labels: 45 of which preterminals: 11\n",
	"clauses: 74  lexical clauses: 21 non-lexical clauses: 53\n",
	"max fan-out: 1 in 7/7 01\tVP VBP NP mean: 1\n",
	"max variables: 2 in 7/7 01\tVP VBP NP\n",
	"max parsing complexity: 3 in 1/1 01\tS}<13> S}<8> VBP mean 2.43243\n",
	"All left hand sides sum to 1 +/- epsilon=1e-16\n",
	"equal number of nodes, but not equivalent:\n",
	"coarse labels without mapping: { DT, DT@The, DT@the, JJ, NN, NN@cat, NN@dog, NN@mouse, NP, NP|<JJ>, ... }\n",
	"wrote grammar to mygrammar/dop.{rules,lex,backtransform}.gz\n"
	]
	}
	],
	"source": [
	"# Extract the grammar using the command line interface;\n",
	"# the grammar will end up in several files under /tmp/mygrammar/\n",
	"!discodop grammar param mygrammar.prm mygrammar"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"(NP (DT 0=) (NN 1=))\t11\r\n",
	"(NP (DT 0=) (NP|<JJ> (JJ 1=) (NN 2=)))\t3\r\n",
	"(NP (DT 0=) (NN 1=cat))\t6\r\n",
	"(NP (DT 0=) (NP|<JJ> 1=))\t3\r\n",
	"(NP|<JJ> (JJ 0=) (NN 1=))\t3\r\n",
	"(S (NP (DT 0=The) (NN 1=mouse)) (VP (VBP 2=saw) (NP 3=)))\t2\r\n",
	"(S (NP (DT 0=The) (NN 1=)) (VP (VBP 2=saw) (NP (DT 3=the) (NP|<JJ> (JJ 4=) (NN 5=)))))\t2\r\n",
	"(S (NP 0=) (VP (VBP 1=) (NP (DT 2=the) (NN 3=))))\t5\r\n",
	"(S (NP 0=) (VP (VBP 1=saw) (NP (DT 2=the) (NN 3=))))\t3\r\n",
	"(S (NP (DT 0=The) (NN 1=)) (VP (VBP 2=) (NP (DT 3=the) (NN 4=))))\t4\r\n"
	]
	}
	],
	"source": [
	"# The fragments that this grammar is composed of are listed in mygrammar/dop.fragments.gz\n",
	"! zcat mygrammar/dop.fragments.gz | head"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load the grammar and construct a Parser object for it\n",
	"top = 'S' # the root label in the treebank\n",
	"directory = 'mygrammar'\n",
	"params = parser.readparam(directory + '/params.prm')\n",
	"parser.readgrammars(directory, params.stages, params.postagging, top=getattr(params, 'top', top))\n",
	"myparser = parser.Parser(params)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"[DictObj(msg='DOP:\\titems 20, edges 22, blocked 0\\n\\tdisambiguation: 4 derivations, 1 parsetrees, 0.000451s\\n\\tp=6.407e-05 0.00s cpu time elapsed\\n',\n",
	" \tname='dop',\n",
	" \tprob=6.406868578758892e-05,\n",
	" \tparsetrees=[('(S (NP (DT 0) (NP|<JJ> (JJ 1) (NN 2))) (VP (VBP 3) (NP (DT 4) (NN 5))))', 6.406868578758892e-05, ['(S (NP 0=) (VP (VBP 1=) (NP (DT 2=the) (NN 3=))))', '(NP (DT 0=) (NP|<JJ> (JJ 1=) (NN 2=)))', '(DT 0=The)', '(JJ 0=hungry)', '(NN 0=dog)', '(VBP 0=ate)', '(NN 0=dog)'])],\n",
	" \tnoparse=False,\n",
	" \tgolditems=0,\n",
	" \ttotalgolditems=0,\n",
	" \telapsedtime=0.0015919999999999268,\n",
	" \tparsetree=ParentedTree('S', [ParentedTree('NP', [ParentedTree('DT', [0]), ParentedTree('JJ', [1]), ParentedTree('NN', [2])]), ParentedTree('VP', [ParentedTree('VBP', [3]), ParentedTree('NP', [ParentedTree('DT', [4]), ParentedTree('NN', [5])])])]),\n",
	" \tnumitems=20,\n",
	" \tfragments=['(S (NP 0=) (VP (VBP 1=) (NP (DT 2=the) (NN 3=))))', '(NP (DT 0=) (NP|<JJ> (JJ 1=) (NN 2=)))', '(DT 0=The)', '(JJ 0=hungry)', '(NN 0=dog)', '(VBP 0=ate)', '(NN 0=dog)'])]"
	]
	},
	"execution_count": 7,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# We now parse a sentence with two different probability models.\n",
	"# The first is the relative frequency estimate (RFE),\n",
	"# the second is the shortest derivation criterion, with ties\n",
	"# broken by relative frequencies (the most probable shortest derivation, MPSD).\n",
	"# With this small treebank they give the same result, but with\n",
	"# a larger treebank these two disambiguation methods can select\n",
	"# a different best parse.\n",
	"sent = 'The hungry dog ate the dog'\n",
	"myparser.stages[-1].estimator = 'rfe'\n",
	"result = list(myparser.parse(sent.split()))\n",
	"result"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"['(S (NP 0=) (VP (VBP 1=) (NP (DT 2=the) (NN 3=))))',\n",
	" '(NP (DT 0=) (NP|<JJ> (JJ 1=) (NN 2=)))',\n",
	" '(DT 0=The)',\n",
	" '(JJ 0=hungry)',\n",
	" '(NN 0=dog)',\n",
	" '(VBP 0=ate)',\n",
	" '(NN 0=dog)']"
	]
	},
	"execution_count": 8,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# These are the fragments used in the Most Probable Derivation\n",
	"result[0].fragments"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"         S             \n",
	" ┌───────┴───┐          \n",
	" │           VP        \n",
	" │   ┌───────┴───┐      \n",
	" │   │           NP    \n",
	" │   │       ┌───┴───┐  \n",
	" NP VBP      DT      NN\n",
	" │   │       │       │  \n",
	"... ...     the     ...\n",
	"\n",
	"     NP            \n",
	" ┌───┴─────┐        \n",
	" │      NP|<JJ>    \n",
	" │   ┌─────┴─────┐  \n",
	" DT  JJ          NN\n",
	" │   │           │  \n",
	"... ...         ...\n",
	"\n",
	" DT\n",
	" │  \n",
	"The\n",
	"\n",
	"  JJ  \n",
	"  │    \n",
	"hungry\n",
	"\n",
	" NN\n",
	" │  \n",
	"dog\n",
	"\n",
	"VBP\n",
	" │  \n",
	"ate\n",
	"\n",
	" NN\n",
	" │  \n",
	"dog\n",
	"\n"
	]
	}
	],
	"source": [
	"for a in result[0].fragments: print(tree.DrawTree(a))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"         S             \n",
	" ┌───────┴───┐          \n",
	" │           VP        \n",
	" │   ┌───────┴───┐      \n",
	" │   │           NP    \n",
	" │   │       ┌───┴───┐  \n",
	" NP VBP      DT      NN\n",
	" │   │       │       │  \n",
	"... ...     the     ...\n",
	"\n",
	"     NP            \n",
	" ┌───┴─────┐        \n",
	" │      NP|<JJ>    \n",
	" │   ┌─────┴─────┐  \n",
	" DT  JJ          NN\n",
	" │   │           │  \n",
	"... ...         ...\n",
	"\n",
	" DT\n",
	" │  \n",
	"The\n",
	"\n",
	"  JJ  \n",
	"  │    \n",
	"hungry\n",
	"\n",
	" NN\n",
	" │  \n",
	"dog\n",
	"\n",
	"VBP\n",
	" │  \n",
	"ate\n",
	"\n",
	" NN\n",
	" │  \n",
	"dog\n",
	"\n"
	]
	}
	],
	"source": [
	"# Now we switch to the Most Probable Shortest Derivation\n",
	"myparser.stages[-1].estimator = 'shortest'\n",
	"result = list(myparser.parse(sent.split()))\n",
	"for a in result[0].fragments: print(tree.DrawTree(a))"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.5.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 1
	}