kognate/lark.ipynb

## lark.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from lark import Lark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tree(start, [Token(WORD, 'Hello'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n"
     ]
    }
   ],
   "source": [
    "# Sentence grammar: one or more words/numbers, a comma, a quoted string,\n",
    "# a signed number and a closing \"!\"; whitespace between tokens is ignored.\n",
    "greeting_grammar = '''start: (WORD | NUMBER)+\",\" STRING NUMBER\"!\"\n",
    "            %import common.WORD   // imports from terminal library\n",
    "            %import common.ESCAPED_STRING   -> STRING\n",
    "            %import common.SIGNED_NUMBER    -> NUMBER\n",
    "            %import common.WS\n",
    "            %ignore WS\n",
    "         '''\n",
    "l = Lark(greeting_grammar)\n",
    "\n",
    "greeting_tree = l.parse(\"Hello, \\\"World dude\\\" -40!\")\n",
    "print(greeting_tree)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tree(start, [Token(NUMBER, '50'), Token(WORD, 'or'), Token(WORD, 'so'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n"
     ]
    }
   ],
   "source": [
    "print( l.parse(\"50 or so, \\\"World dude\\\" -40!\") )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tree(start, [Token(WORD, 'Hello'), Token(NUMBER, '20'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n"
     ]
    }
   ],
   "source": [
    "print( l.parse(\"Hello 20, \\\"World dude\\\" -40!\") )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sentence grammar in which every part is produced by its own named rule\n",
    "# (wonum, estring, only_numbers), so each part becomes a subtree of `start`.\n",
    "named_rule_grammar = '''start: wonum\",\" estring only_numbers\"!\"\n",
    "            only_numbers: NUMBER\n",
    "            wonum: (WORD | NUMBER)+\n",
    "            estring: STRING\n",
    "            %import common.WORD   // imports from terminal library\n",
    "            %import common.ESCAPED_STRING   -> STRING\n",
    "            %import common.SIGNED_NUMBER    -> NUMBER\n",
    "            %import common.WS\n",
    "            %ignore WS\n",
    "         '''\n",
    "l = Lark(named_rule_grammar)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "from lark import Transformer\n",
    "\n",
    "class MyTransformer(Transformer):\n",
    "    \"\"\"Collapse a parsed sentence into a single pipe-delimited string.\n",
    "\n",
    "    Each method carries the name of a grammar rule and receives the list of\n",
    "    that rule's (already transformed) children.\n",
    "    \"\"\"\n",
    "\n",
    "    def start(self, lvalues):\n",
    "        \"\"\"Join the child values, placing a pipe character between them.\"\"\"\n",
    "        delimiter = \"|\"\n",
    "        return delimiter.join(lvalues)\n",
    "\n",
    "    def estring(self, quoted_string):\n",
    "        \"\"\"Drop the first and last character of each child, then space-join.\"\"\"\n",
    "        stripped = (text[1:-1] for text in quoted_string)\n",
    "        return \" \".join(stripped)\n",
    "\n",
    "    def wonum(self, items):\n",
    "        \"\"\"Space-join the children.\"\"\"\n",
    "        return str.join(\" \", items)\n",
    "\n",
    "    def only_numbers(self, item):\n",
    "        \"\"\"Return the first child converted to str.\"\"\"\n",
    "        first_child = item[0]\n",
    "        return str(first_child)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_string = \"\"\"Hello 30 there, \"some stuff\" 20!\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Hello 30 there|some stuff|20'"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "MyTransformer().transform(l.parse(input_string))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# A More Complicated Example\n",
    "\n",
    "We get a string that looks like this:\n",
    "\n",
    "```TOKEN : NUMBER : quoted or unquoted string : SIGNED INT```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Order-line grammar: <token> : <number> : <words or quoted string> : <number>\n",
    "order_grammar = '''start: token \":\" NUMBER \":\" qornot \":\" NUMBER\n",
    "            token: tire | wheel | caster | disc | mechano |tirer\n",
    "            tire: \"TIRE\"\n",
    "            wheel: \"WHEEL\"\n",
    "            caster: \"CASTOR\"\n",
    "            disc: \"DISC\"\n",
    "            tirer: \"TIRER\"\n",
    "            mechano: \"MECHANO\"\n",
    "            qornot: WORD+ | STRING\n",
    "            %import common.WORD   // imports from terminal library\n",
    "            %import common.ESCAPED_STRING   -> STRING\n",
    "            %import common.SIGNED_NUMBER    -> NUMBER\n",
    "            %import common.WS\n",
    "            %ignore WS\n",
    "         '''\n",
    "l = Lark(order_grammar)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Tree(start, [Tree(token, [Tree(tire, [])]), Token(NUMBER, '30'), Tree(qornot, [Token(STRING, '\"hello there\"')]), Token(NUMBER, '10')])"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "l.parse(\"\"\"TIRE : 30                  : \"hello there\" : 10\"\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pydantic import BaseModel\n",
    "\n",
    "class Order(BaseModel):\n",
    "    \"\"\"One parsed order line.\"\"\"\n",
    "\n",
    "    kind: str         # name of the keyword rule that matched (see OrderTransformer.token)\n",
    "    description: str  # third field of the line\n",
    "    have: int         # second field of the line\n",
    "    need: int         # fourth field of the line\n",
    "\n",
    "\n",
    "class OrderTransformer(Transformer):\n",
    "    \"\"\"Turn a parsed order line into an Order.\"\"\"\n",
    "\n",
    "    def start(self, items):\n",
    "        \"\"\"Build the Order from the four children of `start`.\n",
    "\n",
    "        The children arrive in grammar order: kind, have, description, need.\n",
    "        \"\"\"\n",
    "        return Order(kind=items[0],\n",
    "                    description=items[2],\n",
    "                    have=items[1],\n",
    "                    need=items[3])\n",
    "\n",
    "    def token(self, items):\n",
    "        \"\"\"Return the name of the keyword sub-rule that matched (e.g. 'tirer').\"\"\"\n",
    "        return str(items[0].data)\n",
    "\n",
    "    def qornot(self, items):\n",
    "        \"\"\"Return the description text without surrounding quotes.\n",
    "\n",
    "        `items` holds either one quoted STRING token or one or more WORD tokens.\n",
    "        \"\"\"\n",
    "        if len(items) == 1 and items[0].type == \"STRING\":\n",
    "            # Drop the enclosing double quotes so quoted and unquoted\n",
    "            # descriptions come out in the same form.\n",
    "            return items[0][1:-1]\n",
    "        return \" \".join(items)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Order(kind='tirer', description=\"this won't work\", have=30, need=-10)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "OrderTransformer().transform(l.parse(\"\"\"TIRER :          30 : \"this won't work\": -10\"\"\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.5rc1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}

## requirements.txt
jupyterlab
lark-parser
pydantic
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"from lark import Lark"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Tree(start, [Token(WORD, 'Hello'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n"
	]
	}
	],
	"source": [
	"# Sentence grammar: one or more words/numbers, a comma, a quoted string,\n",
	"# a signed number and a closing \"!\"; whitespace between tokens is ignored.\n",
	"l = Lark('''start: (WORD | NUMBER)+\",\" STRING NUMBER\"!\"\n",
	"            %import common.WORD   // imports from terminal library\n",
	"            %import common.ESCAPED_STRING   -> STRING\n",
	"            %import common.SIGNED_NUMBER    -> NUMBER\n",
	"            %import common.WS\n",
	"            %ignore WS\n",
	"         ''')\n",
	"\n",
	"print( l.parse(\"Hello, \\\"World dude\\\" -40!\") )"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Tree(start, [Token(NUMBER, '50'), Token(WORD, 'or'), Token(WORD, 'so'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n"
	]
	}
	],
	"source": [
	"print( l.parse(\"50 or so, \\\"World dude\\\" -40!\") )"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Tree(start, [Token(WORD, 'Hello'), Token(NUMBER, '20'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n"
	]
	}
	],
	"source": [
	"print( l.parse(\"Hello 20, \\\"World dude\\\" -40!\") )"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Sentence grammar in which every part is produced by its own named rule\n",
	"# (wonum, estring, only_numbers), so each part becomes a subtree of `start`.\n",
	"l = Lark('''start: wonum\",\" estring only_numbers\"!\"\n",
	"            only_numbers: NUMBER\n",
	"            wonum: (WORD | NUMBER)+\n",
	"            estring: STRING\n",
	"            %import common.WORD   // imports from terminal library\n",
	"            %import common.ESCAPED_STRING   -> STRING\n",
	"            %import common.SIGNED_NUMBER    -> NUMBER\n",
	"            %import common.WS\n",
	"            %ignore WS\n",
	"         ''')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"from lark import Transformer\n",
	"\n",
	"class MyTransformer(Transformer):\n",
	"    \"\"\"Collapse a parsed sentence into a single pipe-delimited string.\n",
	"\n",
	"    Each method carries the name of a grammar rule and receives the list of\n",
	"    that rule's (already transformed) children.\n",
	"    \"\"\"\n",
	"\n",
	"    def start(self, lvalues):\n",
	"        \"\"\"Join the child values, placing a pipe character between them.\"\"\"\n",
	"        return \"|\".join(lvalues)\n",
	"\n",
	"    def estring(self, quoted_string):\n",
	"        \"\"\"Drop the first and last character of each child, then space-join.\"\"\"\n",
	"        return \" \".join([x[1:-1] for x in quoted_string])\n",
	"\n",
	"    def wonum(self, items):\n",
	"        \"\"\"Space-join the children.\"\"\"\n",
	"        return \" \".join(items)\n",
	"\n",
	"    def only_numbers(self, item):\n",
	"        \"\"\"Return the first child converted to str.\"\"\"\n",
	"        return str(item[0])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [],
	"source": [
	"input_string = \"\"\"Hello 30 there, \"some stuff\" 20!\"\"\""
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'Hello 30 there|some stuff|20'"
	]
	},
	"execution_count": 20,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"MyTransformer().transform(l.parse(input_string))"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"# A More Complicated Example\n",
	"\n",
	"We get a string that looks like this:\n",
	"\n",
	"```TOKEN : NUMBER : quoted or unquoted string : SIGNED INT```"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Order-line grammar: <token> : <number> : <words or quoted string> : <number>\n",
	"l = Lark('''start: token \":\" NUMBER \":\" qornot \":\" NUMBER\n",
	"            token: tire | wheel | caster | disc | mechano |tirer\n",
	"            tire: \"TIRE\"\n",
	"            wheel: \"WHEEL\"\n",
	"            caster: \"CASTOR\"\n",
	"            disc: \"DISC\"\n",
	"            tirer: \"TIRER\"\n",
	"            mechano: \"MECHANO\"\n",
	"            qornot: WORD+ | STRING\n",
	"            %import common.WORD   // imports from terminal library\n",
	"            %import common.ESCAPED_STRING   -> STRING\n",
	"            %import common.SIGNED_NUMBER    -> NUMBER\n",
	"            %import common.WS\n",
	"            %ignore WS\n",
	"         ''')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Tree(start, [Tree(token, [Tree(tire, [])]), Token(NUMBER, '30'), Tree(qornot, [Token(STRING, '\"hello there\"')]), Token(NUMBER, '10')])"
	]
	},
	"execution_count": 30,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"l.parse(\"\"\"TIRE : 30 : \"hello there\" : 10\"\"\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {},
	"outputs": [],
	"source": [
	"from pydantic import BaseModel\n",
	"\n",
	"class Order(BaseModel):\n",
	"    \"\"\"One parsed order line.\"\"\"\n",
	"\n",
	"    kind: str         # name of the keyword rule that matched (see OrderTransformer.token)\n",
	"    description: str  # third field of the line\n",
	"    have: int         # second field of the line\n",
	"    need: int         # fourth field of the line\n",
	"\n",
	"\n",
	"class OrderTransformer(Transformer):\n",
	"    \"\"\"Turn a parsed order line into an Order.\"\"\"\n",
	"\n",
	"    def start(self, items):\n",
	"        \"\"\"Build the Order from the four children of `start`.\n",
	"\n",
	"        The children arrive in grammar order: kind, have, description, need.\n",
	"        \"\"\"\n",
	"        return Order(kind=items[0],\n",
	"                    description=items[2],\n",
	"                    have=items[1],\n",
	"                    need=items[3])\n",
	"\n",
	"    def token(self, items):\n",
	"        \"\"\"Return the name of the keyword sub-rule that matched (e.g. 'tirer').\"\"\"\n",
	"        return str(items[0].data)\n",
	"\n",
	"    def qornot(self, items):\n",
	"        \"\"\"Return the description text without surrounding quotes.\n",
	"\n",
	"        `items` holds either one quoted STRING token or one or more WORD tokens.\n",
	"        \"\"\"\n",
	"        if len(items) == 1 and items[0].type == \"STRING\":\n",
	"            # Drop the enclosing double quotes so quoted and unquoted\n",
	"            # descriptions come out in the same form.\n",
	"            return items[0][1:-1]\n",
	"        return \" \".join(items)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 35,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Order(kind='tirer', description=\"this won't work\", have=30, need=-10)"
	]
	},
	"execution_count": 35,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"OrderTransformer().transform(l.parse(\"\"\"TIRER : 30 : \"this won't work\": -10\"\"\"))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.5rc1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}