Skip to content

Instantly share code, notes, and snippets.

@kognate
Created November 12, 2019 20:08
Show Gist options
  • Save kognate/2b4e9812177600f0085aeb409f9efdf8 to your computer and use it in GitHub Desktop.
Save kognate/2b4e9812177600f0085aeb409f9efdf8 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from lark import Lark"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tree(start, [Token(WORD, 'Hello'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n"
]
}
],
"source": [
"\n",
"l = Lark('''start: (WORD | NUMBER)+\",\" STRING NUMBER\"!\"\n",
" %import common.WORD // imports from terminal library\n",
" %import common.ESCAPED_STRING -> STRING\n",
" %import common.SIGNED_NUMBER -> NUMBER\n",
" %import common.WS\n",
" %ignore WS\n",
" ''')\n",
"\n",
"print( l.parse(\"Hello, \\\"World dude\\\" -40!\") )"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tree(start, [Token(NUMBER, '50'), Token(WORD, 'or'), Token(WORD, 'so'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n"
]
}
],
"source": [
"print( l.parse(\"50 or so, \\\"World dude\\\" -40!\") )"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Tree(start, [Token(WORD, 'Hello'), Token(NUMBER, '20'), Token(STRING, '\"World dude\"'), Token(NUMBER, '-40')])\n"
]
}
],
"source": [
"print( l.parse(\"Hello 20, \\\"World dude\\\" -40!\") )"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"l = Lark('''start: wonum\",\" estring only_numbers\"!\"\n",
" only_numbers: NUMBER\n",
" wonum: (WORD | NUMBER)+\n",
" estring: STRING\n",
" %import common.WORD // imports from terminal library\n",
" %import common.ESCAPED_STRING -> STRING\n",
" %import common.SIGNED_NUMBER -> NUMBER\n",
" %import common.WS\n",
" %ignore WS\n",
" ''')"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"from lark import Transformer\n",
"\n",
"class MyTransformer(Transformer):\n",
" def start(self, lvalues):\n",
" return \"|\".join(lvalues)\n",
" def estring(self, quoted_string):\n",
" return \" \".join([x[1:-1] for x in quoted_string])\n",
" def wonum(self, items):\n",
" return \" \".join(items)\n",
" def only_numbers(self, item):\n",
" return str(item[0])"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"input_string = \"\"\"Hello 30 there, \"some stuff\" 20!\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Hello 30 there|some stuff|20'"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"MyTransformer().transform(l.parse(input_string))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# A More Complicated Example\n",
"\n",
"We get a string that looks like this:\n",
"\n",
"```TOKEN : NUMBER : quoted or unquoted string : SIGNED INT```"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"l = Lark('''start: token \":\" NUMBER \":\" qornot \":\" NUMBER\n",
" token: tire | wheel | caster | disc | mechano |tirer\n",
" tire: \"TIRE\"\n",
" wheel: \"WHEEL\"\n",
" caster: \"CASTOR\"\n",
" disc: \"DISC\"\n",
" tirer: \"TIRER\"\n",
" mechano: \"MECHANO\"\n",
" qornot: WORD+ | STRING\n",
" %import common.WORD // imports from terminal library\n",
" %import common.ESCAPED_STRING -> STRING\n",
" %import common.SIGNED_NUMBER -> NUMBER\n",
" %import common.WS\n",
" %ignore WS\n",
" ''')"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Tree(start, [Tree(token, [Tree(tire, [])]), Token(NUMBER, '30'), Tree(qornot, [Token(STRING, '\"hello there\"')]), Token(NUMBER, '10')])"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"l.parse(\"\"\"TIRE : 30 : \"hello there\" : 10\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"from pydantic import BaseModel\n",
"\n",
"class Order(BaseModel):\n",
" kind: str\n",
" description: str\n",
" have: int\n",
" need: int\n",
"\n",
"\n",
"class OrderTransformer(Transformer):\n",
" def start(self, items):\n",
" return Order(kind=items[0],\n",
" description=items[2],\n",
" have=items[1],\n",
" need=items[3])\n",
" def token(self, items):\n",
" return str(items[0].data)\n",
" def qornot(self, items):\n",
" return \" \".join(items)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Order(kind='tirer', description='\"this won\\'t work\"', have=30, need=-10)"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"OrderTransformer().transform(l.parse(\"\"\"TIRER : 30 : \"this won't work\": -10\"\"\"))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5rc1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
jupyterlab
lark-parser
pydantic
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment