Skip to content

Instantly share code, notes, and snippets.

@jg-rp
Last active September 2, 2022 06:14
Show Gist options
  • Save jg-rp/c13dc2e186e853552a7b56199a0378de to your computer and use it in GitHub Desktop.
Save jg-rp/c13dc2e186e853552a7b56199a0378de to your computer and use it in GitHub Desktop.
Liquid expression lexer and parser with SLY

A SLY lexer and parser for Liquid filtered expressions.

Filtered expressions are those found in Liquid output statements, echo tags and assign tags.

The result of FilteredExpressionParser().parse() is an AST built from existing Python Liquid Expression objects, rooted at a FilteredExpression.

This is intended to be an example of how one might use SLY to parse an expression for a custom tag in Python Liquid. The tags built into Python Liquid do not use SLY (or PLY, or any other general-purpose parsing package). Instead, we chose to write our own lexers and recursive descent parsers, one for each of the common Liquid expression types. This decision was made on performance grounds: benchmarks show Python Liquid's parser to be roughly three times faster for filtered expressions than this SLY implementation.

The following grammar defines the "standard" Liquid filtered expression, following BNF conventions from the SLY documentation.

expr : left { "|" filter }
left : literal
| path
| range
literal : FLOAT
| INTEGER
| STRING
| TRUE
| FALSE
| NIL
| NULL
| EMPTY
| BLANK
path : ID { prop }
prop : bracketed
| dotted
bracketed : "[" elem "]"
dotted : DOT ID
elem : path
| INTEGER
| STRING
range : "(" rangearg DOUBLEDOT rangearg ")"
rangearg : INTEGER
| FLOAT
| path
filter : ID [ ":" args ]
args : arg { "," arg }
arg : parg
| kwarg
parg : literal
| path
kwarg : ID ":" parg
from sly import Lexer
from sly import Parser
from liquid.expression import Filter
from liquid.expression import FilteredExpression
from liquid.expression import FloatLiteral
from liquid.expression import IntegerLiteral
from liquid.expression import StringLiteral
from liquid.expression import Identifier
from liquid.expression import IdentifierPathElement
from liquid.expression import RangeLiteral
from liquid.expression import TRUE
from liquid.expression import FALSE
from liquid.expression import NIL
from liquid.expression import BLANK
from liquid.expression import EMPTY
from liquid.exceptions import LiquidSyntaxError
class FilteredExpressionLexer(Lexer):
    """Tokenize a Liquid filtered expression.

    This is the default lexer state. When an opening quote is seen, lexing is
    handed over to ``SingleQuoteStringLexer`` or ``DoubleQuoteStringLexer``
    via ``begin()``; those states hand control back at the closing quote.

    The rules below are order-sensitive: SLY matches patterns in the order
    they are defined, so do not reorder them without checking the effect on
    ``..`` vs ``.``, ``FLOAT`` vs ``INTEGER`` and numbers vs ``ID``.

    Raises:
        LiquidSyntaxError: if a character does not match any rule.
    """

    tokens = {
        DOUBLEDOT,
        FLOAT,
        DOT,
        INTEGER,
        ID,
        TRUE,
        FALSE,
        NIL,
        NULL,
        EMPTY,
        BLANK,
        STRING,
    }

    literals = {"(", ")", "[", "]", ",", ":", "|"}

    # Newlines are not ignored here; they are consumed by `ignore_newline`
    # so that line numbers stay accurate.
    ignore = " \t\r"

    # Listed before DOT so that ".." is not lexed as two DOT tokens.
    DOUBLEDOT = r"\.\."
    DOT = r"\."

    # The negative lookahead keeps the "1" in a range like "(1..5)" from
    # being consumed as the float "1.".
    @_(r"-?\d+\.(?!\.)\d*")
    def FLOAT(self, t):
        t.value = float(t.value)
        return t

    @_(r"-?\d+")
    def INTEGER(self, t):
        t.value = int(t.value)
        return t

    # Opening quotes produce no token of their own (nothing is returned);
    # they only switch the lexer into the matching string state.
    @_(r"'")
    def SINGLEQUOTE(self, t):
        self.begin(SingleQuoteStringLexer)

    @_(r"\"")
    def DOUBLEQUOTE(self, t):
        self.begin(DoubleQuoteStringLexer)

    # Identifiers may contain hyphens and end with an optional "?".
    ID = r"\w[\w\-]*\??"

    # Reserved words are matched by the ID pattern and then remapped to
    # their own token types.
    ID["true"] = TRUE
    ID["false"] = FALSE
    ID["nil"] = NIL
    ID["null"] = NULL
    ID["empty"] = EMPTY
    ID["blank"] = BLANK

    @_(r"\n+")
    def ignore_newline(self, t):
        self.lineno += t.value.count("\n")

    def error(self, t):
        # Raise instead of printing and skipping: silently dropping a
        # character would let malformed expressions parse as something else.
        # This matches how FilteredExpressionParser reports errors.
        raise LiquidSyntaxError(
            f"unexpected {t.value[0]!r}",
            linenum=self.lineno,
        )
class SingleQuoteStringLexer(Lexer):
    """Lexer state for the body of a single-quoted string literal.

    Plain text and escaped quotes are buffered rather than emitted, and a
    single ``STRING`` token carrying the complete value is produced when the
    closing quote is reached. This means that:

    - an empty literal (``''``) still yields a ``STRING`` token,
    - a literal containing ``\\'`` yields one ``STRING`` token rather than a
      ``STRING``/``ESCAPE``/``STRING`` sequence, which the grammar rule
      ``literal : STRING`` cannot accept.

    At the closing quote, lexing switches back to ``FilteredExpressionLexer``.

    Raises:
        LiquidSyntaxError: if a character matches no rule, e.g. a backslash
            that is not followed by a single quote.
    """

    tokens = {ESCAPE, SINGLEQUOTE, STRING}

    @_(r"[^\\']+")
    def STRING(self, t):
        # String bodies may span lines; keep line numbers accurate.
        self.lineno += t.value.count("\n")
        parts = getattr(self, "_string_parts", None)
        if parts is None:
            parts = self._string_parts = []
        parts.append(t.value)
        # Nothing is returned, so no token is emitted for this fragment.

    @_(r"\\'")
    def ESCAPE(self, t):
        parts = getattr(self, "_string_parts", None)
        if parts is None:
            parts = self._string_parts = []
        # An escaped quote contributes a literal quote character.
        parts.append("'")

    @_(r"'")
    def SINGLEQUOTE(self, t):
        # Re-label the closing quote as the STRING token for the whole
        # literal, then clear the buffer ready for the next string.
        t.type = "STRING"
        t.value = "".join(getattr(self, "_string_parts", None) or ())
        self._string_parts = []
        self.begin(FilteredExpressionLexer)
        return t

    def error(self, t):
        # Raise instead of printing and skipping, which would silently drop
        # the offending character from the string's value.
        raise LiquidSyntaxError(
            f"unexpected {t.value[0]!r}",
            linenum=self.lineno,
        )
class DoubleQuoteStringLexer(Lexer):
    """Lexer state for the body of a double-quoted string literal.

    Plain text and escaped quotes are buffered rather than emitted, and a
    single ``STRING`` token carrying the complete value is produced when the
    closing quote is reached. This means that:

    - an empty literal (``""``) still yields a ``STRING`` token,
    - a literal containing ``\\"`` yields one ``STRING`` token rather than a
      ``STRING``/``ESCAPE``/``STRING`` sequence, which the grammar rule
      ``literal : STRING`` cannot accept.

    At the closing quote, lexing switches back to ``FilteredExpressionLexer``.

    Raises:
        LiquidSyntaxError: if a character matches no rule, e.g. a backslash
            that is not followed by a double quote.
    """

    tokens = {ESCAPE, DOUBLEQUOTE, STRING}

    @_(r'[^\\"]+')
    def STRING(self, t):
        # String bodies may span lines; keep line numbers accurate.
        self.lineno += t.value.count("\n")
        parts = getattr(self, "_string_parts", None)
        if parts is None:
            parts = self._string_parts = []
        parts.append(t.value)
        # Nothing is returned, so no token is emitted for this fragment.

    @_(r'\\"')
    def ESCAPE(self, t):
        parts = getattr(self, "_string_parts", None)
        if parts is None:
            parts = self._string_parts = []
        # An escaped quote contributes a literal quote character.
        parts.append('"')

    @_(r'"')
    def DOUBLEQUOTE(self, t):
        # Re-label the closing quote as the STRING token for the whole
        # literal, then clear the buffer ready for the next string.
        t.type = "STRING"
        t.value = "".join(getattr(self, "_string_parts", None) or ())
        self._string_parts = []
        self.begin(FilteredExpressionLexer)
        return t

    def error(self, t):
        # Raise instead of printing and skipping, which would silently drop
        # the offending character from the string's value.
        raise LiquidSyntaxError(
            f"unexpected {t.value[0]!r}",
            linenum=self.lineno,
        )
class FilteredExpressionParser(Parser):
    """Parse a token stream into a ``FilteredExpression``.

    Consumes the tokens declared by ``FilteredExpressionLexer``. The start
    rule is ``expr``: a left-hand value (literal, path or range) followed by
    zero or more ``|``-separated filters.

    Several rule names (``filter``, ``range``) shadow Python builtins. They
    are kept because the grammar strings refer to them by name.

    Raises:
        LiquidSyntaxError: on an unexpected token or unexpected end of input.
    """

    tokens = FilteredExpressionLexer.tokens

    # expr : left { "|" filter }
    # `p.filter` is the list of values collected from the repetition.
    @_('left { "|" filter }')
    def expr(self, p):
        return FilteredExpression(p.left, p.filter)

    # left : literal | path | range
    @_("literal")
    def left(self, p):
        return p.literal

    @_("path")
    def left(self, p):
        assert isinstance(p.path, Identifier)
        return p.path

    @_("range")
    def left(self, p):
        assert isinstance(p.range, RangeLiteral)
        return p.range

    # literal : FLOAT | INTEGER | STRING | TRUE | FALSE | NIL | NULL
    #         | EMPTY | BLANK
    @_("FLOAT")
    def literal(self, p):
        return FloatLiteral(p.FLOAT)

    @_("INTEGER")
    def literal(self, p):
        return IntegerLiteral(p.INTEGER)

    @_("STRING")
    def literal(self, p):
        return StringLiteral(p.STRING)

    @_("TRUE")
    def literal(self, p):
        return TRUE

    @_("FALSE")
    def literal(self, p):
        return FALSE

    # "nil" and "null" both map to the same NIL expression.
    @_(
        "NIL",
        "NULL",
    )
    def literal(self, p):
        return NIL

    @_("EMPTY")
    def literal(self, p):
        return EMPTY

    @_("BLANK")
    def literal(self, p):
        return BLANK

    # filter : ID [ ":" args ]
    @_('ID [ ":" args ]')
    def filter(self, p):
        args = []
        kwargs = {}
        # `p.args` is None when the optional argument list is absent.
        for arg in p.args or []:
            # The `kwarg` rule returns (name, value) tuples; anything else
            # is a positional argument.
            if isinstance(arg, tuple):
                assert len(arg) == 2
                kwargs[arg[0]] = arg[1]
            else:
                args.append(arg)
        return Filter(p.ID, args, kwargs)

    # args : arg { "," arg }
    @_('arg { "," arg }')
    def args(self, p):
        return [p.arg0] + p.arg1

    # arg : parg | kwarg
    @_("parg")
    def arg(self, p):
        return p.parg

    @_("kwarg")
    def arg(self, p):
        return p.kwarg

    # parg : literal | path
    @_(
        "literal",
        "path",
    )
    def parg(self, p):
        return p[0]

    # kwarg : ID ":" parg
    @_('ID ":" parg')
    def kwarg(self, p):
        return (p.ID, p.parg)

    # path : ID { prop }
    @_("ID { prop }")
    def path(self, p):
        # Wrap the root name so every segment of the path has the same type
        # as the segments produced by the `elem` rule, instead of mixing raw
        # strings with expression objects.
        return Identifier([IdentifierPathElement(p.ID), *p.prop])

    # prop : bracketed | dotted
    @_(
        "bracketed",
        "dotted",
    )
    def prop(self, p):
        return p[0]

    # bracketed : "[" elem "]"
    @_('"[" elem "]"')
    def bracketed(self, p):
        return p.elem

    # dotted : DOT ID
    @_("DOT ID")
    def dotted(self, p):
        # `a.b` is wrapped the same way as `a["b"]` (see `elem`).
        return IdentifierPathElement(p.ID)

    # elem : path | INTEGER | STRING
    @_(
        "INTEGER",
        "STRING",
    )
    def elem(self, p):
        return IdentifierPathElement(p[0])

    @_("path")
    def elem(self, p):
        return p.path

    # range : "(" rangearg DOUBLEDOT rangearg ")"
    @_('"(" rangearg DOUBLEDOT rangearg ")"')
    def range(self, p):
        return RangeLiteral(p.rangearg0, p.rangearg1)

    # rangearg : INTEGER | FLOAT | path
    @_("path")
    def rangearg(self, p):
        assert isinstance(p.path, Identifier)
        return p.path

    @_("FLOAT")
    def rangearg(self, p):
        return FloatLiteral(p.FLOAT)

    @_("INTEGER")
    def rangearg(self, p):
        return IntegerLiteral(p.INTEGER)

    def error(self, p):
        # SLY passes None when the input ends before the grammar is
        # satisfied, so there is no token to describe.
        if p is None:
            raise LiquidSyntaxError("unexpected end of expression")
        # Report the whole token value. Indexing it (`p.value[0]`) fails for
        # INTEGER and FLOAT tokens, whose values are numbers, not strings.
        raise LiquidSyntaxError(
            f"unexpected {p.value!r}",
            linenum=p.lineno,
        )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment