Skip to content

Instantly share code, notes, and snippets.

@moser
Last active September 22, 2022 09:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moser/cbf5eb26a8111cd35811987b865e72b7 to your computer and use it in GitHub Desktop.
Save moser/cbf5eb26a8111cd35811987b865e72b7 to your computer and use it in GitHub Desktop.
parse_text_interpolation.py
"""
Parse ``{variable}`` interpolations out of free text with a PEG grammar.

Requires the third-party ``parsimonious`` package:

    pip install parsimonious
"""
import dataclasses as _dc
from parsimonious.grammar import Grammar
from parsimonious.nodes import NodeVisitor
# Sample input fed to the parser at the bottom of this script. It mixes plain
# text with `{...}` interpolations, some padded with spaces, and ends with a
# pair of curlies that is split across two lines.
TEXT = """
Here are some {variable}s. I don't care about spaces in the { interpolations}.
More lines...
{and_one_more}
Linebreaks are not ok in the curlies: {
i_will_not_be_recognized_as_var}
"""
# PEG grammar for the interpolation syntax:
#   expr          - any sequence of interpolations and text pieces
#   interpolation - a variable usage wrapped in curly braces
#   text          - a run of non-brace characters, or a single stray brace
#   var_usage     - a variable name optionally padded with space characters
#   space         - a single space character (the only padding allowed)
# The upper-case rules are the terminals. An interpolation is tried before
# text, so a brace only becomes plain text when no interpolation matches.
main_grammar = Grammar(
r"""
expr = (interpolation / text)*
interpolation = LPAR var_usage RPAR
text = TEXT / LPAR / RPAR
var_usage = space* VAR_NAME space*
space = " "
VAR_NAME = ~"[a-zA-Z0-9_]+"
TEXT = ~"[^{}]+"
LPAR = ~"{"
RPAR = "}"
"""
)
class MainVisitor(NodeVisitor):
    """Convert a ``main_grammar`` parse tree into the node objects of this module.

    Each ``visit_<rule>`` method handles the grammar rule of the same name;
    every other rule falls through to :meth:`generic_visit`.
    """

    def visit_expr(self, node, visited_children):
        """Return a ``Node`` holding every visited piece of the expression."""
        # ``generic_visit`` can hand back lists of visited children, so the
        # results may be nested; unwrap them into one flat list of children.
        return Node(children=list(flatten_list(visited_children)))

    def visit_text(self, node, visited_children):
        """Return a ``TextNode`` carrying the exact text the rule matched."""
        return TextNode(node.text)

    def visit_interpolation(self, node, visited_children):
        """Return an ``InterpolationNode`` for the variable inside the braces."""
        # According to the grammar there is exactly one variable name here.
        return InterpolationNode(var_name=self._first_var_name(visited_children).name)

    def visit_var_usage(self, node, visited_children):
        """Return the ``VarName`` of a usage, dropping the surrounding spaces."""
        # According to the grammar there is exactly one variable name here.
        return self._first_var_name(visited_children)

    def visit_VAR_NAME(self, node, visited_children):
        """Wrap the matched identifier text in a ``VarName``."""
        return VarName(name=node.text)

    def generic_visit(self, node, visited_children):
        """Pass visited children through, or the raw node if there are none."""
        return visited_children or node

    @staticmethod
    def _first_var_name(visited_children):
        """Return the first ``VarName`` among ``visited_children``.

        Raises:
            IndexError: if none of the children is a ``VarName``.
        """
        return [child for child in visited_children if isinstance(child, VarName)][0]
def flatten_list(lst):
    """Lazily yield the non-list items of ``lst``, descending into nested lists.

    Only ``list`` instances are unwrapped; tuples, strings and any other
    objects are yielded as-is, in their original order.

    Args:
        lst: An iterable whose items may themselves be (nested) lists.

    Yields:
        Every item that is not a ``list``, depth-first, left to right.
    """
    for item in lst:
        if isinstance(item, list):
            yield from flatten_list(item)
        else:
            yield item
@_dc.dataclass
class Node:
    """Container node that holds an ordered list of child nodes."""

    # Child objects; each one must provide a ``flatten()`` method.
    children: list

    def flatten(self) -> list:
        """Return the flattened children as a single flat list.

        A child whose ``flatten()`` returns a list (for example a nested
        ``Node``) has that list spliced in, so callers can always scan the
        result as one flat sequence.
        """
        leaves = []
        for child in self.children:
            flat = child.flatten()
            if isinstance(flat, list):
                leaves.extend(flat)
            else:
                leaves.append(flat)
        return leaves
@_dc.dataclass
class TextNode:
    """Leaf node for a piece of plain text."""

    # The text carried by this node.
    text: str

    def flatten(self) -> "TextNode":
        """Return this node itself; a leaf has nothing to unwrap."""
        return self
@_dc.dataclass
class InterpolationNode:
    """Leaf node for one interpolated variable."""

    # Name of the referenced variable.
    var_name: str

    def flatten(self) -> "InterpolationNode":
        """Return this node itself; a leaf has nothing to unwrap."""
        return self
@_dc.dataclass
class VarName:
    """Wrapper around a variable name string."""

    # The variable's identifier text.
    name: str
# Parse the sample text, then convert the raw parse tree into node objects.
tree = main_grammar.parse(TEXT)
out = MainVisitor().visit(tree)

# Flatten once and reuse the result for both reports below.
flat_nodes = out.flatten()
print(flat_nodes)

print("Used vars")
used_vars = [
    node.var_name
    for node in flat_nodes
    if isinstance(node, InterpolationNode)
]
print(used_vars)
/*
Paste the grammar below in the online peggy parser builder: https://peggyjs.org/online.html
And use this as the test text:
Here are some {variable}s. I don't care about spaces in the { interpolations}.
More lines...
{and_one_more}
Linebreaks are not ok in the curlies: {
i_will_not_be_recognized_as_var}
*/
// Entry point: a document is any sequence of interpolations and text pieces.
expr
  = one_expr*

// One piece of the document. An interpolation is tried first, so a brace is
// only treated as plain text when no interpolation matches at that position.
one_expr
  = interp:interpolation { return interp }
  / text { return {type:"text", text: text()} }

// A variable usage wrapped in curly braces; yields the var_usage result.
interpolation
  = LPAR var_use:var_usage RPAR { return var_use }

// A run of non-brace characters, or a single stray brace. The choice is
// parenthesised so the action applies to every alternative rather than only
// to the last one, making the rule always yield the matched string.
text
  = (TEXT / LPAR / RPAR) { return text() }

// A variable name optionally padded with spaces on either side.
var_usage
  = space* var_name:VAR_NAME space* { return {type:"var", var: var_name} }

// Only the space character counts as padding, so a line break inside the
// curlies prevents the interpolation from matching.
space
  = " "

VAR_NAME
  = [a-zA-Z0-9_]+ { return text() }

TEXT
  = [^{}]+

LPAR
  = "{"

RPAR
  = "}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment