Skip to content

Instantly share code, notes, and snippets.

@merlight
Last active January 5, 2016 00:35
Show Gist options
  • Save merlight/5fc60cc56cdc87fa1c73 to your computer and use it in GitHub Desktop.
Save merlight/5fc60cc56cdc87fa1c73 to your computer and use it in GitHub Desktop.
-- debug utils
local df = df or function(...) print(string.format(...)) end
local function dstring(arg)
if type(arg) == "string" then
return string.format("%q", arg)
else
return tostring(arg)
end
end
----------------------------
-- BEGIN GENERIC PARSER LIB
local strfind = string.find
local strformat = string.format
local strsub = string.sub
local strgsub = string.gsub
local pop = table.remove
local push = table.insert
local operator_function =
{
__call = function(self, ...)
local op, lhs, rhs = self[1], self[2], self[3]
return op(lhs, rhs, ...)
end
}
local function flush_ast(have_args, arg_stack, op_stack, op_next)
for i = #op_stack, 1, -1 do
local op_last = op_stack[i]
if type(op_last) == "number" then -- reached '('
break
end
if op_next then
local prec = op_last.precedence - op_next.precedence
if prec >= 0 and (prec > 0 or op_next.assoc == 'right') then
break
end
end
df("# flush operator '%s'", op_last.name)
if op_last.unary_func then
if have_args < 1 then
error(strformat("missing argument for operator '%s'", op_last.name))
end
local n = #arg_stack
local op = {op_last.unary_func, nil, arg_stack[n]}
arg_stack[n] = setmetatable(op, operator_function)
elseif op_last.binary_func then
if have_args < 2 then
error(strformat("missing argument for operator '%s'", op_last.name))
end
local n = #arg_stack
local op = {op_last.binary_func, arg_stack[n-1], arg_stack[n]}
arg_stack[n-1] = setmetatable(op, operator_function)
arg_stack[n] = nil
have_args = have_args - 1
else
error(strformat("what is operator '%s' supposed to do?", op_last.name))
end
op_stack[i] = nil
end
return have_args
end
function build_ast(tokenizer, str, pos)
local arg_stack = {}
local op_stack = {}
local have_args = 0
while pos do
local npos, op_next, token = tokenizer(str, pos)
if not npos then
error(strformat("malformed expression near '%s'", strsub(str, pos + 1)))
else
pos = npos
npos = pos < 0 and -pos or pos -- need actual position for error messages
end
if type(op_next) == "table" then
df("- pos=%3d op=%s token=%s", pos, dstring(op_next.name), dstring(token))
else
df("- pos=%3d op=%s token=%s", pos, dstring(op_next), dstring(token))
end
if op_next then
if op_next == '(' then
push(op_stack, have_args)
have_args = 0
elseif op_next == ')' then
have_args = flush_ast(have_args, arg_stack, op_stack)
local had_args = pop(op_stack)
if type(had_args) ~= "number" then
error(strformat("unmatched closing parenthesis at '%s'", strsub(str, npos)))
end
assert(have_args <= 1)
have_args = have_args + had_args
elseif op_next == '$' then -- end of expression
have_args = flush_ast(have_args, arg_stack, op_stack)
break
else
have_args = flush_ast(have_args, arg_stack, op_stack, op_next)
push(op_stack, op_next)
end
end
if token then
push(arg_stack, token)
have_args = have_args + 1
end
end
if #op_stack > 0 then
error(strformat("unmatched opening parenthesis in '%s'", str))
elseif #arg_stack > 1 then
error(strformat("malformed expression '%s'", str))
end
return unpack(arg_stack)
end
-- END OF GENERIC PARSER LIB
-----------------------------
---------------------------------
-- THIS PART IS USING THE PARSER
-- helper function to evaluate an AST node:
-- if it's a string, it's a pattern to be matched,
-- otherwise it's a compound node to be called
local function match_end(node, str, pos)
if type(node) == "string" then
local ms, me = strfind(str, node, pos + 1)
return me
else
return node(str, pos)
end
end
-- define operator functions
-- they all receive the left- and right-hand-side AST nodes as their
-- first two arguments (unary operators receive lhs==nil)
-- further arguments are arbitrary context passed from user code
local function match_negation(lhs, rhs, str, pos)
local rpos = match_end(rhs, str, 0)
if rpos then
return false
else
return pos
end
end
function match_alternation(lhs, rhs, str, pos)
local lpos = match_end(lhs, str, pos)
local rpos = match_end(rhs, str, pos)
if lpos and rpos then
return lpos < rpos and lpos or rpos
else
return lpos or rpos
end
end
function match_concatenation(lhs, rhs, str, pos)
local lpos = match_end(lhs, str, pos)
if lpos then
return match_end(rhs, str, lpos)
else
return false
end
end
local operators =
{
['!'] = {name='!', assoc='right', precedence=200, unary_func=match_negation},
[' '] = {name='.', assoc='left', precedence=300, binary_func=match_concatenation},
['+'] = {name='+', assoc='left', precedence=500, binary_func=match_alternation},
}
-- define tokenizer;
-- it works like an iterator function, except that a nil return means syntax error;
-- for well-formed expression, it returns '$' marker at the end
function nexttoken(str, pos)
local op_concat = nil
if pos < 0 then -- the previous token was a string or ')'
pos = -pos
-- if the next token is a string, prepend it
-- with a synthesized concatenation operator
op_concat = operators[' ']
-- if the next token is '(' or a unary operator,
-- synthesize a concatenation operator first
if strfind(str, '^%s*[(!]', pos + 1) then
return pos, op_concat, nil
end
end
-- match everything up to operator
local ms, me, m1 = strfind(str, '^%s*([^"()+!^~]+)', pos + 1)
if m1 then
-- escape metacharacters
local pat = strgsub(m1, "[-*+?^$().[%]%%]", "%%%0")
-- replace whitespace with "match shortest anything"
pat = strgsub(pat, "%s+", ".-")
return -me, op_concat, pat
end
-- match quoted string
local ms, me, m1 = strfind(str, '^%s*"(.-)"', pos + 1)
if m1 then
-- escape metacharacters
local pat = strgsub(m1, "[-*+?^$().[%]%%]", "%%%0")
return -me, op_concat, pat
end
-- match operator or empty string at end of expression
local ms, me, m1 = strfind(str, '^%s*(.?)', pos + 1)
if m1 == ')' then
-- return negative position to tell the next round
-- there was a value preceding it
return -me, m1, nil
elseif m1 == '(' then
return me, m1, nil
elseif m1 == '' then
return me, '$', nil -- end of expression marker
else
return me, assert(operators[m1]), nil
end
end
function eval_ast(node, str)
return match_end(node, str, 0)
end
-- and this is how you put it together
local some_strings = {'book chapter one', 'one book chapter', 'book chapter two', 'book chapter three', 'book chapter twenty one'}
local filter_expr = 'chapter (one + two) !twenty'
df("\nPARSE TEST : %s\n", filter_expr)
local ok, ast = pcall(build_ast, nexttoken, filter_expr, 0)
if not ok then
error(ast)
end
df("\nMATCH TEST\n")
for i, str in ipairs(some_strings) do
local ok, match = pcall(match_end, ast, str, 0)
if not ok then
df("! error : %q : %q", match, str)
elseif match then
df("+ match : %q", str)
else
df("- no match : %q", str)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment