-
-
Save randrews/5eab368f35ab8e774433 to your computer and use it in GitHub Desktop.
setmetatable(_ENV, { __index=lpeg }) | |
VARS = {} | |
function eval(...) | |
local args = {...} | |
local accum = args[1] | |
for i = 2, #args, 2 do | |
local operator = args[i] | |
local num2 = args[i+1] | |
if operator == '+' then | |
accum = accum + num2 | |
elseif operator == '-' then | |
accum = accum - num2 | |
elseif operator == '*' then | |
accum = accum * num2 | |
elseif operator == '/' then | |
accum = accum / num2 | |
end | |
end | |
return accum | |
end | |
function assign(ref, value) | |
ref.scope[ref.index] = value | |
return value | |
end | |
function lookup(ref) | |
return ref.scope[ref.index] | |
end | |
function makeref(...) | |
local indices = {...} | |
local tbl = VARS | |
for i = 1, #indices-1 do | |
tbl = tbl[indices[i]] | |
end | |
return {scope=tbl, index=indices[#indices]} | |
end | |
spc = S(" \t\n")^0 | |
digit = R('09') | |
number = C( (P("-") + digit) * | |
digit^0 * | |
( P('.') * digit^0 )^-1 ) / tonumber * spc | |
lparen = "(" * spc | |
rparen = ")" * spc | |
lbrack = "[" * spc | |
rbrack = "]" * spc | |
comma = "," * spc | |
expr_op = C( S('+-') ) * spc | |
term_op = C( S('*/') ) * spc | |
letter = R('AZ','az') | |
name = C( letter * (digit+letter+"_")^0 ) * spc | |
stmt = spc * P{ | |
"STMT"; | |
STMT = | |
V("REF") * "=" * spc * V("VAL") / assign + | |
V("EXPR"), | |
EXPR = V("TERM") * ( expr_op * V("TERM") )^0 / eval, | |
TERM = V("FACT") * ( term_op * V("FACT") )^0 / eval, | |
REF = name * (lbrack * V("EXPR") * rbrack)^0 / makeref, | |
FACT = | |
number / eval + | |
lparen * V("EXPR") * rparen / eval + | |
V("REF") / lookup, | |
ARRAY = lbrack * Ct( V("VAL_LIST")^-1 ) * rbrack, | |
VAL_LIST = V("VAL") * (comma * V("VAL"))^0, | |
VAL = V("EXPR") + V("ARRAY") | |
} | |
function test(expr) | |
assert(expr:match(" 1 + 2 ") == 3) | |
assert(expr:match("1+2+3+4+5") == 15) | |
assert(expr:match("2*3*4 + 5*6*7") == 234) | |
assert(expr:match(" 1 * 2 + 3") == 5) | |
assert(expr:match("( 2 +2) *6") == 24) | |
stmt:match("a=3"); assert(VARS.a == 3) | |
assert(stmt:match("a") == 3) | |
assert(stmt:match("a * 5") == 15); VARS.a=nil | |
stmt:match("a = [ 4, 5, 6 ]"); | |
assert(VARS.a[1] == 4) | |
assert(VARS.a[2] == 5) | |
assert(VARS.a[3] == 6) | |
VARS.a=nil | |
stmt:match("b = [ ]"); | |
assert(VARS.b[1] == nil) | |
VARS.b=nil | |
stmt:match("c = [[1,2], [3,4]]") | |
assert(VARS.c[1][1] == 1) | |
assert(VARS.c[1][2] == 2) | |
assert(VARS.c[2][1] == 3) | |
assert(VARS.c[2][2] == 4) | |
assert(stmt:match("c[4/2][1]") == 3) | |
stmt:match("c[3] = 5") | |
assert(VARS.c[3] == 5) | |
VARS.c=nil | |
end | |
function repl(file) | |
file = file or io.input() | |
parser = stmt | |
for line in file:lines() do | |
print(parser:match(line)) | |
end | |
end |
what happens if you have a name like 9foo ? Surely its going to fail ? What if you want to enable a variable name like 9foo ?
Just change the order of alternative, so name / VARS go first.
Also, this is what is meant by no backtracking:
> re = require 're'
> = re.match('hello', ".* .") -- no backtrack
nil
> = string.match('hello', ".*.") -- backtrack
hello
There is a reason why no programming language I know allows variable names to start with digits.
But you can add * -l.R('az','AZ')
or something after the number pattern to make it fail if it is part of an identifier.
If you make the name go first and allow digits, you need to care about 123 being treated as a name...
There is a reason why no programming language I know allows variable names to start with digits.
How about scheme ?
(define (2X value) (+ value value))
I know about scheme but I don`t know it, certainly not well enough to know that it allows identifiers to start with digits (or be completely made of digits?) but the syntax of scheme makes it easy enough to parse anyway...
S expressions were my first LPeg pattern to learn how to generate a nice ast.
But I was wrong: I know enough Forth, that I should have remembered that it allows this too.
Hmm...
There must be some pattern that accepts :
123a # variable
123 # number
I am wondering why the compiler gods do not allow it though, JavaScript certainly throws an error.
I guess the benefit of allowing it does not outweigh the cost to the compiler writer.
I already gave the solution above but here an implementation of the two important patterns (number must be tried before variable):
number = l.P'-'^-1 * l.R'09'^1 * (l.P'.' * l.R'09'^1)^-1 * (l.S'eE' * l.S'+-'^-1 * l.R'09'^1)^-1 * -l.R('az','AZ', '__')
variable = `l.R('az','AZ','09','__')^1
The negative lookahead makes it possible.
The problem is, that it can get really confusing: '1e2' is a number but '1f2' and '1e' are names...
And it makes the parser a little more complicated for no real benefit.
Yeah, exactly, what eric-2 said. :)
You are correct, as you can see in the first Try It Online link in my 2nd post.
By the way: just testing and trying this stuff is sometimes easier than to explain it in terms of Peg semantics which are well explained in the Paper by Roberto Ierusalimschy.
Going back to the original example, the number pattern will not fail but match the empty string, therefore preventing LPeg from trying later options.
TIO
There is no reason to systematically avoid optional prefixes in patterns, however patterns should only match the empty string when absolutely necessary because they normally can`t be used as the non-last choice and therefore also not in repetitions.