Skip to content

Instantly share code, notes, and snippets.

@pygy
Last active December 15, 2015 20:40
Show Gist options
  • Save pygy/5320097 to your computer and use it in GitHub Desktop.
Save pygy/5320097 to your computer and use it in GitHub Desktop.
-- A more complete version is now available here:
-- https://github.com/pygy/LuLPeg
-- PureLPeg.lua
-- a WIP LPeg implementation in pure Lua, by Pierre-Yves Gérardy
-- released under the Romantic WTF Public License (see the end of the file).
-- Captures and locales are not yet implemented, but the rest works quite well.
-- UTF-8 is supported out of the box
--
-- PL.set_charset"UTF-8"
-- s = PL.S"ß∂ƒ©˙"
-- s:match"©" --> 3 (since © is two bytes wide).
--
-- More encodings can be easily added (see the charset section), by adding a
-- few appropriate functions.
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- Shorthands------------------------------------------------------------------
-------------------------------------------------------------------------------
-- Local aliases for the standard-library functions used throughout the file.
local t_concat, t_insert, t_remove, t_sort
    = table.concat, table.insert, table.remove, table.sort
-- `unpack` is a global in Lua 5.1 and lives in `table` from 5.2 onwards.
local t_unpack = table.unpack or unpack
-- `s_sub` is required by the *_char helpers of the charset section; it was
-- missing from the original alias list, which made them fail at call time.
local s_byte, s_char, s_sub
    = string.byte, string.char, string.sub
local m_max, m_min
    = math.max, math.min
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- Helpers --------------------------------------------------------------------
-------------------------------------------------------------------------------
--- Make `t` a weak-keyed table and return it.
-- The original wrote `{__mode=="k"}`, which evaluates the comparison
-- `__mode == "k"` (false) and stores it at index 1, so the metatable never
-- had a `__mode` field and the table stayed strong.
-- @param t the table to modify (its metatable is replaced)
-- @return t
local function weakkey(t)
  return setmetatable(t, { __mode = "k" })
end
--- Build a new array by calling `func` on each element of `ary`.
-- Extra arguments are forwarded to `func` after the element; only the first
-- result of each call is kept.
-- @param ary array to traverse (indices 1..#ary)
-- @param func function(element, ...) -> value
-- @return a new array of the same length
local function map(ary, func, ...)
  local out = {}
  local count = #ary
  local idx = 1
  while idx <= count do
    out[idx] = func(ary[idx], ...)
    idx = idx + 1
  end
  return out
end
--- Build a new table by calling `func` on every value of `tbl`, keeping
-- the keys. Traversal uses raw `next`, so array and hash entries are both
-- visited. Extra arguments are forwarded to `func` after the value.
-- @param tbl table to traverse
-- @param func function(value, ...) -> value
-- @return a new table with the same keys
local function map_all(tbl, func, ...)
  local out = {}
  local key, value = next(tbl)
  while key ~= nil do
    out[key] = func(value, ...)
    key, value = next(tbl, key)
  end
  return out
end
--- Left fold of `ary` with the binary function `func`.
-- When `acc` is nil or false, the first element seeds the accumulator and
-- folding starts at the second one.
-- The original returned `ary` instead of the accumulated value, which made
-- `all` and `any` return their input table.
-- @param ary array to reduce
-- @param func function(acc, element) -> new acc
-- @param acc optional initial accumulator
-- @return the final accumulator
local function fold(ary, func, acc)
  local i0 = 1
  if not acc then
    acc = ary[1]
    i0 = 2
  end
  for i = i0, #ary do
    acc = func(acc, ary[i])
  end
  return acc
end
--- Pair up the elements of two arrays position by position.
-- The result is as long as the longer input; the missing side of a pair is
-- simply absent (nil).
-- @param a1 first array
-- @param a2 second array
-- @return array of `{a1[i], a2[i]}` pairs
local function zip(a1, a2)
  local pairs_out = {}
  local longest = m_max(#a1, #a2)
  local i = 0
  while i < longest do
    i = i + 1
    pairs_out[i] = { a1[i], a2[i] }
  end
  return pairs_out
end
--- Pair up the values of two tables key by key.
-- Every key present in either table appears in the result, mapped to
-- `{t1[key], t2[key]}`.
-- @param t1 first table
-- @param t2 second table
-- @return table of pairs indexed by the union of the keys
local function zip_all(t1, t2)
  local merged = {}
  for key, left in pairs(t1) do
    merged[key] = { left, t2[key] }
  end
  -- Second pass: keys that only exist in `t2`.
  for key, right in pairs(t2) do
    local slot = merged[key]
    if slot == nil then
      merged[key] = { t1[key], right }
    end
  end
  return merged
end
--- Keep the elements of an array for which `func` returns a true value.
-- The original body read an undefined variable `ary` instead of its
-- parameter and never returned the result.
-- @param a1 array to filter
-- @param func predicate, function(element) -> boolean
-- @return a new array holding the accepted elements, in order
local function filter(a1, func)
  local kept = {}
  for i = 1, #a1 do
    local item = a1[i]
    if func(item) then
      kept[#kept + 1] = item
    end
  end
  return kept
end
--- Identity: returns every argument unchanged.
local function id(...)
  return ...
end

--- Does nothing and returns nothing.
local function nop()
end

--- Functional form of `a and b`.
local function AND(a, b)
  if a then
    return b
  end
  return a
end

--- Functional form of `a or b`.
local function OR(a, b)
  if a then
    return a
  end
  return b
end

--- Shallow copy of `tbl` (same keys, same values).
local function copy(tbl)
  local duplicate = map_all(tbl, id)
  return duplicate
end

--- Fold `ary` with AND.
local function all(ary)
  local folded = fold(ary, AND)
  return folded
end

--- Fold `ary` with OR.
local function any(ary)
  local folded = fold(ary, OR)
  return folded
end

--- True when `val` is below 128.
local function lt128(val)
  return 128 > val
end
--- Sets, From PiL:
--
-- Private metatable used to tag tables created by `newset`.
local set_mt = {}

--- Create a set from the array `t`: each element becomes a key mapped to
-- `true`.
local function newset(t)
  local set = setmetatable({}, set_mt)
  for _, member in ipairs(t) do
    set[member] = true
  end
  return set
end

--- Return a new set holding the keys of both `a` and `b`.
local function set_union(a, b)
  local union = newset{}
  local function absorb(source)
    for member in pairs(source) do
      union[member] = true
    end
  end
  absorb(a)
  absorb(b)
  return union
end

--- Return the members of the set `s` as an array (order unspecified).
local function set_tolist(s)
  local members = {}
  for member in pairs(s) do
    t_insert(members, member)
  end
  return members
end

--- Tell whether `s` was created by `newset` (checked via its metatable).
local function set_isset(s)
  local mt = getmetatable(s)
  return mt == set_mt
end
--- Ranges
--
-- Private metatable used to tag tables created by `newrange`.
local range_mt = {}

--- Create the range `{low, high}` from two bounds given in any order.
local function newrange(v1, v2)
  if v1 > v2 then
    return setmetatable({ v2, v1 }, range_mt)
  end
  return setmetatable({ v1, v2 }, range_mt)
end

--- Tell whether two ranges share at least one value (bounds included).
local function range_overlap(r1, r2)
  if not (r1[1] <= r2[2]) then
    return false
  end
  return r2[1] <= r1[2]
end

--- Merge two overlapping ranges into the range that covers both.
-- Returns nil when the ranges do not overlap.
local function range_merge(r1, r2)
  if not range_overlap(r1, r2) then
    return nil
  end
  local low = r1[1]
  if not (low < r2[1]) then
    low = r2[1]
  end
  local high = r1[2]
  if not (high > r2[2]) then
    high = r2[2]
  end
  return newrange(low, high)
end

--- Tell whether `r` was created by `newrange` (checked via its metatable).
local function range_isrange(r)
  local mt = getmetatable(r)
  return mt == range_mt
end
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- CharSet handling -----------------------------------------------------------
-------------------------------------------------------------------------------
--- UTF-8
--
-- We provide:
-- * utf8_validate(subject, start, finish) -- validator
-- * utf8_split_int(subject) --> table{int}
-- * utf8_split_char(subject) --> table{char}
-- * utf8_next_int(subject, index) -- iterator
-- * utf8_next_char(subject, index) -- iterator
-- * utf8_get_int(subject, index) -- Julia-style iterator
-- * utf8_get_char(subject, index) -- Julia-style iterator
--
-- See each function for usage.
-- Utility function.
-- Modified from code by Kein Hong Man <khman@users.sf.net>,
-- found at http://lua-users.org/wiki/SciteUsingUnicode.
--- Classify the first byte of a UTF-8 sequence.
-- @param byte numeric value of the lead byte
-- @return the number of continuation bytes that follow, and the payload
--   bits carried by the lead byte
-- Raises an error for bytes that cannot start a sequence (0x80-0xBF and
-- 0xFE-0xFF). The second message used to read "OxFF", with a letter O.
-- Lead bytes 0xF8-0xFD (5- and 6-byte forms) are still accepted, as in the
-- original code.
local function utf8_offset(byte)
  if byte < 128 then return 0, byte
  elseif byte < 192 then
    error("Byte values between 0x80 to 0xBF cannot start a multibyte sequence")
  elseif byte < 224 then return 1, byte - 192
  elseif byte < 240 then return 2, byte - 224
  elseif byte < 248 then return 3, byte - 240
  elseif byte < 252 then return 4, byte - 248
  elseif byte < 254 then return 5, byte - 252
  else
    error("Byte values between 0xFE and 0xFF cannot start a multibyte sequence")
  end
end
--[[
validate a given (sub)string.
returns two values:
* The first is either true, false or nil, respectively on success, error, or
incomplete subject.
* The second is the index of the last byte of the last valid char.
--]]
--- Validate a (sub)string as UTF-8.
-- @param subject the string to check
-- @param start first byte to examine (default 1)
-- @param finish last byte to examine (default #subject)
-- @return true / false / nil on success / invalid byte / incomplete input,
--   then the index of the last byte of the last valid character
-- The original assigned `b` and `success` without `local`, leaking two
-- globals on every call.
local function utf8_validate(subject, start, finish)
  start = start or 1
  finish = finish or #subject
  -- `offset` counts the continuation bytes still expected; `char` is the
  -- index of the lead byte of the character being read.
  local offset, char = 0, nil
  for i = start, finish do
    local b = s_byte(subject, i)
    if offset == 0 then
      char = i
      local success
      success, offset = pcall(utf8_offset, b)
      if not success then return false, char - 1 end
    else
      -- Continuation bytes must lie in 0x80-0xBF. A missing byte (`finish`
      -- beyond the end of the subject) is treated as invalid.
      if b == nil or not (127 < b and b < 192) then
        return false, char - 1
      end
      offset = offset - 1
    end
  end
  if offset ~= 0 then return nil, char - 1 end -- Incomplete input.
  return true, finish
end
--[[
Usage:
for _end, start, cpt in utf8_next_int, "˙†ƒ˙©√" do
print(cpt)
end
`start` and `_end` being the bounds of the character, and `cpt` being its
Unicode code point.
It produces:
729
8224
402
729
169
8730
--]]
--- Iterator step over the code points of a UTF-8 string.
-- @param subject the string being traversed
-- @param i index of the last byte of the previous character, or nil to
--   start from the beginning
-- @return index of the last byte of the character, index of its first
--   byte, and its code point; nothing once the subject is exhausted
local function utf8_next_int(subject, i)
  local first
  if i then
    first = i + 1
  else
    first = 1
  end
  if first > #subject then return end
  local extra, code = utf8_offset(s_byte(subject, first))
  local last = first + extra
  -- Each continuation byte contributes its six low bits.
  for pos = first + 1, last do
    code = code * 64 + (s_byte(subject, pos) - 128)
  end
  return last, first, code
end
--[[
Usage:
for _end, start, char in utf8_next_char, "˙†ƒ˙©√" do
print(char)
end
`start` and `_end` being the bounds of the character, and `char` being the
character itself, as a (possibly multi-byte) string.
It produces:
˙
†
ƒ
˙
©
√
--]]
--- Iterator step over the characters of a UTF-8 string.
-- @param subject the string being traversed
-- @param i index of the last byte of the previous character, or nil to
--   start from the beginning
-- @return index of the last byte of the character, index of its first
--   byte, and the character as a string; nothing once exhausted
local function utf8_next_char(subject, i)
  local first = 1
  if i then
    first = i + 1
  end
  if first > #subject then return end
  local last = first + utf8_offset(s_byte(subject, first))
  return last, first, s_sub(subject, first, last)
end
--[[
Takes a string, returns an array of code points.
--]]
--- Split a UTF-8 string into an array of code points.
local function utf8_split_int(subject)
  local codes = {}
  -- Drive the iterator by hand: the first result is the control value.
  local pos, _, code = utf8_next_int(subject, nil)
  while pos ~= nil do
    t_insert(codes, code)
    pos, _, code = utf8_next_int(subject, pos)
  end
  return codes
end
--[[
Takes a string, returns an array of characters.
--]]
--- Split a UTF-8 string into an array of characters (strings).
local function utf8_split_char(subject)
  local pieces = {}
  -- Drive the iterator by hand: the first result is the control value.
  local pos, _, piece = utf8_next_char(subject, nil)
  while pos ~= nil do
    t_insert(pieces, piece)
    pos, _, piece = utf8_next_char(subject, pos)
  end
  return pieces
end
--- Read the code point that starts at byte `i` of a UTF-8 string.
-- @return the code point and the index of the first byte of the next
--   character; nothing when `i` is past the end of the subject
local function utf8_get_int(subject, i)
  if i > #subject then return end
  local extra, code = utf8_offset(s_byte(subject, i))
  local stop = i + extra
  local pos = i + 1
  while pos <= stop do
    code = code * 64 + (s_byte(subject, pos) - 128)
    pos = pos + 1
  end
  return code, stop + 1
end
--- Read the character that starts at byte `i` of a UTF-8 string.
-- @return the character (as a string) and the index of the first byte of
--   the next one; nothing when `i` is past the end of the subject
local function utf8_get_char(subject, i)
  if i > #subject then return end
  local last = i + utf8_offset(s_byte(subject, i))
  return s_sub(subject, i, last), last + 1
end
--- ASCII and binary.
--
-- See UTF-8 above for the API docs.
--- Check that every byte of a (sub)string is 7-bit ASCII.
-- @param subject the string to check
-- @param start first byte to examine (default 1)
-- @param finish last byte to examine (default #subject)
-- @return true and `finish`, or false and the index of the last valid byte
-- The original assigned `b` without `local`, leaking a global.
local function ascii_validate(subject, start, finish)
  start = start or 1
  finish = finish or #subject
  for i = start, finish do
    local b = s_byte(subject, i)
    if b > 127 then return false, i - 1 end
  end
  return true, finish
end
--- Check that every byte of a (sub)string is printable ASCII (32..126).
-- @param subject the string to check
-- @param start first byte to examine (default 1)
-- @param finish last byte to examine (default #subject)
-- @return true and `finish`, or false and the index of the last valid byte
-- Two fixes over the original: `b` is now local (it leaked a global), and
-- byte 127 (DEL, a control character) is no longer accepted.
local function printable_ascii_validate(subject, start, finish)
  start = start or 1
  finish = finish or #subject
  for i = start, finish do
    local b = s_byte(subject, i)
    if b < 32 or b > 126 then return false, i - 1 end
  end
  return true, finish
end
--- Validator for the binary charset: every byte string is valid.
-- @param subject the string to check
-- @param start unused; kept for signature parity with the other validators
-- @param finish last byte to examine (default #subject)
-- @return true and `finish`
local function binary_validate(subject, start, finish)
  if not finish then
    finish = #subject
  end
  return true, finish
end
--- Iterator step over the bytes of a string, as numbers.
-- @param subject the string being traversed
-- @param i index of the previous byte, or nil to start from the beginning
-- @return the index (twice, as end and start bounds) and the byte value;
--   nothing once the subject is exhausted
-- Two fixes over the original: the end test was `>=`, which skipped the
-- last byte, and the third result was the character rather than its
-- numeric value.
local function binary_next_int(subject, i)
  i = i and i + 1 or 1
  if i > #subject then return end
  return i, i, s_byte(subject, i)
end
--- Iterator step over the bytes of a string, as one-character strings.
-- @param subject the string being traversed
-- @param i index of the previous byte, or nil to start from the beginning
-- @return the index (twice, as end and start bounds) and the character;
--   nothing once the subject is exhausted
-- The original returned the numeric byte value here, i.e. it behaved like
-- the *_int variant.
local function binary_next_char(subject, i)
  i = i and i + 1 or 1
  if i > #subject then return end
  return i, i, s_sub(subject, i, i)
end
--- Split a string into an array of byte values.
local function binary_split_int(subject)
  local codes = {}
  local length = #subject
  local pos = 0
  while pos < length do
    pos = pos + 1
    codes[pos] = s_byte(subject, pos)
  end
  return codes
end
--- Split a string into an array of one-character strings.
local function binary_split_char(subject)
  local pieces = {}
  local length = #subject
  local pos = 0
  while pos < length do
    pos = pos + 1
    pieces[pos] = s_sub(subject, pos, pos)
  end
  return pieces
end
--- Read the byte at index `i` as a number.
-- @return the byte value (nil past the end) and the next index
local function binary_get_int(subject, i)
  local code = s_byte(subject, i)
  return code, i + 1
end
--- Read the byte at index `i` as a one-character string.
-- @return the character and the next index; nothing when `i` is past the
--   end of the subject
-- The original returned `""` (a true value in Lua) past the end, so a
-- caller looping "until no character" never stopped. Returning nothing
-- matches `utf8_get_char`.
local function binary_get_char(subject, i)
  if i > #subject then return end
  return s_sub(subject, i, i), i + 1
end
--- The table
--
-- Registry of the supported charsets. Each entry bundles a validator with
-- the split / next / get helpers for that encoding.
local CharSets = {}
do
  -- The three byte-wide charsets differ only by their validator; all the
  -- other helpers are the binary ones.
  local function byte_wide(validator)
    return {
      validate = validator,
      split_char = binary_split_char,
      split_int = binary_split_int,
      next_char = binary_next_char,
      next_int = binary_next_int,
      get_char = binary_get_char,
      get_int = binary_get_int
    }
  end

  CharSets.binary = byte_wide(binary_validate)
  CharSets.ASCII = byte_wide(ascii_validate)
  CharSets["printable ASCII"] = byte_wide(printable_ascii_validate)
  CharSets["UTF-8"] = {
    validate = utf8_validate,
    split_char = utf8_split_char,
    split_int = utf8_split_int,
    next_char = utf8_next_char,
    next_int = utf8_next_int,
    get_char = utf8_get_char,
    get_int = utf8_get_int
  }
end
-- Active charset: its name, followed by the helpers the constructors and
-- compilers call. The binary charset is the default.
local charset, validate, split_int, split_char
    , next_int, next_char, get_int, get_char
    = "binary"
    , binary_validate
    , binary_split_int
    , binary_split_char
    , binary_next_int
    , binary_next_char
    , binary_get_int
    , binary_get_char

-- Becomes true after the first successful `PL_set_charset` call.
local charset_locked = false

--- Select the charset used by the library. It can only be chosen once.
-- @param set a key of `CharSets` ("binary", "ASCII", "printable ASCII" or
--   "UTF-8")
-- Raises an error for an unknown name, or when a charset was already
-- selected.
--
-- Fixes over the original:
--  * `charset` received the charset table instead of its name, which broke
--    the `charset == "UTF-8"` test and the error messages that concatenate
--    `charset`;
--  * the "Bad Charset" message printed `s` (always nil there) instead of
--    the requested name;
--  * the lock was installed with `function PL.setCharset()`, but `PL` is
--    declared further down the file, so that line indexed a nil global and
--    every call raised. A local flag gives the intended behavior.
local function PL_set_charset(set)
  if charset_locked then
    error("Charsets are forever (attempt to redefine the charset).")
  end
  local s = CharSets[set]
  if not s then
    error("Bad Charset: " .. tostring(set))
  end
  charset, validate, split_int, split_char
    , next_int, next_char, get_int, get_char
    = set, s.validate, s.split_int, s.split_char
    , s.next_int, s.next_char, s.get_int, s.get_char
  charset_locked = true
end
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- The module -----------------------------------------------------------------
-------------------------------------------------------------------------------
-- The module table. It is also the metatable shared by all patterns.
local PL = {}
PL.__index = PL

--- Version string of the library.
function PL.version()
  return "v0.0.0"
end

PL.setmaxstack = nop --Just a stub. Is it even necessary for this version?

--- Return "pattern" when `pt` carries `PL` as its metatable, nil otherwise.
local function PL_type(pt)
  return getmetatable(pt) == PL and "pattern" or nil
end

PL.type = PL_type
PL.set_charset = PL_set_charset
PL.charsets = CharSets
--- Lua 5.1/5.2 compat
--
--- Default pattern factory: give the constructor table `pt` the module
-- table `PL` as its metatable and return it.
local function newpattern(pt)
  return setmetatable(pt, PL)
end
do
  if newproxy then
    -- When `newproxy` is available, patterns are userdata proxies. The
    -- actual fields live in `proxycache`, keyed by the proxy.
    local proxycache = weakkey{}
    local __index_PL = {__index = PL}
    PL.proxycache = proxycache
    newpattern = function(cons)
      local pt = newproxy()
      -- Missing fields of the constructor table fall back to `PL`.
      setmetatable(cons, __index_PL)
      proxycache[pt] = cons
      debug.setmetatable(pt, PL)
      return pt
    end
    -- Field reads and writes on a proxy are redirected to its table.
    function PL:__index(k)
      return proxycache[self][k]
    end
    function PL:__newindex(k, v)
      proxycache[self][k] = v
    end
  elseif #setmetatable({},{__len = function()return 10 end}) ~= 10 then
    -- No proxies, and `__len` is not honoured on tables: `#pattern` cannot
    -- work. (The message used to read "metatethod".)
    print("Warning: The `__len` metamethod won't work with patterns, "
      .."use `PL.L(pattern)` for lookaheads.")
  end
end
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- Constructors ---------------------------------------------------------------
-------------------------------------------------------------------------------
-- Pattern caches, one per kind of pattern that gets memoized.
local ptcache, backcache = weakkey{}, weakkey{}
local setcache, rangecache = weakkey{}, weakkey{}
local refcache, repeatcache = weakkey{}, weakkey{}
local unmcache, lookaheadcache = weakkey{}, weakkey{}
-- Pattern constructors, indexed by type name.
local constructors = {}

-- The interface between the API (lpeg.P, lpeg.V, lpeg.C...), the main
-- pattern cache and the pattern constructors.
-- `typ` selects the constructor; `v` and the extra arguments are passed to
-- it. Raises an error when no constructor yields a pattern.
local function construct(typ, v, ...)
  -- A cache hit also covers `P(pattern)` -> pattern, since every pattern
  -- built here is registered under itself below.
  local cached = ptcache[v]
  if cached then
    return cached
  end
  local build = constructors[typ]
  local pt = build and build(v, ...)
  if not pt then
    error("No pattern constructor for type '".. typ.."'.")
  end
  ptcache[pt] = pt
  return pt
end
-- constant patterns
-- Constant patterns, built once and shared.
local truept = newpattern{type = "true"}
local falsept = newpattern{type = "false"}
local eospt = newpattern{type = "eos"}
local onept = newpattern{type = "one"}
-- P"abc": literal string pattern.
-- The string is validated against the active charset, then stored as an
-- array of byte values (`data`) and verbatim (`as_is`).
-- The original captured only the first result of `validate`, so the error
-- branch computed `index+1` on an undefined variable and raised an
-- unrelated arithmetic error instead of the intended message.
constructors["string"] = function(s)
  local success, index = validate(s)
  if success then
    local pt = newpattern{
      type = "string",
      data = binary_split_int(s),
      as_is = s
    }
    ptcache[s] = pt
    return pt
  else
    error("Character at position ".. index+1
      .." is not a valid "..charset.." one.")
  end
end
-- P(n): numeric patterns. The result is cached under `n` in `ptcache`.
do
  -- Pick the pattern for `n`: 0, 1 and -1 map to the shared constants, a
  -- positive `n` to an "any" pattern, and any other value to the negation
  -- of an "any" pattern of size `-n`.
  local function pattern_for(n)
    if n == 0 then return truept end
    if n == 1 then return onept end
    if n == -1 then return eospt end
    if n > 0 then
      return newpattern{
        type = "any",
        data = n
      }
    end
    return -newpattern{
      type = "any",
      data = -n
    }
  end

  constructors["number"] = function(n)
    local pt = pattern_for(n)
    ptcache[n] = pt
    return pt
  end
end
-- P(true) / P(false): the shared constant patterns.
constructors["boolean"] = function(b)
  if b then
    return truept
  end
  return falsept
end
-- Grammar
-- P{...}: grammar pattern, built from a shallow copy of the table.
-- When the first entry is a string it is replaced by a reference to the
-- rule of that name.
-- The original compared `type(g[1])` with the global `string` library
-- table instead of the string "string", so the replacement never happened.
constructors["table"] = function(g)
  g = copy(g)
  if type(g[1]) == "string" then g[1] = PL.V(g[1]) end
  return newpattern{
    type = "grammar",
    data = g
  }
end
-- P(f): pattern wrapping the function `f`, cached under `f` in `ptcache`.
constructors["function"] = function(f)
  local wrapped = newpattern{ type = "function", data = f }
  ptcache[f] = wrapped
  return wrapped
end
-- B"abc"
-- Look-behind for a literal string, memoized per string in `backcache`.
-- Raises an error when the string is not valid in the active charset.
constructors["back"] = function(_, s)
  local cached = backcache[s]
  if cached then
    return cached
  end
  local success, index = validate(s)
  if not success then
    error("Character at position ".. index+1
      .." is not a valid "..charset.." one.")
  end
  local pt = newpattern{
    type = "back",
    data = binary_split_int(s),
    as_is = s
  }
  backcache[s] = pt
  return pt
end
-- R"az"
-- Character range, given as a string or as a range object. A string is
-- validated against the active charset and split into its bounds.
-- Patterns are memoized in `rangecache` under "low:high".
constructors["range"] = function(_, r)
  local as_is = r
  if not range_isrange(r) then
    local success, index = validate(r)
    if not success then
      error("Character at position ".. index+1
        .." is not a valid "..charset.." one.")
    end
    r = newrange(t_unpack(split_int(r)))
  end
  local cacheKey = r[1]..":"..r[2]
  local pt = rangecache[cacheKey]
  if not pt then
    pt = newpattern{
      type = "range",
      data = r,
      as_is = as_is
    }
    rangecache[cacheKey] = pt
  end
  return pt
end
-- S"abc"
-- Character set, given as a string or as a set object. The members are
-- sorted and joined with "$" to form the `setcache` key.
constructors["set"] = function(_, s)
  local as_is = s
  if set_isset(s) then
    s = set_tolist(s)
  else
    local success, index = validate(s)
    if not success then
      error("Character at position ".. index+1
        .." is not a valid "..charset.." one.")
    end
    s = split_int(s)
  end
  t_sort(s)
  local cacheKey = t_concat(s,"$")
  local pt = setcache[cacheKey]
  if not pt then
    pt = newpattern{
      type = "set",
      data = newset(s),
      as_is = as_is
    }
    setcache[cacheKey] = pt
  end
  return pt
end
-- V"name"
-- Reference to the grammar rule `name`, memoized per name in `refcache`.
constructors["ref"] = function(_, name)
  local cacheKey = "Ref: "..name
  local pt = refcache[cacheKey]
  if not pt then
    pt = newpattern{
      type = "ref",
      data = name
    }
    refcache[cacheKey] = pt
  end
  return pt
end
-- p1 + p2
-- Ordered choice over the array of patterns `alt`.
constructors["alternate"] = function(_, alt)
  local pt = newpattern{ type = "alternate", data = alt }
  return pt
end
-- p1 * p2
-- Concatenation of the array of patterns `seq`.
constructors["sequence"] = function(_, seq)
  local pt = newpattern{ type = "sequence", data = seq }
  return pt
end
-- pt^n
-- Repetition of pattern `p` with count `n`, memoized in a two-level cache:
-- first by count, then by pattern.
constructors["repeat"] = function(_, p, n)
  local per_count = repeatcache[n]
  if not per_count then
    per_count = {}
    repeatcache[n] = per_count
  end
  local pt = per_count[p]
  if not pt then
    pt = newpattern{
      type = "repeat",
      data = p,
      times = n
    }
    per_count[p] = pt
  end
  return pt
end
-- -pt
-- Negation of pattern `p`, memoized per pattern in `unmcache`.
constructors["unm"] = function(_, p)
  local pt = unmcache[p]
  if not pt then
    pt = newpattern{
      type = "unm",
      data = p
    }
    unmcache[p] = pt
  end
  return pt
end
-- #pt
-- Lookahead on pattern `p`, memoized per pattern in `lookaheadcache`.
constructors["lookahead"] = function(_, p)
  local pt = lookaheadcache[p]
  if not pt then
    pt = newpattern{
      type = "lookahead",
      data = p
    }
    lookaheadcache[p] = pt
  end
  return pt
end
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- API ------------------------------------------------------------------------
-------------------------------------------------------------------------------
--- Convert `v` to a pattern, dispatching on its Lua type.
local function PL_P(v)
  local kind = type(v)
  return construct(kind, v)
end
PL.P = PL_P
--- Build a set pattern from `set`. The empty string yields the pattern
-- that always fails.
local function PL_S(set)
  if set ~= "" then
    return construct("set", nil, set)
  end
  return PL_P(false)
end
PL.S = PL_S
--- Build a pattern matching any character in one of the given ranges.
-- @param ... zero or more ranges (two-character strings or range objects)
-- @return the always-failing pattern when no range is given, the range
--   pattern when one is given, otherwise the ordered choice of all of them
-- The original kept only the first argument, so `R("az", "AZ")` silently
-- behaved like `R"az"`. The extra ranges are combined with `+`, which
-- goes through the `__add` metamethod of patterns.
local function PL_R(...)
  local count = select('#', ...)
  if count == 0 then
    return PL_P(false)
  end
  local pt = construct("range", nil, (...))
  for i = 2, count do
    pt = pt + construct("range", nil, (select(i, ...)))
  end
  return pt
end
PL.R = PL_R
--- Build a reference to the grammar rule `name`.
local function PL_V(name)
  local reference = construct("ref", nil, name)
  return reference
end
PL.V = PL_V
--- Build a look-behind pattern for the literal string `name`.
local function PL_B(name)
  local behind = construct("back", nil, name)
  return behind
end
PL.B = PL_B
--- `a * b`: match `a`, then `b`. Both operands are converted with `PL_P`.
-- @return a single pattern when the operands collapse into one, otherwise
--   a "sequence" pattern
-- Fixes over the original: merging two "any" patterns called an undefined
-- global `P` (now `PL_P`), and the placeholder argument of `construct` was
-- read from the global `_` (now an explicit nil).
local function sequence(a, b)
  a, b = PL_P(a), PL_P(b)
  local flat, merged = {}, {}
  -- 1. Flatten: (a * b) * (c * d) => a * b * c * d
  for _, p in ipairs{a, b} do
    if p.type == "sequence" then
      for _, q in ipairs(p.data) do
        t_insert(flat, q)
      end
    else
      t_insert(flat, p)
    end
  end
  -- 2. Fuse neighbouring `string` patterns and neighbouring `any`
  --    patterns into one.
  -- TODO: Repeat patterns?
  merged[1] = flat[1]
  for i = 2, #flat do
    local p1, p2 = merged[#merged], flat[i]
    if p1.type == "string" and p2.type == "string" then
      merged[#merged] = PL_P(p1.as_is .. p2.as_is)
    elseif p1.type == "any" and p2.type == "any" then
      merged[#merged] = PL_P(p1.data + p2.data)
    else
      t_insert(merged, p2)
    end
  end
  if #merged == 1 then
    return merged[1]
  end
  return construct("sequence", nil, merged)
end
PL.__mul = sequence
--- `a + b`: ordered choice. Both operands are converted with `PL_P`.
-- @return a single pattern when the operands collapse into one, otherwise
--   an "alternate" pattern
-- The original passed the global `_` as the placeholder argument of
-- `construct`. `construct` looks that argument up in the pattern cache, so
-- a stray global named `_` could have produced the wrong pattern; an
-- explicit nil is passed instead.
local function PL_alternate(a, b)
  a, b = PL_P(a), PL_P(b)
  local flat, merged = {}, {}
  -- 1. Flatten: (a + b) + (c + d) => a + b + c + d
  for _, p in ipairs{a, b} do
    if p.type == "alternate" then
      for _, q in ipairs(p.data) do
        t_insert(flat, q)
      end
    else
      t_insert(flat, p)
    end
  end
  -- 2. Merge neighbouring `set` patterns.
  -- TODO: merge captures who share the same structure?
  -- so that C(P1) + C(P2) become C(P1+P2)?
  merged[1] = flat[1]
  for i = 2, #flat do
    local p1, p2 = merged[#merged], flat[i]
    if p1.type == "set" and p2.type == "set" then
      merged[#merged] = PL_S(p1.as_is..p2.as_is)
    else
      t_insert(merged, p2)
    end
  end
  if #merged == 1 then
    return merged[1]
  end
  return construct("alternate", nil, merged)
end
PL.__add = PL_alternate
--- `#pt` / `PL.L(pt)`: build a lookahead on `pt`.
-- The argument is converted with `PL_P`, so `PL.L"abc"` works like
-- `#PL.P"abc"`; an existing pattern is returned unchanged by `PL_P`.
-- The placeholder argument of `construct` is now an explicit nil rather
-- than the global `_`.
local function PL_lookahead(pt)
  return construct("lookahead", nil, PL_P(pt))
end
PL.__len = PL_lookahead
PL.L = PL_lookahead
--- `-pt`: build the negation of `pt`.
-- The placeholder argument of `construct` is now an explicit nil rather
-- than the global `_`, which `construct` would look up in the pattern
-- cache.
local function PL_unm(pt)
  return construct("unm", nil, pt)
end
PL.__unm = PL_unm
--- `a - b`: built as the sequence `-b * a`, after converting both operands
-- with `PL_P`.
local function PL_sub(a, b)
  local wanted = PL_P(a)
  local excluded = PL_P(b)
  return PL_unm(excluded) * wanted
end
PL.__sub = PL_sub
--- `pt ^ n`: build the repetition of `pt` with count `n`.
-- Raises an error when `n` is itself a pattern.
-- The placeholder argument of `construct` is now an explicit nil rather
-- than the global `_`, which `construct` would look up in the pattern
-- cache.
local function PL_repeat(pt, n)
  if PL_type(n) == "pattern" then
    error"Pattern encountered at the wrong side of ^."
  end
  return construct("repeat", nil, pt, n)
end
PL.__pow = PL_repeat
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- Compilers ------------------------------------------------------------------
-------------------------------------------------------------------------------
-- Matcher factories, indexed by pattern type.
local compilers = {}

--- Return the matcher function of `pt`, compiling it on first use and
-- storing the result in `pt.compiled`.
-- Raises "pattern expected" when `pt` is not a pattern.
local function compile(pt, cpstate)
  if PL_type(pt) ~= "pattern" then error("pattern expected") end
  local matcher = pt.compiled
  if not matcher then
    matcher = compilers[pt.type](pt, cpstate)
    pt.compiled = matcher
  end
  return matcher
end
-- Matcher for a literal string: compares the subject byte by byte with the
-- stored byte values and, on success, advances by their count.
compilers["string"] = function (pt)
  local bytes = pt.data
  local length = #bytes
  return function(subject, index, state)
    local base = index - 1
    local i = 1
    while i <= length do
      if s_byte(subject, base + i) ~= bytes[i] then
        return false, index
      end
      i = i + 1
    end
    return true, index + length
  end
end
--- Matcher that always succeeds without consuming input.
local function truecompiled (subject, index, state)
  local matched = true
  return matched, index
end
compilers["true"] = function ()
  return truecompiled
end

--- Matcher that always fails.
local function falsecompiled (subject, index, state)
  local matched = false
  return matched, index
end
compilers["false"] = function ()
  return falsecompiled
end

--- Matcher that succeeds only when `index` is past the end of the subject.
local function eoscompiled (subject, index, state)
  local at_end = index > #subject
  return at_end, index
end
compilers["eos"] = function ()
  return eoscompiled
end
--- Matcher for a single character of the active charset.
-- @return true and the index after the character, or false and `index`
--   when no character could be read
-- The failure branch used to return `flase`, an undefined global, i.e. nil
-- instead of false.
local function onecompiled (subject, index, state)
  local char, nindex = get_int(subject, index)
  if char then
    return true, nindex
  end
  return false, index
end
compilers["one"] = function (pt)
  return onecompiled
end
-- Matcher for `pt.data` characters of any value.
-- Fixes over the original:
--  * the multi-byte loop always read from the start index, so the match
--    ended after a single character whatever the count; the position now
--    advances after each character;
--  * the charset test also accepts the UTF-8 entry of `CharSets`, because
--    `charset` may hold that table rather than its name.
compilers["any"] = function (pt)
  if charset == "UTF-8" or charset == CharSets["UTF-8"] then
    local N = pt.data
    return function(subject, index, state)
      local pos = index
      for _ = 1, N do
        local c, nindex = get_int(subject, pos)
        if not c then return false, index end
        pos = nindex
      end
      return true, pos
    end
  else -- version optimized for byte-width encodings.
    local N = pt.data - 1
    return function(subject, index, state)
      local n = index + N
      if n <= #subject then
        return true, n + 1
      else
        return false, index
      end
    end
  end
end
-- type = "grammar",
-- Matcher for a grammar: every rule is compiled up front and the rule at
-- index 1 is the entry point. The compiled rule table is pushed on
-- `state.grammars` for the duration of the match.
compilers["grammar"] = function (pt)
  local rules = map_all(pt.data, compile)
  local entry = rules[1]
  return function (subject, index, state)
    local stack = state.grammars
    t_insert(stack, rules)
    local matched, stop = entry(subject, index, state)
    t_remove(state.grammars)
    return matched, stop
  end
end
-- Matcher for a function pattern. The function is called with the subject
-- and the current index; its first result decides the outcome:
--   true          -> success, position unchanged
--   a number      -> success, the number is the new position
--   false / nil   -> failure
-- Any other value raises an error.
-- The original rejected `#subject + 1`, the position just past the end,
-- so a function that consumed the whole subject raised "out of bounds".
compilers["function"] = function (pt)
  local func = pt.data
  return function (subject, index)
    local nindex = func(subject, index)
    if nindex == true then
      return true, index
    elseif type(nindex) == "number" then
      if index <= nindex and nindex <= #subject + 1 then
        return true, nindex
      else
        error"Index out of bounds returned by match-time capture."
      end
    elseif not nindex then
      return false, index
    else
      error"Match time capture must return a number, a boolean, nil or nothing."
    end
  end
end
-- Matcher for a look-behind: compares the bytes that end just before
-- `index` with the stored byte values. Never consumes input.
compilers["back"] = function (pt)
  local bytes = pt.data
  local length = #bytes
  return function (subject, index, state)
    local base = index - length - 1
    local i = 1
    while i <= length do
      if s_byte(subject, base + i) ~= bytes[i] then
        return false, index
      end
      i = i + 1
    end
    return true, index
  end
end
-- Matcher for a range: reads one character and succeeds when its value
-- lies between the two bounds, inclusive.
compilers["range"] = function (pt)
  local low, high = pt.data[1], pt.data[2]
  return function (subject, index, state)
    local char, nindex = get_int(subject, index)
    if not char then
      return false, index
    end
    if low <= char and char <= high then
      return true, nindex
    end
    return false, index
  end
end
-- Matcher for a set: reads one character and succeeds when its value is a
-- member of the stored set.
compilers["set"] = function (pt)
  local members = pt.data
  return function (subject, index, state)
    local char, nindex = get_int(subject, index)
    if not members[char] then
      return false, index
    end
    return true, nindex
  end
end
-- Matcher for a rule reference: calls the rule named `pt.data` in the
-- grammar on top of `state.grammars`.
-- The original memoized the rule in an upvalue on the first match.
-- Reference patterns are cached per name and compiled once, so every later
-- grammar using the same rule name ran the rule of the first grammar. The
-- rule is now looked up on every call.
-- Raises an error when the current grammar has no rule of that name.
compilers["ref"] = function (pt)
  local name = pt.data
  return function (subject, index, state)
    local grammars = state.grammars
    local current = grammars[#grammars]
    local rule = current and current[name]
    if not rule then
      error("Rule '"..tostring(name).."' is not defined in the current grammar.")
    end
    return rule(subject, index, state)
  end
end
-- Matcher for an ordered choice: tries each alternative in turn and
-- returns the result of the first that succeeds.
-- The original stored each alternative's returned index back into `index`,
-- even on failure. A failed alternative may report an advanced position
-- (the sequence matcher does), which made the next alternative start at
-- the wrong place. Every alternative now starts at the original index.
compilers["alternate"] = function (pt)
  local alternatives, n = map(pt.data, compile), #pt.data
  return function (subject, index, state)
    for i = 1, n do
      local success, nindex = alternatives[i](subject, index, state)
      if success then
        return true, nindex
      end
    end
    return false, index
  end
end
-- Matcher for a concatenation: every element must match, each starting
-- where the previous one stopped.
-- On failure the original returned the position reached so far instead of
-- the start index, which callers then reused. A failed sequence now
-- reports the index it was given.
compilers["sequence"] = function (pt)
  local sequence, n = map(pt.data, compile), #pt.data
  return function (subject, index, state)
    local current = index
    for i = 1, n do
      local success, nindex = sequence[i](subject, current, state)
      if not success then
        return false, index
      end
      current = nindex
    end
    return true, current
  end
end
-- Matcher for a repetition.
--   n >= 0 : at least `n` matches, then as many more as possible;
--   n <  0 : at most `-n` matches.
-- Fixes over the original:
--  * a failed attempt's returned index is no longer used as the current
--    position, and a failed repetition reports the index it was given;
--  * the greedy loop stops when an iteration succeeds without consuming
--    input, instead of looping for ever;
--  * the "at most" loop stops at the first failure instead of calling the
--    matcher again.
compilers["repeat"] = function (pt)
  local matcher, n = compile(pt.data), pt.times
  if n >= 0 then
    return function (subject, index, state)
      local current = index
      -- Mandatory part.
      for _ = 1, n do
        local success, nindex = matcher(subject, current, state)
        if not success then return false, index end
        current = nindex
      end
      -- Greedy optional part.
      while true do
        local success, nindex = matcher(subject, current, state)
        if not success or nindex == current then break end
        current = nindex
      end
      return true, current
    end
  else
    local most = -n
    return function (subject, index, state)
      local current = index
      for _ = 1, most do
        local success, nindex = matcher(subject, current, state)
        if not success then break end
        current = nindex
      end
      return true, current
    end
  end
end
-- Matcher for a negation: succeeds exactly when the inner pattern fails.
-- Never consumes input.
compilers["unm"] = function (pt)
  local inner = compile(pt.data)
  return function (subject, index, state)
    local matched = inner(subject, index, state)
    return not matched, index
  end
end
-- Matcher for a lookahead: reports the inner pattern's outcome without
-- consuming input.
compilers["lookahead"] = function (pt)
  local inner = compile(pt.data)
  return function (subject, index, state)
    local matched = inner(subject, index, state)
    return matched, index
  end
end
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- Match ----------------------------------------------------------------------
-------------------------------------------------------------------------------
--- Match `pt` against `subject`.
-- @param pt a pattern, or any value `PL_P` accepts
-- @param subject the string to match
-- @param index optional start position (default 1), in 1..#subject+1
-- @param ... extra arguments, stored in the match state as `args`
-- @return the index of the first character after the match, or nil
-- The original required `index <= #subject`, which rejected the position
-- just past the end and any explicit index on an empty subject. The upper
-- bound is now `#subject + 1`, and the error message reports that bound.
local function PL_match(pt, subject, index, ...)
  pt = PL_P(pt)
  if index == nil then
    index = 1
  elseif type(index) ~= "number" then
    error"The index must be a number"
  elseif not (1 <= index and index <= #subject + 1) then
    error("Index "..index.." out of bounds [1.."..(#subject + 1).."].")
  end
  local matcher, state
    = compile(pt)
    , {grammars = {}, args = {...}}
  local success, nindex = matcher(subject, index, state)
  if success then
    return nindex
  end
  return nil
end
PL.match = PL_match
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
-- Print ----------------------------------------------------------------------
-------------------------------------------------------------------------------
-- Pattern printers, indexed by pattern type. `PL_print` is declared here
-- so that the printers can refer to it; it is assigned further down.
local printers = {}
local PL_print
do
  -- Print one line: indentation, prefix, then the text for the pattern.
  local function emit(offset, prefix, text)
    print(offset..prefix..text)
  end

  printers["string"] = function (pt, offset, prefix)
    emit(offset, prefix, "P\""..pt.as_is.."\"")
  end
  printers["true"] = function (pt, offset, prefix)
    emit(offset, prefix, "P( true )")
  end
  printers["false"] = function (pt, offset, prefix)
    emit(offset, prefix, "P( false )")
  end
  printers["eos"] = function (pt, offset, prefix)
    emit(offset, prefix, "P( -1 )")
  end
  printers["one"] = function (pt, offset, prefix)
    emit(offset, prefix, "P( 1 )")
  end
  printers["any"] = function (pt, offset, prefix)
    emit(offset, prefix, "P( "..pt.data.." )")
  end
end
-- Print a grammar: a "Grammar" header, then each rule one level deeper,
-- labelled with its key.
-- The original wrote `prefix " = "`, which calls the string `prefix` as a
-- function and raises an error; the label and " = " are now concatenated.
-- Non-string keys go through `tostring` before concatenation.
printers["grammar"] = function (pt, offset, prefix)
  print(offset..prefix.."Grammar")
  for k, rule in pairs(pt.data) do
    local label
    if type(k) ~= "string" then
      label = type(k)..": "..tostring(k)
    else
      label = "\""..k.."\""
    end
    PL_print(rule, offset.." ", label.." = ")
  end
end
-- Print a function pattern.
-- `pt.data` is a function, and concatenating a function raises an error,
-- so it is converted with `tostring` first.
printers["function"] = function (pt, offset, prefix)
  print(offset..prefix.."P( "..tostring(pt.data).." )")
end
-- Print look-behind, range and set patterns from their `as_is` field.
do
  -- Build a printer that shows `as_is` inside "<letter>( ... )".
  local function as_is_printer(letter)
    return function (pt, offset, prefix)
      print(offset..prefix..letter.."( "..pt.as_is.." )")
    end
  end

  printers["back"] = as_is_printer("B")
  printers["range"] = as_is_printer("R")
  printers["set"] = as_is_printer("S")
end
-- Print a rule reference: string names are quoted, other names are shown
-- as "<type>: <value>".
-- The original tested `type(k)`, where `k` is an undefined variable, so
-- every reference was printed as "nil: <name>".
printers["ref"] = function (pt, offset, prefix)
  local name = pt.data
  local val
  if type(name) ~= "string" then
    val = type(name)..": "..tostring(name)
  else
    val = "\""..name.."\""
  end
  print(offset..prefix.."V( "..val.." )")
end
-- Print an ordered choice: a "+" line, then each alternative one level
-- deeper.
printers["alternate"] = function (pt, offset, prefix)
  print(offset..prefix.."+")
  local items, deeper = pt.data, offset.." "
  for i = 1, #items do
    PL_print(items[i], deeper, "")
  end
end

-- Print a sequence: a "*" line, then each element one level deeper.
printers["sequence"] = function (pt, offset, prefix)
  print(offset..prefix.."*")
  local items, deeper = pt.data, offset.." "
  for i = 1, #items do
    PL_print(items[i], deeper, "")
  end
end
-- Print repetition, negation and lookahead patterns by printing the inner
-- pattern with an operator marker in front of it.
-- The original replaced the incoming `prefix` with the marker, so the
-- grammar rule label and any outer operator marker were lost (`-(p^0)` was
-- printed like `p^0`). The marker is now appended to the prefix.
printers["repeat"] = function (pt, offset, prefix)
  PL_print(pt.data, offset, prefix..pt.times.." ^ ")
end
printers["unm"] = function (pt, offset, prefix)
  PL_print(pt.data, offset, prefix.."- ")
end
printers["lookahead"] = function (pt, offset, prefix)
  PL_print(pt.data, offset, prefix.."# ")
end
--- Print `pt` with the given indentation and prefix, using the printer
-- registered for its type.
function PL_print (pt, offset, prefix)
  local printer = printers[pt.type]
  return printer(pt, offset, prefix)
end

--- Print a pattern, starting with no indentation and no prefix.
function PL.print (pt)
  local printer = printers[pt.type]
  return printer(pt, "", "")
end

return PL
-- The Romantic WTF public license.
-- --------------------------------
-- a.k.a. version "<3" or simply v3
--
--
-- Dear user,
--
-- The PureLPeg proto-library
--
-- \
-- '.,__
-- \ /
-- '/,__
-- /
-- /
-- /
-- has been / released
-- ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
-- under the Romantic WTF Public License.
-- ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~`,´ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
-- I hereby grant you an irrevocable license to
-- ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
-- do what the gentle caress you want to
-- ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
-- with this lovely
-- ~ ~ ~ ~ ~ ~ ~ ~
-- / thing...
-- / ~ ~ ~ ~
-- / Love,
-- # / '.'
-- ####### ·
-- #####
-- ###
-- #
--
-- -- Pierre-Yves
--
--
-- P.S.: Even though I poured my heart into this work,
-- I _cannot_ provide any warranty regarding
-- its fitness for _any_ purpose. You
-- acknowledge that I will not be held liable
-- for any damage its use could incur.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment