Skip to content

Instantly share code, notes, and snippets.

@CapsAdmin
Last active June 3, 2023 21:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CapsAdmin/d736f244e38d68f2ccc792d3e5d576ca to your computer and use it in GitHub Desktop.
Save CapsAdmin/d736f244e38d68f2ccc792d3e5d576ca to your computer and use it in GitHub Desktop.
loadfile("luajit_remake_compat.lua")() to get string, bit, require and table library implemented in lua. Intended for use with https://github.com/luajit-remake
-- For the string library I used https://github.com/pygy/strung.lua and removed the need for ffi.
-- My modification is not optimal at all as it's made with ffi in mind, but at least it passed its test suite and might be useful for stress testing.
-- For bit operators I used https://github.com/davidm/lua-bit-numberlua/blob/master/lmod/bit/numberlua.lua
-- for require I used https://github.com/hoelzro/lua-procure
--- Probe every function reachable from `_G` and report which ones are stubs.
-- Each function is called with no arguments under `pcall`. A failure whose
-- message ends in "not implemented yet!" is printed together with that
-- message; every other outcome is printed as "ok".
-- `loadfile`, `dofile` and `error` are never called.
local function find_nyi_functions()
  local blacklist = {
    loadfile = true,
    dofile = true,
    error = true,
  }
  local msg = "not implemented yet!"
  -- Tables already visited. Seeded with the root so that self references
  -- such as `_G._G` do not cause every global to be probed a second time.
  local done = {[_G] = true}

  local function scan(tbl, name)
    for k, v in pairs(tbl) do
      if type(v) == "function" and not blacklist[k] then
        -- keys may be any Lua value, so stringify before concatenating
        local label = name .. "." .. tostring(k)
        local ok, err = pcall(v)
        -- error values are not necessarily strings; only a string can carry
        -- the "not implemented" marker
        if not ok and type(err) == "string" and err:sub(-#msg) == msg then
          print(label .. "\t\t\t" .. err)
        else
          print(label .. "\t\t\t" .. "ok")
        end
      elseif type(v) == "table" then
        if not done[v] then
          done[v] = true
          scan(v, tostring(k))
        end
      end
    end
  end

  scan(_G, "_G")
end
do -- io
  -- Placeholder file API: opening a file or a process always fails with the
  -- conventional `nil, message` pair, and closing does nothing.
  io.open = function()
    return nil, "nyi"
  end

  io.close = function() end

  io.popen = function()
    return nil, "nyi"
  end
end
do -- table
--- Insert a value into a sequence.
-- `table.insert(tbl, value)` appends `value`.
-- `table.insert(tbl, index, value)` moves the elements from `index` upward
-- one slot up and stores `value` at `index`.
-- Limitation: an explicit `nil` third argument cannot be told apart from the
-- two-argument form and is treated as an append of `a`.
function table.insert(tbl, a, b)
  if b == nil then
    -- two-argument form: `a` is the value
    tbl[#tbl + 1] = a
    return
  end
  -- three-argument form: `a` is the position and `b` the value; `b` may be
  -- `false`, which is why the test above compares against nil explicitly
  for i = #tbl, a, -1 do
    tbl[i + 1] = tbl[i]
  end
  tbl[a] = b
end
--- Remove and return the element at `index`, shifting later elements down.
-- `index` defaults to the last element. When the sequence is empty or
-- `index` lies outside 1..#tbl nothing is removed and nil is returned.
function table.remove(tbl, index)
  local n = #tbl
  if index == nil then
    index = n
  end
  if index < 1 or index > n then
    return nil
  end
  local val = tbl[index]
  for i = index, n - 1 do
    tbl[i] = tbl[i + 1]
  end
  tbl[n] = nil
  return val
end
--- Return the largest positive numeric key of `tbl`, or 0 if there is none.
-- Unlike `#tbl` this looks at every key, so it also sees numeric keys that
-- lie beyond a hole and non-integer keys.
function table.maxn(tbl)
  local max = 0
  for k in pairs(tbl) do
    if type(k) == "number" and k > max then
      max = k
    end
  end
  return max
end
end
do -- bit
-- Module table of the pure-Lua bit library; the metadata fields name the
-- bit.numberlua release recorded in this table.
local M = {_TYPE='module', _NAME='bit.numberlua', _VERSION='0.3.1.20120131'}
local floor = math.floor
-- 2^32: operands are reduced modulo this value to stay within 32-bit words.
local MOD = 2^32
-- 2^32 - 1: the all-ones 32-bit word, used for complements.
local MODM = MOD-1
-- Wrap `f` in a table: reading a missing key `k` computes `f(k)`, stores the
-- result under `k` and returns it, so later reads are plain table lookups.
local function memoize(f)
  local cache = {}
  return setmetatable(cache, {
    __index = function(_, key)
      local value = f(key)
      cache[key] = value
      return value
    end,
  })
end
-- Build a two-operand operator from the lookup table `t`, working through
-- the operands one base-`m` digit at a time: `t[x][y]` is the result digit
-- for operand digits `x` and `y`. As soon as one operand has no digits left,
-- what remains of the other is added at the current place value.
local function make_bitop_uncached(t, m)
  return function(a, b)
    local result, place = 0, 1
    while a ~= 0 and b ~= 0 do
      local digit_a = a % m
      local digit_b = b % m
      result = result + t[digit_a][digit_b] * place
      a = (a - digit_a) / m
      b = (b - digit_b) / m
      place = place * m
    end
    return result + (a + b) * place
  end
end
-- Build an operator from the single-bit table `t` that works on digits of
-- 2^(t.n or 1). Digit results come from the bit-by-bit operator and are
-- memoized per pair of digits.
local function make_bitop(t)
  local bitwise = make_bitop_uncached(t, 2 ^ 1)
  local function row(a)
    return memoize(function(b)
      return bitwise(a, b)
    end)
  end
  local digit_table = memoize(row)
  return make_bitop_uncached(digit_table, 2 ^ (t.n or 1))
end
-- Reduce `x` to the range 0 .. 2^32-1.
-- (upstream note: probably not right on a platform whose Lua number type is
-- a 32-bit integer)
function M.tobit(x)
  return x % MOD
end

-- xor is the only operator built from a truth table; the others are derived
-- from it arithmetically.
local bxor = make_bitop({
  [0] = {[0] = 0, [1] = 1},
  [1] = {[0] = 1, [1] = 0},
  n = 4,
})
M.bxor = bxor

local function bnot(a)
  return MODM - a
end
M.bnot = bnot

-- a + b counts the common bits twice and the differing bits once
local function band(a, b)
  return ((a + b) - bxor(a, b)) / 2
end
M.band = band

-- De Morgan: a or b == not (not a and not b)
local function bor(a, b)
  return MODM - band(MODM - a, MODM - b)
end
M.bor = bor
local lshift, rshift -- each one delegates to the other for negative counts

-- Logical right shift (Lua 5.2 inspired).
rshift = function(a, disp)
  if disp < 0 then
    return lshift(a, -disp)
  else
    return floor(a % MOD / 2 ^ disp)
  end
end
M.rshift = rshift

-- Left shift, truncated to 32 bits (Lua 5.2 inspired).
lshift = function(a, disp)
  if disp < 0 then
    return rshift(a, -disp)
  else
    return (a * 2 ^ disp) % MOD
  end
end
M.lshift = lshift
-- Format `x` as hexadecimal (BitOp style). `n` is the number of digits
-- (default 8); a negative `n` selects upper-case digits and `n == 0` yields
-- the empty string.
local function tohex(x, n)
  n = n or 8
  if n == 0 then return '' end
  local spec = 'x'
  if n < 0 then
    spec = 'X'
    n = - n
  end
  local masked = band(x, 16 ^ n - 1)
  return ('%0' .. n .. spec):format(masked)
end
M.tohex = tohex
-- Read `width` bits (default 1) of `n` starting at bit `field`
-- (Lua 5.2 inspired).
local function extract(n, field, width)
  local mask = 2 ^ (width or 1) - 1
  return band(rshift(n, field), mask)
end
M.extract = extract

-- Return `n` with the `width` bits (default 1) starting at bit `field`
-- replaced by the low bits of `v` (Lua 5.2 inspired).
local function replace(n, v, field, width)
  local field_mask = 2 ^ (width or 1) - 1
  local value = band(v, field_mask) -- required by spec?
  local keep = bnot(lshift(field_mask, field))
  return band(n, keep) + lshift(value, field)
end
M.replace = replace
-- Reverse the order of the four bytes of `x` (BitOp style): the lowest byte
-- is consumed first and ends up in the highest position.
local function bswap(x)
  local swapped = 0
  for _ = 1, 4 do
    swapped = lshift(swapped, 8) + band(x, 0xff)
    x = rshift(x, 8)
  end
  return swapped
end
M.bswap = bswap
-- Rotate right by `disp` bits, taken modulo 32 (Lua 5.2 inspired).
local function rrotate(x, disp)
  local count = disp % 32
  -- the bits that fall off the low end re-enter at the top
  local wrapped = band(x, 2 ^ count - 1)
  return rshift(x, count) + lshift(wrapped, 32 - count)
end
M.rrotate = rrotate

-- Rotate left: a right rotation by the negated count (Lua 5.2 inspired).
local function lrotate(x, disp)
  return rrotate(x, -disp)
end
M.lrotate = lrotate

M.rol = lrotate -- LuaOp inspired
M.ror = rrotate -- LuaOp inspired
-- Arithmetic right shift (Lua 5.2 inspired): when bit 31 of `x` is set, the
-- vacated high bits are filled with ones.
local function arshift(x, disp)
  local shifted = rshift(x, disp)
  if x >= 0x80000000 then
    return shifted + lshift(2 ^ disp - 1, 32 - disp)
  end
  return shifted
end
M.arshift = arshift

-- True when `x` and `y` have at least one bit in common (Lua 5.2 inspired).
M.btest = function(x, y)
  return band(x, y) ~= 0
end
--
-- Start Lua 5.2 "bit32" compat section.
--
M.bit32 = {} -- Lua 5.2 'bit32' compatibility

local function bit32_bnot(x)
  return (-1 - x) % MOD
end
M.bit32.bnot = bit32_bnot

-- The three variadic operators below share one shape: no argument gives the
-- operator's identity value, one argument is reduced modulo 2^32, and more
-- arguments are folded from the left.

local function bit32_bxor(a, b, c, ...)
  if not b then
    if a then return a % MOD end
    return 0
  end
  local z = bxor(a % MOD, b % MOD)
  if c then
    return bit32_bxor(z, c, ...)
  end
  return z
end
M.bit32.bxor = bit32_bxor

local function bit32_band(a, b, c, ...)
  if not b then
    if a then return a % MOD end
    return MODM
  end
  local z = band(a % MOD, b % MOD)
  if c then
    return bit32_band(z, c, ...)
  end
  return z
end
M.bit32.band = bit32_band

local function bit32_bor(a, b, c, ...)
  if not b then
    if a then return a % MOD end
    return 0
  end
  local z = bor(a % MOD, b % MOD)
  if c then
    return bit32_bor(z, c, ...)
  end
  return z
end
M.bit32.bor = bit32_bor
do
  local bit32 = M.bit32

  -- Shift counts beyond +-31 push every bit out of the word.
  local function shifts_everything_out(disp)
    return disp > 31 or disp < -31
  end

  bit32.btest = function(...)
    return bit32_band(...) ~= 0
  end

  bit32.lrotate = function(x, disp)
    return lrotate(x % MOD, disp)
  end

  bit32.rrotate = function(x, disp)
    return rrotate(x % MOD, disp)
  end

  bit32.lshift = function(x, disp)
    if shifts_everything_out(disp) then return 0 end
    return lshift(x % MOD, disp)
  end

  bit32.rshift = function(x, disp)
    if shifts_everything_out(disp) then return 0 end
    return rshift(x % MOD, disp)
  end

  -- Arithmetic shift: a count above 31 leaves only copies of the sign bit,
  -- and a negative count is a left shift.
  bit32.arshift = function(x, disp)
    x = x % MOD
    if disp >= 0 then
      if disp > 31 then
        return (x >= 0x80000000) and MODM or 0
      end
      return arshift(x, disp)
    end
    return lshift(x, -disp)
  end

  -- `...` carries the optional width (default 1).
  bit32.extract = function(x, field, ...)
    local width = ... or 1
    if field < 0 or field > 31 or width < 0 or field+width > 32 then error 'out of range' end
    return extract(x % MOD, field, ...)
  end

  bit32.replace = function(x, v, field, ...)
    local width = ... or 1
    if field < 0 or field > 31 or width < 0 or field+width > 32 then error 'out of range' end
    return replace(x % MOD, v % MOD, field, ...)
  end
end
--
-- Start LuaBitOp "bit" compat section.
--
-- LuaBitOp "bit" compatibility: every result is normalised by `bit_tobit`
-- into the signed range -2^31 .. 2^31-1 and shift/rotate counts are taken
-- modulo 32.
local function bit_tobit(x)
  local wrapped = x % MOD
  if wrapped >= 0x80000000 then
    return wrapped - MOD
  end
  return wrapped
end

-- Variadic operators: three or more operands are folded from the left.
local function bit_bor(a, b, c, ...)
  if c then
    return bit_bor(bit_bor(a, b), c, ...)
  end
  if b then
    return bit_tobit(bor(a % MOD, b % MOD))
  end
  return bit_tobit(a)
end

local function bit_band(a, b, c, ...)
  if c then
    return bit_band(bit_band(a, b), c, ...)
  end
  if b then
    return bit_tobit(band(a % MOD, b % MOD))
  end
  return bit_tobit(a)
end

local function bit_bxor(a, b, c, ...)
  if c then
    return bit_bxor(bit_bxor(a, b), c, ...)
  end
  if b then
    return bit_tobit(bxor(a % MOD, b % MOD))
  end
  return bit_tobit(a)
end

M.bit = {
  tobit = bit_tobit,
  tohex = function(x, ...)
    return tohex(x % MOD, ...)
  end,
  bnot = function(x)
    return bit_tobit(bnot(x % MOD))
  end,
  bor = bit_bor,
  band = bit_band,
  bxor = bit_bxor,
  lshift = function(x, n)
    return bit_tobit(lshift(x % MOD, n % 32))
  end,
  rshift = function(x, n)
    return bit_tobit(rshift(x % MOD, n % 32))
  end,
  arshift = function(x, n)
    return bit_tobit(arshift(x % MOD, n % 32))
  end,
  rol = function(x, n)
    return bit_tobit(lrotate(x % MOD, n % 32))
  end,
  ror = function(x, n)
    return bit_tobit(rrotate(x % MOD, n % 32))
  end,
  bswap = function(x)
    return bit_tobit(bswap(x % MOD))
  end,
}

-- publish the compatibility table as the global `bit` library
_G.bit = M.bit
end
do -- string
-- Copy `len` elements from `src` to `dst`, both indexed from 0, and return
-- `dst`.
local function ffi_copy(dst, src, len)
  for offset = 0, len - 1 do
    dst[offset] = src[offset]
  end
  return dst
end
-- Build a Lua string from the byte values in the 0-indexed `buf`. With `len`
-- exactly that many bytes are taken; without it reading stops at the first
-- zero byte.
local function ffi_string(buf, len)
  local pieces = {}
  local offset = 0
  repeat
    local byte = buf[offset]
    local finished
    if len then
      finished = offset >= len
    else
      finished = byte == 0
    end
    if not finished then
      pieces[offset + 1] = string.char(byte)
      offset = offset + 1
    end
  until finished
  return table.concat(pieces)
end
local _G = _G
-- Table-backed stand-in for an FFI array/pointer. A buffer is a 0-indexed
-- window (`pointer`) onto a 1-based `values` table; adding or subtracting a
-- number yields a new window that shares the same `values`.
local ffi_buffer
do
  local meta = {}

  -- 1-based slot in `values` for a 0-based `index` relative to the pointer.
  local function slot_of(self, index)
    return self.pointer + index + 1
  end

  function meta:__tostring()
    local parts = {"i=" .. self.pointer, ":["}
    for i = 1, #self.values do
      parts[#parts + 1] = self.values[i] or "#"
    end
    parts[#parts + 1] = "]"
    return table.concat(parts, " ")
  end

  -- Pointer arithmetic: whichever operand is a table is offset by the other.
  function meta.__add(a, b)
    if type(a) == "table" then return (a:PointerOffset(b)) end
    if type(b) == "table" then return (b:PointerOffset(a)) end
    error("UH OH")
  end

  function meta.__sub(a, b)
    if type(a) == "table" then return (a:PointerOffset(-b)) end
    if type(b) == "table" then return (b:PointerOffset(-a)) end
    error("UH OH")
  end

  -- Reads of slots that hold nothing yield 0; string keys are rejected
  -- because every legitimate field is stored directly on the instance.
  function meta:__index(index)
    if type(index) == "string" then
      error("string index: " .. index .. " " .. debug.traceback())
    end
    return self.values[slot_of(self, index)] or 0
  end

  function meta:__newindex(index, val)
    -- handle uint32_t overflow
    if val < 0 then val = val % 0xFFFFFFFF + 1 end
    self.values[slot_of(self, index)] = val
  end

  function meta:PointerOffset(offset)
    local view = ffi_buffer()
    view.pointer = self.pointer + offset
    -- share the values instead of copying them, otherwise writes through
    -- the new pointer would not reach the original buffer
    view.values = self.values
    return view
  end

  -- `len` selects the initial content: another buffer (its values are
  -- copied), a string (its bytes), a number (that many zeros) or nothing
  -- (empty, length math.huge). `t` is stored as the `type` field.
  ffi_buffer = function(len, t)
    local kind = type(len)
    local self = {
      type = t,
      PointerOffset = meta.PointerOffset,
      pointer = 0,
      values = {},
    }
    if kind == "table" then
      self.length = len.length
      self.pointer = len.pointer
      for i, v in ipairs(len.values) do
        self.values[i] = v
      end
    elseif kind == "string" then
      self.length = #len
      for i = 1, #len do
        self.values[i] = string.byte(len, i)
      end
    elseif kind == "number" then
      self.length = len
      for i = 1, len do
        self.values[i] = 0
      end
    else
      self.length = math.huge
    end
    return setmetatable(self, meta)
  end
end
-------------------------------------------------------------------------------
-- strung.lua -----------------------------------------------------------------
-------------------------------------------------------------------------------
-- A rewrite of the Lua string patterns in Lua + FFI, for LuaJIT
-- Copyright (C) 2013 - 2014 Pierre-Yves Gérardy
-- MIT licensed (see the LICENSE file for the details).
-- strung compiles patterns to Lua functions, associated with an FFI array
-- holding bit sets, for the character sets (`[...]`) and classes (`%x`), and
-- slots for the capture bounds. This array is allocated once at pattern
-- compile time, and reused for each matching attempt, minimizing memory
-- pressure.
-------------------------------------------------------------------------------
--- localize globals ---
local assert, error, getmetatable, ipairs, loadstring, pairs, print =
  assert, error, getmetatable, ipairs, loadstring, pairs, print
local rawset, require, setmetatable, tonumber, tostring, type, pcall =
  rawset, require, setmetatable, tonumber, tostring, type, pcall
--[[DBG]] local unpack = unpack
-- used only for development. strung works fine in the absence of util.lua
-------------------------------------------------------------------------------
--- localize library functions ---
local m_max = math.max
local o_setlocale = os.setlocale
local s, t = string, table
local s_byte, s_find, s_gmatch, s_gsub, s_len, s_rep, s_sub = s.byte, s.find, s.gmatch, s.gsub, s.len, s.rep, s.sub
local t_concat, t_insert = t.concat, t.insert
-- table-based replacements for ffi.copy / ffi.new / ffi.string
local copy = ffi_copy
local new = ffi_buffer
local ffi_string = ffi_string
local bit = bit
-- the library's xor is called `bxor`; there is no `bit.xor` field, so
-- reading that left `bxor` nil
local band, bor, bxor = bit.band, bit.bor, bit.bxor
local lshift, rshift, rol = bit.lshift, bit.rshift, bit.rol
-------------------------------------------------------------------------------
--- C types ---
-- Constructors standing in for the FFI C types of strung; the type name is
-- only recorded on the buffer.

-- zero-filled array of `len` words
local function u32ary(len)
  return ffi_buffer(len, "uint32_t[?]")
end

-- buffer created from an existing buffer
local function u32ptr(buf)
  return ffi_buffer(buf, "uint32_t *")
end

-- buffer holding the bytes of `str`
local function constchar(str)
  return ffi_buffer(str, "const unsigned char *")
end
-------------------------------------------------------------------------------
--- bit sets
--- written by Mike Pall released in the public domain.
-- local bitary = ffi.typeof"int32_t[?]"
-- local function bitnew(n)
-- return bitary(rshift(n+31, 5))
-- end
-- Bit `i` lives in word `i >> 5` of `b`; the shift by `i` itself selects the
-- bit inside that word.

-- Is bit `i` of the bit set `b` set?
local function bittest(b, i)
  local word = b[rshift(i, 5)]
  return band(rshift(word, i), 1) ~= 0
end

-- Set bit `i` of the bit set `b`.
local function bitset(b, i)
  local word_index = rshift(i, 5)
  b[word_index] = bor(b[word_index], lshift(1, i))
end
-------------------------------------------------------------------------------
--- Pseudo-enum ---
-- This allows the compiler to treat the values as constants.
-- Placeholder ids. A number inside a template marks a slot: `push` replaces
-- it with `data[id]`, and the compiler also uses some ids as positions in
-- `templates.char` / `templates.set` (index 2 holds VAL or INV, index 4
-- holds SET), which is why VAL and INV share the value 2.
local P = {
POS = 1,
VAL = 2,
INV = 2,
NEG = 3,
SET = 4,
UNTIL = 5,
RETURN = 6,
TEST = 7, -- the current test (either a single character, a charset or a ".")
NEXT = 8, -- the rest of the pattern
OPEN = 9,
CLOSE = 10,
}
-- NOTE(review): these five locals are not referenced in the part of the file
-- visible here — confirm whether they are still needed.
local g_i, g_subj, g_ins, g_start, g_end
-------------------------------------------------------------------------------
--- Templates -----------------------------------------------------------------
-------------------------------------------------------------------------------
-- patterns are compiled to Lua by stitching these together:
local templates = {}
-- charsets, caps and qstn are the FFI pointers to the corresponding resources.
-- in order to minimize the memory allocation and ensure type stability, we
-- set the i variable to 0 in case of failure at a given index.

-- Prologue of every generated matcher. The middle entry is a placeholder
-- that `compile` replaces with "do" (anchored pattern) or "repeat".
-- `count` is declared with the other scratch locals because the `%b`
-- template assigns it; without the declaration every matcher containing
-- `%b` would write a global named `count`.
templates.head = {
[=[
local bittest, charsets, caps, constchar, expose = ...
return function(subj, _, i)
local charsets = charsets
local len = #subj
local i0 = i - 1
local chars = constchar(subj) - 1 -- substract one to keep the 1-based index
local c, open, close, diff, count
if i > len + 1 then return nil end
]=],
--[[
anchored and "do" or "repeat"]] "",
[=[ --
i0 = i0 + 1
do
i = i0]=],
}
-- Epilogue of every generated matcher: closes the block opened by the head,
-- then emits the loop terminator (P.UNTIL) and the return statement
-- (P.RETURN), both filled in by `compile`.
templates.tail = {
[=[ --
::done:: end
]=],
P.UNTIL, -- anchored and "end" or "until i ~= 0 or i0 > len"
P.RETURN,
[=[ --
end]=],
}
-- A single, unquantified test: advance `i` on success, otherwise set it to 0
-- and jump to the enclosing `done` label.
templates.one = {[[ -- c
i = (]], P.TEST, [[) and i + 1 or 0
if i == 0 then goto done end]]}
-- Quantifier templates. P.TEST is replaced by the test expression; P.NEXT
-- marks where the code for the rest of the pattern goes: `push` puts the
-- entries before P.NEXT into the main buffer and the entries after it into
-- the back buffer.

-- match the current character as much as possible, then
-- try to match the rest of the pattern. If the rest fails,
-- backtrack one character at a time until success is met
templates["*"] = {
[=[ -- c*
local i0, i1 = i
while true do
if (]=],
P.TEST,
[=[) then i = i + 1 else break end
end
i1 = i
repeat
i = i1
do
]=],
P.NEXT,
[[ --
::done:: end
if i ~= 0 then break end
i1 = i1 - 1
until i1 < i0
--if not i then goto done end]],
}
-- attempt to match the rest of the pattern (NEXT). on failure,
-- attempt the TEST and retry the rest of the match one character
-- further. repeat until success or the end of the subject is met.
templates["-"] = {
[[ -- c-
local i1 = i
while true do
i = i1
do --]],
P.NEXT,
[[ --
::done:: end
if i ~= 0 then break end
i = i1
if not (]],
P.TEST,
[[) then i = 0; break end
i1 = i1 + 1
end
if i == 0 then goto done end]],
}
-- the semantics of the "?" modifier are tricky to explain briefly...
-- If the TEST fails, do as if it does not exist. If it succeeds,
-- first try to match the rest of the pattern (NEXT), the first time starting
-- on the next character, and if that fails, the second time starting on
-- the current character (i1).
templates["?"] = {
[[ -- c?
do
local i1 = i
if ]],
P.TEST,
[[ then i = i + 1 else i1 = 0 end
goto firsttime
::secondtime::
i, i1 = i1, 0
::firsttime::
do --]],
P.NEXT,
[[ --
::done:: end
if i == 0 and i1 ~= 0 then goto secondtime end
end]],
}
-- Test expressions. The compiler writes the concrete values directly into
-- these tables (index P.VAL / P.INV / P.SET) and concatenates them into the
-- string that becomes P.TEST.
-- literal byte
templates.char = {[[(i <= len) and chars[i] == ]], P.VAL}
-- "." : any character while inside the subject
templates.any = {[[i <= len]]}
-- character set or class: P.INV becomes "not" or "", P.SET the bit offset of
-- the set inside `charsets`
templates.set = {[[(i <= len) and ]], P.INV, [[ bittest(charsets, ]], P.SET, [=[ + chars[i])]=]}
-- %bxy : P.OPEN and P.CLOSE are the byte values of the two delimiters; the
-- generated loop tracks the nesting depth in `count` until it returns to 0
-- or the subject ends.
templates.ballanced = {
[[ -- %b
if chars[i] ~= ]],
P.OPEN,
[[ then
i = 0; goto done
else
count = 1
repeat
i = i + 1
if i > len then i = 0; break end
c = chars[i]
if c == ]],
P.CLOSE,
[[ then
count = count - 1
elseif c == ]],
P.OPEN,
[[ then
count = count + 1
end
until count == 0 or i == 0
end
if i == 0 then goto done end
i = i + 1]],
}
-- %f[set] : tests the current character and the one before it against the
-- set at offset P.SET; P.NEG and P.POS are each "not" or "", set by the
-- compiler according to whether the set is inverted.
templates.frontier = {
[[ -- %f
if ]],
P.NEG,
[[ bittest(charsets, ]],
P.SET,
[[ + chars[i])
or ]],
P.POS,
[[ bittest(charsets, ]],
P.SET,
[[ + chars[i-1])
then i = 0; goto done end]],
}
-- Capture templates. P.OPEN and P.CLOSE are replaced by the (negative)
-- indices of the capture's two slots in `caps`.
-- position capture: open slot 0 marks it, close slot holds the position
templates.poscap = {[[ -- ()
caps[]], P.OPEN, [[] = 0
caps[]], P.CLOSE, [[] = i
]]}
-- back reference: compare the captured text with the subject at `i`
templates.refcap = {
[[ -- %n for n = 1, 9
open, close = caps[]],
P.OPEN,
[[], caps[]],
P.CLOSE,
[[]
diff = close - open
if subj:sub(open, close) == subj:sub(i, i + diff) then
i = i + diff + 1
else
i = 0; goto done --break
end]],
}
-- start of a text capture: remember the index of its first character
templates.open = {[[ -- (
caps[]], P.OPEN, [[] = i]]}
-- end of a text capture: remember the index of its last character
templates.close = {[[ -- )
caps[]], P.CLOSE, [[] = i - 1]]}
-- trailing "$": fail unless `i` is just past the end of the subject
templates.dollar = {[[ --
if i ~= #subj + 1 then i = 0 end -- $]]}
-------------------------------------------------------------------------------
---- Compiler -----------------------------------------------------------------
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--- Simple pattern compiler ---
--- Plain-text search used for patterns without magic characters.
-- @param s subject string
-- @param p literal text to look for
-- @param i 1-based index at which the search starts
-- @return start and end index of the first occurrence at or after `i`,
--         or nil when there is none
local function hash_find(s, p, i)
  if p == "" then return i, i - 1 end
  local lp, ls = s_len(p), s_len(s)
  if ls < lp then return nil end
  if p == s then
    -- The needle is the whole subject, so it can only match from index 1;
    -- any later start used to report a match beyond the subject.
    if i == 1 then return 1, lp end
    return nil
  end
  local chars = constchar(s) - 1 -- shifted so that chars[1] is the first byte
  local c = s_byte(p)
  lp = lp - 1 -- from here on: distance from first to last matched byte
  local last = ls - lp -- last index at which a match can still start
  repeat
    -- skip ahead to the next occurrence of the first byte of `p`
    while c ~= chars[i] do
      i = i + 1
      if i > last then return nil end
    end
    if lp == 0 or s_sub(s, i, i + lp) == p then return i, i + lp end
    i = i + 1
  until i > last
  return nil
end
-- Plain-text counterpart of `match`: the text found by `hash_find`, or nil
-- when there is no occurrence.
local function hash_match(s, p, i)
  local first, last = hash_find(s, p, i)
  if first then
    return s_sub(s, first, last)
  end
  return nil
end
-- Byte values of the characters that give a pattern a special meaning.
local specials = {}
do
  local magic = "^$*+?.([%-"
  for pos = 1, #magic do
    specials[magic:byte(pos)] = true
  end
end

-- True when `s` contains none of the characters above, i.e. it can be
-- searched for as plain text.
local function normal(s)
  local pos = 1
  local code = s:byte(pos)
  while code do
    if specials[code] then return false end
    pos = pos + 1
    code = s:byte(pos)
  end
  return true
end
-- local specials = u32ary(8)
-- for _, c in ipairs{"^", "$", "*", "+", "?", ".", "(", "[", "%", "-"} do
-- bitset(specials, c:byte())
-- end
-- local function normal(s)
-- local ptr = constchar(s)
-- for i = 0, #s - 1 do
-- if bittest(specials, ptr[i]) then return false end
-- end
-- return true
-- end
-- Matcher records used for magic-free patterns; they have the same layout
-- as the records produced by `compile` (code, source, capture count).
local simplefind = {hash_find, {"simple find"}, 0}
local simplematch = {hash_match, {"simple match"}, 0}
-------------------------------------------------------------------------------
--- Main pattern compiler helpers ---
local compile -- (pattern, mode); forward declaration, assigned further down
--- The caches for the compiled pattern matchers.
-- Indexing a cache with a pattern that is not stored yet builds its matcher
-- record, stores it under the pattern and returns it.
local findcodecache, matchcodecache, gmatchcodecache, gsubcodecache
do
  -- `simple`, when given, is the record used instead of compiling whenever
  -- the pattern contains no magic characters.
  local function new_codecache(mode, simple)
    local cache = {}
    return setmetatable(cache, {
      __mode = "k",
      __index = function(_, pat)
        local code
        if simple and normal(pat) then
          code = simple
        else
          code = compile(pat, mode)
        end
        rawset(cache, pat, code)
        return code
      end,
    })
  end

  findcodecache = new_codecache("find", simplefind)
  matchcodecache = new_codecache("match", simplematch)
  gmatchcodecache = new_codecache("gmatch")
  gsubcodecache = new_codecache("gsub")
end
-- Indentation hook for the generated source. Currently a pass-through: `s`
-- is returned unchanged and the level `i` is ignored.
local function indent(i, s)
  -- disabled: s_gsub(tostring(s), "\n", "\n" .. s_rep(" ", i * 2))
  return s
end
--- Push the template parts in two buffers.
-- Entries of `tpl` that are strings are emitted verbatim, any other entry
-- `o` is emitted as `data[o]`. Everything before the P.NEXT marker goes to
-- `buf` in template order; everything after it goes to `backbuf` in reverse
-- order. A template without P.NEXT only feeds `buf`.
local function push(tpl, data, buf, backbuf, ind)
  local has_next = false
  for _, part in ipairs(tpl) do
    if part == P.NEXT then
      has_next = true
      break
    end
    local piece = part
    if type(part) ~= "string" then
      piece = data[part]
    end
    buf[#buf + 1] = indent(ind, piece)
  end
  if not has_next then return end
  for pos = #tpl, 1, -1 do
    local part = tpl[pos]
    if part == P.NEXT then break end
    local piece = part
    if type(part) ~= "string" then
      piece = data[part]
    end
    backbuf[#backbuf + 1] = indent(ind, piece)
  end
end
--- Character classes...
-- Predicates over byte values (0..255), one per pattern character class.
local B = s_byte

local function isdigit(c)
  local lo, hi = B("0"), B("9")
  return lo <= c and c <= hi
end

local function isupper(c)
  local lo, hi = B("A"), B("Z")
  return lo <= c and c <= hi
end

local function islower(c)
  local lo, hi = B("a"), B("z")
  return lo <= c and c <= hi
end

local function isxdigit(c)
  if isdigit(c) then return true end
  if B("A") <= c and c <= B("F") then return true end
  return B("a") <= c and c <= B("f")
end

local function isspace(c)
  return c == B(" ")
    or c == B("\f")
    or c == B("\n")
    or c == B("\r")
    or c == B("\t")
    or c == B("\v")
end

local function isprint(c)
  return 0x20 <= c and c <= 0x7E
end

local function isalpha(c)
  return islower(c) or isupper(c)
end

local function isalnum(c)
  return isalpha(c) or isdigit(c)
end

-- printable, but neither a space nor alphanumeric
local function ispunct(c)
  return isprint(c) and not (isspace(c) or isalnum(c))
end

local function iscntrl(c)
  return c == 127 or (0 <= c and c <= 31)
end
-- Class letter -> predicate that decides membership of a byte value.
local ccref = {
  a = isalpha,
  c = iscntrl,
  d = isdigit,
  l = islower,
  p = ispunct,
  s = isspace,
  u = isupper,
  w = isalnum,
  x = isxdigit,
}

-- Class letter -> 256-bit set. Sets are built on first use, always as a
-- pair: the lower-case letter gets the members, the upper-case letter the
-- complement. Letters without a predicate yield nil.
local charclass = setmetatable({}, {
  __index = function(self, c)
    local lower = c:lower()
    local predicate = ccref[lower]
    if not predicate then return nil end
    local members, others = u32ary(8), u32ary(8)
    -- slow, but it only happens once per class pair and program run
    for code = 0, 255 do
      bitset(predicate(code) and members or others, code)
    end
    self[lower] = members
    self[c:upper()] = others
    return self[c]
  end,
})
--- %Z
do
  -- %Z matches every byte except 0
  local nonzero = u32ary(8)
  local code = 255
  while code >= 1 do
    bitset(nonzero, code)
    code = code - 1
  end
  charclass.Z = nonzero
end
--- build keys for the character class cache (used at pattern compilation time)
-- Cache key for a 256-bit set: its eight words joined with ":".
local function key(cs)
  local words = {}
  for w = 0, 7 do
    words[w + 1] = cs[w]
  end
  return t_concat(words, ":")
end
--- character class builder
-- Register the set of the class letter at `pat[i]` in `sets` (each distinct
-- set is stored once) and return the values for a set test: an empty
-- inversion string and the bit offset of the set.
local function makecc(pat, i, sets)
  local class = charclass[pat:sub(i, i)]
  local k = key(class)
  local slot = sets[k]
  if not slot then
    slot = #sets + 1
    sets[slot] = class
    sets[k] = slot
  end
  return "", (slot - 1) * 256
end
--- Character set builder
-- byte value of "^", which inverts a set when it comes first
local hat = ("^"):byte()
-- Parse the character set whose content starts at `pat[i]` (just after the
-- opening "[") and register it in `sets`.
-- Returns: whether the set is inverted, the bit offset of the set, and the
-- index of the closing "]".
-- Raises "unfinished character class" when the pattern ends first.
local function makecs(pat, i, sets)
local inv = s_byte(pat, i) == hat
i = inv and i + 1 or i
local last = #pat
local cs = u32ary(8)
local c = s_sub(pat, i, i)
while i <= last do
if c == "%" then
i = i + 1
local cc = charclass[s_sub(pat, i, i)]
if cc then
-- a class such as %a: merge its eight words into the set
-- (this loop variable shadows the pattern index `i`)
for i = 0, 7 do
cs[i] = bor(cs[i], cc[i])
end
i = i + 1
goto continue
elseif s_sub(pat, i, i) == "z" then
-- %z adds the zero byte
bitset(cs, 0)
i = i + 1
goto continue
end -- else, skip the % and evaluate the character as itself.
end
-- a range "x-y", unless the "-" is directly followed by the closing "]"
if s_sub(pat, i + 1, i + 1) == "-" and s_sub(pat, i + 2, i + 2) ~= "]" then
for i = s_byte(pat, i), s_byte(pat, i + 2) do
bitset(cs, i)
end
i = i + 3
else
bitset(cs, s_byte(pat, i))
i = i + 1
end
::continue::
c = s_sub(pat, i, i)
if c == "]" then break end
end
if i > last then error("unfinished character class") end
-- store each distinct set only once; `sets[k]` is its 1-based position
local k = key(cs)
if not sets[k] then
sets[#sets + 1] = cs
sets[k] = #sets
end
return inv, (sets[k] - 1) * 256, i
end
-------------------------------------------------------------------------------
--- Main pattern compiler ---
-- Characters that quantify the preceding test.
local suffixes = {["*"] = true, ["+"] = true, ["-"] = true, ["?"] = true}

-- Emit the code for the test stored in `data`, taking the quantifier at
-- `pat[i]` (if any) into account. Returns the index of the last pattern
-- character consumed and the new indentation level.
local function suffix(i, ind, len, pat, data, buf, backbuf)
  local quantifier = pat:sub(i, i)
  if quantifier == "+" or not suffixes[quantifier] then
    -- a plain test, or the mandatory first repetition of "+"
    push(templates.one, data, buf, backbuf, ind)
    if quantifier ~= "+" then
      return i - 1, ind
    end
    quantifier = "*" -- "c+" is "c" followed by "c*"
  end
  local extra = 1
  if quantifier == "?" then
    extra = 0
  end
  push(templates[quantifier], data, buf, backbuf, ind + extra)
  return i, ind + 2
end
--- Translate `pat`, starting at index `i`, into matcher source code.
-- @param caps    capture bookkeeping: two slots per capture in the array
--                part, plus the fields `open` (captures not closed yet) and
--                `type` ("pos" or "txt" per capture)
-- @param sets    character sets collected so far (see makecs / makecc)
-- @param data    placeholder values consumed by `push`
-- @param buf     receives the code that precedes "the rest of the pattern"
-- @param backbuf receives the code that follows it, in reverse order
-- Raises an error for malformed patterns.
local function body(pat, i, caps, sets, data, buf, backbuf)
  local len = #pat
  local ind = 1
  local c = pat:sub(i, i)
  while i <= len do
    if c == "(" then
      if pat:sub(i + 1, i + 1) == ")" then -- position capture
        caps[#caps + 1] = 1
        caps[#caps + 1] = 0
        caps.type[#caps.type + 1] = "pos"
        data[P.OPEN] = -#caps
        data[P.CLOSE] = -#caps + 1
        push(templates.poscap, data, buf, backbuf, ind)
        i = i + 1
      else -- open capture
        caps[#caps + 1] = 1
        caps[#caps + 1] = -1 -- -1 marks the capture as still open
        caps.open = caps.open + 1 -- keep track of opened captures
        caps.type[#caps.type + 1] = "txt"
        data[P.OPEN] = -#caps
        push(templates.open, data, buf, backbuf, ind)
      end
    elseif c == ")" then -- close capture
      data[P.CLOSE] = false
      -- close the most recently opened capture that is still open
      for j = #caps, 2, -2 do
        if caps[j] == -1 then
          caps[j] = 1
          caps.open = caps.open - 1
          data[P.CLOSE] = -j + 1
          break
        end
      end
      if not data[P.CLOSE] then error("invalid closing parenthesis") end
      push(templates.close, data, buf, backbuf, ind)
    elseif c == "." then
      data[P.TEST] = templates.any[1]
      i, ind = suffix(i + 1, ind, len, pat, data, buf, backbuf)
    elseif c == "[" then
      local inv
      inv, templates.set[P.SET], i = makecs(pat, i + 1, sets)
      templates.set[P.INV] = inv and "not" or ""
      data[P.TEST] = t_concat(templates.set)
      i, ind = suffix(i + 1, ind, len, pat, data, buf, backbuf)
    elseif c == "%" then
      i = i + 1
      c = pat:sub(i, i)
      -- `sub` returns "" (never nil) past the end of the pattern
      if c == "" then error("malformed pattern (ends with '%')") end
      if ccref[c:lower()] or c == "Z" then -- a character class
        templates.set[P.INV], templates.set[P.SET] = makecc(pat, i, sets)
        data[P.TEST] = t_concat(templates.set)
        i, ind = suffix(i + 1, ind, len, pat, data, buf, backbuf)
      elseif c == "0" then
        error("invalid capture index")
      elseif "1" <= c and c <= "9" then -- back reference
        local n = tonumber(c) * 2
        if n > #caps or caps[n] == -1 then
          error("attempt to reference a non-existing capture")
        end
        data[P.OPEN] = -n
        data[P.CLOSE] = -n + 1
        push(templates.refcap, data, buf, backbuf, ind)
      elseif c == "b" then -- balanced match, needs two delimiter characters
        data[P.OPEN], data[P.CLOSE] = pat:byte(i + 1, i + 2)
        if not data[P.CLOSE] then
          error("malformed pattern (missing arguments to '%b')")
        end
        i = i + 2
        push(templates.ballanced, data, buf, backbuf, ind)
      elseif c == "f" then -- frontier
        if pat:sub(i + 1, i + 1) ~= "[" then
          error("missing '[' after '%f' in pattern")
        end
        local inv
        inv, data[P.SET], i = makecs(pat, i + 2, sets)
        data[P.POS] = inv and "not" or ""
        data[P.NEG] = inv and "" or "not"
        push(templates.frontier, data, buf, backbuf, ind)
      else -- escaped literal character
        if c == "z" then c = "\0" end
        templates.char[P.VAL] = c:byte()
        data[P.TEST] = t_concat(templates.char)
        i, ind = suffix(i + 1, ind, len, pat, data, buf, backbuf)
      end -- /"%"
    elseif c == "$" and i == #pat then
      push(templates.dollar, data, buf, backbuf, ind)
    else -- literal character
      templates.char[P.VAL] = c:byte()
      data[P.TEST] = t_concat(templates.char)
      i, ind = suffix(i + 1, ind, len, pat, data, buf, backbuf)
    end
    i = i + 1
    c = pat:sub(i, i)
  end ---- /while
end
--- Create the uint32_t array that holds the character sets and capture bounds.
--[[
Memory layout of the charset/capture bounds uint32_t array:
Character sets each take 8 slots, starting from the start of the array.
Each capture uses two slots corresponding to its first ("open") and last ("close")
character.
Position captures are tagged by having the "open" bound set to 0. The "close" slot
holds the position.
The zeroth capture is implicit, and corresponds to the whole match.
We return two pointers: `charsets`, which points to the first word of the first
character set, and `caps`, such that
`caps[-n*2]` is the "open" bound of the nth capture.
`caps[-n*2 + 1]` is the corresponding "close" bound.
--]]
-- Allocate the array for `sets` (eight words each) and `ncaps` captures (two
-- slots each, plus two for the whole match), copy the sets into it and
-- return the array together with the capture pointer.
local function pack(sets, ncaps)
  local nsets = #sets
  local total = nsets * 8 + ncaps * 2
  local charsets = u32ary(total + 2) -- add two slots for the bounds of the whole match.
  local caps = u32ptr(charsets) + total
  local slot = 0
  for i = 1, nsets do
    local set = sets[i]
    for j = 0, 7 do
      charsets[slot] = set[j]
      slot = slot + 1
    end
  end
  return charsets, caps
end
-- Positions of the fields in the record returned by `compile`.
local M = {CODE = 1, SOURCE = 2, NCAPS = 3, CAPS = 4, ANCHORED = 5} -- fields of the "_M_atchers" table.
-- Compile `pat` into a matcher for `mode` ("find", "match", "gmatch" or
-- "gsub"). Returns the record {code, source, ncaps, capsptr, anchored}:
-- the matcher function, its generated source, the number of captures, the
-- capture pointer and whether the pattern is anchored.
-- Raises an error when the pattern is invalid or the generated source does
-- not load.
function compile(pat, mode) -- local, declared above
-- a leading "^" anchors the pattern, except in gmatch mode
local anchored = (pat:sub(1, 1) == "^") and mode ~= "gmatch"
local caps, sets = {open = 0, type = {}}, {}
local data = {}
-- the head, with its middle placeholder replaced by the loop opener
local buf = {
templates.head[1],
anchored and
"do" or
"repeat",
templates.head[3],
}
local backbuf = {}
local i = anchored and 2 or 1
body(pat, i, caps, sets, data, buf, backbuf)
-- pack the charsets and captures in an FFI array.
-- (`caps` holds two entries per capture; gsub mode reserves room for at
-- least one capture)
local ncaps = #caps / 2
local charsets, capsptr = pack(sets, (mode == "gsub" and m_max(1, ncaps) or ncaps))
-- append the tail of the matcher to its head.
for i = #backbuf, 1, -1 do
buf[#buf + 1] = backbuf[i]
end
--
data[P.UNTIL] = anchored and "end" or "until i ~=0 or i0 > len"
-- prepare the return values
assert(caps.open == 0, "invalid pattern: one or more captures left open")
assert(#caps < 400, "too many captures in pattern (max 200)")
if ncaps == 0 then
-- no explicit capture: the whole match is the result
if mode == "find" then
data[P.RETURN] = [[ --
if i == 0 then return nil end
return i0, i -1]]
elseif mode == "match" then
data[P.RETURN] = [[ --
if i == 0 then return nil end
return subj:sub(i0, i - 1)]]
elseif mode == "gmatch" then
data[P.RETURN] = [[ --
caps[0], caps[1] = i0, i-1
return i ~= 0]]
elseif mode == "gsub" then
data[P.RETURN] = [[ --
caps[0], caps[1] = i0, i-1
caps[-2], caps[-1] = i0, i-1
return i ~= 0]]
end
elseif mode:sub(1, 1) == "g" then
-- gmatch / gsub with captures: store the match bounds and report success;
-- the caller reads the captures from `caps`
data[P.RETURN] = [[ --
caps[0], caps[1] = i0, i-1
return i ~= 0]]
else
-- find / match with captures: return each capture, a position capture as
-- its position and a text capture as a substring
local rc = {}
for i = 2, #caps, 2 do
if caps.type[i / 2] == "pos" then
rc[#rc + 1] = "caps[" .. -i + 1 .. "]"
else
rc[#rc + 1] = "subj:sub(caps[" .. -i .. "], caps[" .. -i + 1 .. "]) "
end
end
if mode == "find" then t_insert(rc, 1, "i0, i - 1") end
data[P.RETURN] = [[ --
if i == 0 then return nil end
return ]] .. t_concat(rc, ", ")
end
push(templates.tail, data, buf, backbuf, 0)
-- load the source
local source = t_concat(buf)
local loader, err = loadstring(source)
if not loader then error(source .. "\nERROR:" .. err) end
-- NOTE(review): `expose` is not defined in the part of the file visible
-- here, so it is presumably nil at this point — confirm.
local code = loader(bittest, charsets, capsptr, constchar, expose)
return {code, source, ncaps, capsptr, anchored}
-- m.CODE, m.SOURCE, m.NCAPS, m.CAPS -- anchor the charset array? Seems to fix the segfault.
end
-------------------------------------------------------------------------------
---- API ----------------------------------------------------------------------
-------------------------------------------------------------------------------
-------------------------------------------------------------------------------
--- Helpers
-- Normalise a start index: a missing index means 1, a negative index counts
-- from the end of `subj`, and anything below 1 is clamped to 1.
local function checki(i, subj)
  local start = i or 1
  if start < 0 then
    start = #subj + 1 + start
  end
  if start < 1 then
    return 1
  end
  return start
end
-- producers[n] is a function (c, subj) that returns the values of the first
-- n captures stored in `c`. For each capture it yields the close slot when
-- the open slot is 0, otherwise the substring between the two slots.
-- The function for a given n is generated on first use and then stored in
-- the table.
local producers = setmetatable(
{},
{
__index = function(self, n)
local acc = {}
-- capture k uses the slots c[-2k] (open) and c[-2k + 1] (close)
for open = -2, -n * 2, -2 do
local close = open + 1
acc[#acc + 1] = "c[" .. open .. "] == 0 and c[" .. close .. "] or " .. "subj:sub(c[" .. open .. "], c[" .. close .. "])"
end
local res = loadstring(
[=[ --
return function(c, subj)
return ]=] .. t_concat(acc, ", ") .. [[ --
end
]]
)()
self[n] = res
return res
end,
}
)
-- without captures the producer returns the whole match, whose bounds are
-- kept in slots 0 and 1
producers[0] = function(caps, subj)
return subj:sub(caps[0], caps[1])
end
-------------------------------------------------------------------------------
--- find
-- string.find replacement. A plain search is delegated to the original
-- `string.find`; otherwise the start index is normalised and the cached
-- matcher for `pat` is run.
local function find(subj, pat, i, plain)
  if not plain then
    local start = checki(i, subj)
    local matcher = findcodecache[pat][M.CODE]
    return matcher(subj, pat, start)
  end
  return s_find(subj, pat, i, true)
end
-------------------------------------------------------------------------------
--- match
-- string.match replacement: run the cached matcher for `pat` from the
-- normalised start index. `raw` is accepted but not used.
local function match(subj, pat, i, raw)
  local matcher = matchcodecache[pat][M.CODE]
  return matcher(subj, pat, checki(i, subj))
end
-------------------------------------------------------------------------------
--- gmatch
local gmatch
do
  -- positions of the fields in the iterator state
  local CODE, SUBJ, PAT, INDEX, PROD, CAPS = 1, 2, 3, 4, 5, 6

  -- One iteration step: run the matcher from the stored index; on success
  -- move the index past the match and return the captures.
  local function gmatch_iter(state)
    local matched = state[CODE](state[SUBJ], state[PAT], state[INDEX])
    if not matched then
      return nil
    end
    local caps = state[CAPS]
    state[INDEX] = m_max(caps[0], caps[1]) + 1
    return state[PROD](caps, state[SUBJ])
  end

  -- string.gmatch replacement: returns the iterator function and its state.
  gmatch = function(subj, pat)
    local compiled = gmatchcodecache[pat]
    return gmatch_iter, {
      compiled[M.CODE],
      subj,
      pat,
      1, -- INDEX
      producers[compiled[M.NCAPS]], -- PROD
      compiled[M.CAPS], -- CAPS
    }
  end
end
-------------------------------------------------------------------------------
--- gsub
local gsub
do
-- Growable byte buffers used to assemble the gsub result.
-- A buffer is a table {s = capacity, i = number of bytes used, a = storage}.
local BUFF_INIT_SIZE = 16
local charsize = 1
-- Allocate storage for `s` bytes.
local function malloc(s)
	return ffi_buffer(s, "unsigned char[?]")
end
-- Releasing storage is a no-op here.
local function free() end
local function Buffer(size, index, arr)
	return {s = size, i = index, a = arr}
end
-- Create an empty buffer with the initial capacity.
local function buffer()
	return Buffer(BUFF_INIT_SIZE, 0, malloc(BUFF_INIT_SIZE * charsize))
end
-- Make sure `buf` can hold at least `size` bytes.
-- The capacity is doubled until it fits, then the used bytes are copied into
-- freshly allocated storage that replaces the old one.
local function reserve(buf, size)
	local capacity = buf.s
	if capacity >= size then return end
	while capacity < size do
		capacity = capacity * 2
	end
	buf.s = capacity
	local grown = malloc(capacity * charsize)
	copy(grown, buf.a, buf.i)
	free(buf.a)
	buf.a = grown
end
-- Append the used bytes of buffer `new` to buffer `acc`.
local function mergebuf(acc, new)
	local used, extra = acc.i, new.i
	reserve(acc, used + extra)
	copy(acc.a + used, new.a, extra)
	acc.i = used + extra
end
-- Append the bytes of the Lua string `str` to buffer `acc`.
local function mergestr(acc, str)
	local len = #str
	local used = acc.i
	reserve(acc, used + len)
	copy(acc.a + used, constchar(str), len)
	acc.i = used + len
end
-- Append `len` bytes starting at `ptr` to buffer `acc`.
local function mergebytes(acc, ptr, len)
	local used = acc.i
	reserve(acc, used + len)
	copy(acc.a + used, ptr, len)
	acc.i = used + len
end
-- Append the single byte value `byte` to buffer `acc`.
local function mergeonebyte(acc, byte)
	local used = acc.i
	reserve(acc, used + 1)
	acc.a[used] = byte
	acc.i = used + 1
end
-- handlers for each type of replacement
-- gsub handler for a table replacement.
-- The value produced for the current match is looked up in `tbl`. A string or
-- number result is appended to `buf`; a falsy result keeps the matched text
-- (subj bytes caps[0]..caps[1]); any other type raises an error.
local function table_handler(subj, caps, _, producer, buf, tbl)
	local res = tbl[producer(caps, subj)]
	if res then
		local t = type(res)
		if t ~= "string" and t ~= "number" then
			error("invalid replacement type (a " .. t .. ")")
		end
		mergestr(buf, tostring(res))
		return
	end
	local first, last = caps[0], caps[1]
	mergebytes(buf, constchar(subj) + first - 1, last - first + 1)
end
-- gsub handler for a function replacement.
-- `fun` is called with the values produced for the current match. A string or
-- number result is appended to `buf`; a falsy result keeps the matched text
-- (subj bytes caps[0]..caps[1]); any other type raises an error.
local function function_handler(subj, caps, _, producer, buf, fun)
	-- Read the match bounds *before* running user code: `caps` is the capture
	-- array stored with the cached compiled pattern, so a callback that runs
	-- another match with the same pattern may overwrite it.
	local first, last = caps[0], caps[1]
	local res = fun(producer(caps, subj))
	if not res then
		mergebytes(buf, constchar(subj) + first - 1, last - first + 1)
		return
	end
	local t = type(res)
	if t == "string" or t == "number" then
		mergestr(buf, tostring(res))
	else
		error("invalid replacement type (a " .. t .. ")")
	end
end
-- gsub handler for a replacement string that needs no capture expansion:
-- `str` is appended to `buf` verbatim. The first four handler arguments
-- (subject, captures, capture count, producer) are not needed here.
local function string_handler(_subj, _caps, _ncaps, _producer, buf, str)
	mergestr(buf, str)
end
-- gsub handler for a replacement string containing "%".
-- Here `pat` is the replacement string (not the search pattern). It is
-- scanned byte by byte: "%<digit>" inserts the corresponding capture, "%"
-- followed by any other byte inserts that byte, and every other byte is
-- copied as is.
local function string_with_captures_handler(subj, caps, ncaps, _, buf, pat)
	local len = #pat
	ncaps = m_max(1, ncaps) -- for simple matchers, %0 and %1 mean the same thing.
	subj = constchar(subj) - 1 -- shifted by one so 1-based indices can be used
	pat = constchar(pat) - 1 -- ditto
	local pos = 1
	while pos <= len do
		local byte = pat[pos]
		local capture_index = nil
		if byte == 37 then -- "%" --> capture or escape sequence.
			pos = pos + 1
			byte = pat[pos]
			if 48 <= byte and byte <= 57 then -- "0" <= byte <= "9"
				capture_index = byte - 48
			end
		end
		if capture_index ~= nil then
			if capture_index > ncaps then error("invalid capture index") end
			local first = caps[-2 * capture_index]
			local last = caps[-2 * capture_index + 1]
			if first == 0 then
				-- a start slot of 0 means the end slot holds the value itself
				mergestr(buf, tostring(last))
			else
				mergebytes(buf, subj + first, last - first + 1)
			end
		else
			mergeonebyte(buf, byte)
		end
		pos = pos + 1
	end
end
-- Pick the gsub handler and capture producer for a replacement of type `t`.
-- Functions receive every capture (producers[ncaps]); tables are indexed with
-- producers[1]; strings use the capture-expanding handler only when they
-- contain "%". The string handlers ignore their producer argument, so the
-- handler itself is returned in that slot. Any other type raises an error.
local function select_handler(ncaps, repl, t)
	if t == "function" then
		return function_handler, producers[ncaps]
	end
	if t == "table" then
		return table_handler, producers[1]
	end
	if t ~= "string" then
		error("bad argument #3 to 'strung.gsub' (string/function/table expected)")
	end
	local handler = repl:find("%%") and string_with_captures_handler or string_handler
	return handler, handler
end
-- string.gsub replacement.
-- subj: subject string.
-- pat:  Lua pattern; its compiled matcher comes from `gsubcodecache`.
-- repl: replacement -- a string, number (converted to a string), table or
--       function; anything else raises an error once a match was found.
-- n:    optional maximum number of substitutions. nil means no limit, and 0
--       means "replace nothing", also for anchored patterns. A negative n
--       never counts down to 0 and therefore also behaves as no limit.
-- Returns the resulting string and the number of substitutions performed.
-- When `pat` does not match at all, `subj` itself is returned with count 0.
function gsub(subj, pat, repl, n)
	n = n or -1
	local c = gsubcodecache[pat]
	local matcher = c[M.CODE]
	local success = matcher(subj, pat, 1)
	if not success then return subj, 0 end
	local t = type(repl)
	if t == "number" then repl, t = tostring(repl), "string" end
	local handler, producer = select_handler(c[M.NCAPS], repl, t)
	-- Anchored patterns should not be matched more than once. An explicit
	-- limit of 0 is left alone so that it still performs no substitution.
	if c[M.ANCHORED] and n ~= 0 then n = 1 end
	local caps = c[M.CAPS]
	local ncaps = c[M.NCAPS]
	local count = 0
	local buf = buffer()
	local subjptr = constchar(subj)
	local last_e = 0 -- end position of the previous match
	while success and n ~= 0 do
		n = n - 1
		count = count + 1
		-- Copy the unmatched text between the previous match and this one.
		mergebytes(buf, subjptr + last_e, caps[0] - last_e - 1)
		last_e = caps[1]
		-- Work out where to resume before the handler runs: `caps` is stored
		-- with the cached pattern, so a replacement callback that matches the
		-- same pattern again may overwrite it.
		local resume = m_max(caps[0], caps[1]) + 1
		handler(subj, caps, ncaps, producer, buf, repl)
		success = matcher(subj, pat, resume)
	end
	-- Copy whatever follows the last match.
	mergebytes(buf, subjptr + last_e, #subj - last_e)
	return ffi_string(buf.a, buf.i), count
end
end
-- Test-suite helper. When `test` is falsy, prints the generated matcher
-- source that `findcodecache` holds for `pat`, framed by two rulers and
-- followed by `msg`, then raises an error without a message.
-- Does nothing when `test` is truthy.
local function _assert(test, pat, msg)
	if test then return end
	local compiled = findcodecache[pat]
	local source = compiled[M.SOURCE]
	print(("- -"):rep(60))
	print(source)
	print(("- "):rep(60))
	print(msg)
	error()
end
-------------------------------------------------------------------------------
--- Misc API functions
-- Drop every compiled pattern and character class so they are rebuilt on
-- demand (used when the locale changes). Each cache is replaced by an empty
-- table that keeps the metatable of the one it replaces.
local function reset()
	local function fresh(cache)
		return setmetatable({}, getmetatable(cache))
	end
	findcodecache = fresh(findcodecache)
	matchcodecache = fresh(matchcodecache)
	gmatchcodecache = fresh(gmatchcodecache)
	gsubcodecache = fresh(gsubcodecache)
	charclass = fresh(charclass)
end
-- Wrapper around the saved `o_setlocale`: empties the pattern caches via
-- reset() and then forwards `loc` and `mode`, returning whatever the wrapped
-- function returns.
local function setlocale(loc, mode)
	reset() -- the caches are cleared before the locale is switched
	return o_setlocale(loc, mode)
end
-- Debug helper: print pattern `p`, a separator line, and then the source of
-- the matcher that `gsubcodecache` holds for it.
local function showpat(p)
	print(p, "\n---------")
	local compiled = gsubcodecache[p]
	print(compiled[M.SOURCE])
end
-------------------------------------------------------------------------------
--- Monkey patching and export table
local strung, install, uninstall
do
	local os, string = os, string
	-- Names accepted by install()/uninstall().
	local installable = {"find", "match", "gmatch", "gfind", "gsub"}
	-- The string-library functions as they were when this code ran, keyed by
	-- name; a name missing from the library has no entry and is never patched.
	local instset = {}
	for idx = 1, #installable do
		local name = installable[idx]
		instset[name] = string[name]
	end
	installable[#installable + 1] = "setlocale"

	-- Monkey patches the string library (and os.setlocale) with the strung
	-- versions. With no arguments everything installable is patched; otherwise
	-- only the named functions are.
	function install(...)
		local names = {...}
		if #names == 0 then names = installable end
		for _, name in ipairs(names) do
			if instset[name] then string[name] = strung[name] end
			if name == "setlocale" then os.setlocale = setlocale end
		end
	end

	-- Reverts install(): puts the saved functions back. With no arguments
	-- everything installable is restored.
	function uninstall(...)
		local names = {...}
		local everything = #names == 0
		if everything then
			names = installable
			os.setlocale = o_setlocale
		end
		for _, name in ipairs(names) do
			if instset[name] then string[name] = instset[name] end
			if name == "setlocale" then os.setlocale = o_setlocale end
		end
	end

	-- Export table; `gfind` is an alias of `gmatch`.
	strung = {
		_VERSION = "1.0.0-rc1",
		install = install,
		uninstall = uninstall,
		find = find,
		match = match,
		gmatch = gmatch,
		gfind = gmatch,
		gsub = gsub,
		reset = reset,
		setlocale = setlocale,
		assert = _assert,
		showpat = showpat,
	}
end
-------------------------------------------------------------------------------
-- Route the global string library's pattern functions through strung.
for _, name in ipairs({"find", "match", "gmatch", "gsub"}) do
	_G.string[name] = strung[name]
end
end
-- Disabled (`if false`) stub of a global `jit` table: empty `vmdef`, a fixed
-- `version_num`, and no-op `opt.start` and `flush`. Switch the condition to
-- enable it.
if false then
	jit = {
		vmdef = {},
		version_num = 20100,
		opt = {
			start = function() end,
		},
		flush = function() end,
	}
end
do -- package
	-- Start from an empty module registry.
	package.loaded = {}
	-- Preload entries for the standard libraries. Each loader reads the global
	-- when it is called, so it returns whatever the library is at that moment.
	package.preload = {
		io = function() return io end,
		os = function() return os end,
		string = function() return string end,
		table = function() return table end,
		math = function() return math end,
		debug = function() return debug end,
	}
	-- Search templates used for Lua modules.
	package.path = "./?.lua;/usr/local/share/luajit-2.1.0-beta3/?.lua;/usr/local/share/lua/5.1/?.lua;/usr/local/share/lua/5.1/?/init.lua"
end
do -- _G
-- Stub for collectgarbage: traces the call (only the first argument is
-- printed) and performs no collection.
-- Returns a value of the type the standard function would: `true` for "step",
-- which reports a finished cycle so `repeat until collectgarbage("step")`
-- terminates, and 0 for every other option, so `collectgarbage("count")` can
-- be used in arithmetic.
function _G.collectgarbage(...)
	print("collectgarbage(", ..., ")")
	local opt = ...
	if opt == "step" then return true end
	return 0
end
do -- require
local sformat = string.format
local sgmatch = string.gmatch
local sgsub = string.gsub
local smatch = string.match
local tconcat = table.concat
local tinsert = table.insert
local setmetatable = setmetatable
local ploadlib = package.loadlib
-- `_M` is the module object for the require replacement; `meta` is its
-- metatable.
local meta = {}
local _M = setmetatable({VERSION = "0.01"}, meta)
-- Loader that consults package.preload.
-- Returns the preload entry for `name` when there is one, otherwise a message
-- line describing the miss.
-- XXX assert(type(package.preload[name]) == 'function')?
local function preload_loader(name)
	local loader = package.preload[name]
	if loader then return loader end
	return sformat("no field package.preload['%s']\n", name)
end
-- Try to load module `name` from a ";"-separated list of path templates.
-- name:        module name; every "." is turned into "/".
-- paths:       templates in which each "?" is replaced by the converted name.
-- loader_func: called with each candidate path; a truthy result is the loader.
-- Returns the first loader found, otherwise a string with one
-- "no file '<path>'" line per candidate that was tried.
local function path_loader(name, paths, loader_func)
	local errors = {}
	name = sgsub(name, "%.", "/")
	-- Substitute through a function so the name is inserted literally; as a
	-- replacement *string*, a "%" in the module name would be read as an
	-- escape or capture reference.
	local function substitute_name()
		return name
	end
	for template in sgmatch(paths, "[^;]+") do
		local path = sgsub(template, "%?", substitute_name)
		local loader = loader_func(path)
		if loader then return loader end
		-- XXX error for when file isn't readable?
		-- XXX error for when file isn't valid Lua (or loadable?)
		tinsert(errors, sformat("no file '%s'", path))
	end
	return tconcat(errors, "\n") .. "\n"
end
-- Loader for Lua source modules: tries every template in package.path with
-- loadfile.
local function lua_loader(name)
	local search_path = package.path
	return path_loader(name, search_path, loadfile)
end
-- Build the "luaopen_*" entry-point name for module `name`: everything up to
-- and including the last "-" is dropped and each "." becomes "_".
local function get_init_function_name(name)
	local stem = sgsub(name, "^.*%-", "", 1)
	stem = sgsub(stem, "%.", "_")
	return "luaopen_" .. stem
end
-- Loader for C modules: tries every template in package.cpath and opens the
-- library's "luaopen_*" entry point through package.loadlib.
local function c_loader(name)
	local entry_point = get_init_function_name(name)
	local function open_library(path)
		return ploadlib(path, entry_point)
	end
	return path_loader(name, package.cpath, open_library)
end
-- Loader for submodules bundled in one C library: the library is located by
-- the part of `name` before the first ".", while the entry point is derived
-- from the full name.
local function all_in_one_loader(name)
	local entry_point = get_init_function_name(name)
	local root = smatch(name, "^[^.]+")
	local function open_library(path)
		return ploadlib(path, entry_point)
	end
	return path_loader(root, package.cpath, open_library)
end
-- Ask each registered loader in turn for module `name`.
-- Returns the first function a loader yields. If none does, returns nil plus
-- an error message: a "module ... not found" line followed by every message
-- string the loaders returned.
local function findchunk(name)
	local errors = {sformat("module '%s' not found\n", name)}
	for _, loader in ipairs(_M.loaders) do
		local chunk = loader(name)
		local kind = type(chunk)
		if kind == "function" then
			return chunk
		elseif kind == "string" then
			errors[#errors + 1] = chunk
		end
	end
	return nil, tconcat(errors, "")
end
-- Load module `name` once and return its value from package.loaded.
-- A module that is already registered (any non-nil value) is returned as is.
-- Otherwise the chunk found by findchunk is called with the module name; a
-- non-nil result is registered, and if the chunk neither returned a value
-- nor registered anything itself, `true` is stored.
-- Raises the collected loader messages when no chunk is found.
local function require(name)
	if package.loaded[name] ~= nil then
		return package.loaded[name]
	end
	local chunk, errors = findchunk(name)
	if not chunk then error(errors, 2) end
	local result = chunk(name)
	if result ~= nil then
		package.loaded[name] = result
	elseif package.loaded[name] == nil then
		package.loaded[name] = true
	end
	return package.loaded[name]
end
-- Loaders are callable tables {impl = function, name = string}; calling one
-- forwards the arguments to `impl`.
local loadermeta = {
	__call = function(self, ...)
		return self.impl(...)
	end,
}
-- Wrap `loader_func` in a named, callable loader object.
local function makeloader(loader_func, name)
	local loader = {impl = loader_func, name = name}
	return setmetatable(loader, loadermeta)
end
-- Loaders consulted by findchunk, in order. The C loaders are disabled.
-- XXX make sure that any added loaders are preserved (esp. luarocks)
local loaders = {}
loaders[1] = makeloader(preload_loader, "preload")
loaders[2] = makeloader(lua_loader, "lua")
--loaders[3] = makeloader(c_loader, "c")
--loaders[4] = makeloader(all_in_one_loader, "all_in_one")
_M.loaders = loaders
-- XXX sugar for adding/removing loaders
-- Calling the module object behaves like calling require.
meta.__call = function(self, name)
	return require(name)
end
_M.findchunk = findchunk
-- Install the module object as the global `require`.
_G.require = _M
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment