Skip to content

Instantly share code, notes, and snippets.

@meisterluk
Created September 9, 2019 15:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save meisterluk/9cdf7ad78253ebcb434c19358507690f to your computer and use it in GitHub Desktop.
Save meisterluk/9cdf7ad78253ebcb434c19358507690f to your computer and use it in GitHub Desktop.
Lua string.gsub tests extracted from the official Lua test suite
-- Shared fixture passed to every test group as `self`:
--   gsub   : the substitution function exercised by the tests
--   testID : number of assertions executed so far
local tests = {
  gsub = string.gsub,
  testID = 0,
}
--- Counting assertion used by the test groups.
-- Increments `self.testID` on every call (whether the check passes or not)
-- and raises an error when `cond` is falsy.
-- @param self        table holding the `testID` counter
-- @param cond        value to check; only `nil` and `false` count as failure
-- @param identifier  optional label for the check; when omitted the message
--                    contains just the running test number
function tests.assert(self, cond, identifier)
  self.testID = self.testID + 1
  if cond then
    return
  end
  -- Build the message only on failure so passing checks do no string work.
  local message = "test #" .. tostring(self.testID)
  if identifier ~= nil then
    message = message .. " (" .. tostring(identifier) .. " test)"
  end
  -- Level 2 prefixes the message with the position of the calling check.
  error(message .. " failed", 2)
end
-- via api.lua
--- gsub checks taken from api.lua.
-- Only the resulting string of each substitution is verified.
function tests.api(self)
  -- The first case uses a function as the replacement value.
  local appended = self.gsub("alo alo", "(a)", function (ch) return ch .. 'b' end)
  self:assert(appended == 'ablo ablo', 'ablo')

  -- Remaining cases: { subject, pattern, replacement, expected, label }
  local cases = {
    { "alo.alo.uhuh.", "%.", "//", "alo//alo//uhuh//", "alo alo" },
    { "alo.alo.uhuh.", "alo", "//", "//.//.uhuh.", "uhuh" },
    { "", "alo", "//", "", "alo" },
    { "...", "%.", "/.", "/././.", "dot" },
    { "...", "%.%.%.", "", "", "ellipsis" },
  }
  for _, case in ipairs(cases) do
    local subject, pattern, replacement, expected, label = table.unpack(case)
    local got = self.gsub(subject, pattern, replacement)
    self:assert(got == expected, label)
  end
end
-- via constructs.lua
--- gsub check taken from constructs.lua.
-- Every run of whitespace in a small Lua chunk is replaced by a single
-- newline and the result is compared with the one-token-per-line form.
function tests.constructs(self)
  -- The chunk after whitespace normalisation: one token per line.
  local one_token_per_line = [[
return
function
(
a
,
b
,
c
,
d
,
e
)
local
x
=
a
>=
b
or
c
or
(
d
and
e
)
or
nil
return
x
end
,
{
a
=
1
,
b
=
2
>=
1
,
}
or
{
1
};
]]

  -- The chunk as originally written.
  local chunk = [[
return function ( a , b , c , d , e )
local x = a >= b or c or ( d and e ) or nil
return x
end , { a = 1 , b = 2 >= 1 , } or { 1 };
]]

  local normalised = self.gsub(chunk, "%s+", "\n")
  self:assert(normalised == one_token_per_line, "force a SETLINE between opcodes")
end
-- via coroutine.lua
--- gsub check taken from coroutine.lua.
-- A function replacement doubles the single matched character; both the
-- resulting string and the substitution count are verified.
function tests.coroutine(self)
  local doubled, count = self.gsub("a", ".", function (c) return c .. c end)
  -- Labels added so a failure no longer reports "(nil test)".
  self:assert(doubled == "aa", "doubled char")
  self:assert(count == 1, "doubled char count")
end
-- via db.lua
--- gsub checks derived from db.lua.
-- Uses the second result of gsub (the number of substitutions) to count
-- newline characters.
function tests.db(self)
  local function countlines(text)
    local _, newlines = self.gsub(text, "\n", "")
    return newlines
  end

  -- these tests are contrived, the original testcases use some tracebacks
  local two_lines = countlines("hello\nworld\n")
  self:assert(two_lines == 2, "hello world line count")

  local five_lines = countlines("\n\n\n\n\n")
  self:assert(five_lines == 5, "empty lines count")
end
-- via errors.lua
--- gsub check taken from errors.lua.
-- Prefixes every punctuation character with a literal '%' ("%%" in the
-- replacement string) followed by the captured character ("%1").
function tests.errors(self)
  local escaped, count = self.gsub("<eof>", "(%p)", "%%%1")
  -- Labels added so a failure no longer reports "(nil test)".
  self:assert(escaped == "%<eof%>", "escaped punctuation")
  self:assert(count == 2, "escaped punctuation count")
end
-- via gc.lua
--- gsub checks taken from gc.lua.
-- Runs many short-lived substitutions, then removes every four-digit group
-- from a 10000-character digit string and checks the substitution count.
-- All working variables are local; nothing is written to the global table.
function tests.gc(self)
  local limit = 5000
  local a = "a"
  local contCreate = 0
  while contCreate <= limit do
    -- the gsub result is discarded on purpose: `a` is reset right after
    a = contCreate .. "b"
    a = self.gsub(a, '(%d%d*)', string.upper)
    a = "a"
    contCreate = contCreate + 1
  end
  self:assert(a == "a", "a unchanged")
  self:assert(contCreate == 5001, "contCreate")

  local x = "01234567890123456789012345678901234567890123456789012345678901234567890123456789"
  self:assert(string.len(x) == 80, "length")

  -- k copies of the 80-character block, built in one step
  local k = math.min(300, (math.maxinteger // 80) // 2)
  local s = string.rep(x, k)
  self:assert(string.len(s) == k*80, "length 2")

  s = string.sub(s, 1, 10000)
  local matches
  s, matches = self.gsub(s, '(%d%d%d%d)', '')
  self:assert(s == "", "s")
  self:assert(matches == 2500, "#matches")
end
-- via literals.lua
--- gsub checks taken from literals.lua.
-- Rewrites the newlines of a small program to each supported line-ending
-- sequence, runs it in a private environment, and checks that the line number
-- it reports equals the number of newlines gsub replaced.
function tests.literals(self)
  local env = { ["require"] = require }

  -- Compile `code` as a text chunk with `env` as its environment and run it.
  local function dostring(code)
    local chunk = assert(load(code, "", "t", env))
    return chunk()
  end

  local prog = [[
a = 1 -- a comment
b = 2
x = [=[
hi
]=]
y = "\
hello\r\n\
"
return require"debug".getinfo(1).currentline
]]

  local line_endings = {"\n", "\r", "\n\r", "\r\n"}
  for _, eol in ipairs(line_endings) do
    local variant, replaced = self.gsub(prog, "\n", eol)
    local reported_line = dostring(variant)
    self:assert(reported_line == replaced, "dostring")
    self:assert(env.x == "hi\n" and env.y == "\nhello\r\n\n", "x&y")
  end
end
-- via math.lua
--- gsub checks taken from math.lua.
-- gsub is used to bump the last decimal digit of an integer's textual form,
-- producing a numeral one past the integer range.
function tests.math(self)
  local minint, maxint = math.mininteger, math.maxinteger

  -- Equal in value and in numeric subtype (integer vs. float).
  local function eqT(lhs, rhs)
    return lhs == rhs and math.type(lhs) == math.type(rhs)
  end

  -- Format `n` in decimal and increment its final digit (which must not be 9).
  local function incd(n)
    local digits = string.format("%d", n)
    local bumped = self.gsub(digits, "%d$", function (last)
      self:assert(last ~= '9', "9")
      local code = string.byte(last) + 1
      return string.char(code)
    end)
    return bumped
  end

  -- 'tonumber' with overflow by 1
  local above_max = tonumber(incd(maxint))
  self:assert(eqT(above_max, maxint + 1.0), "maxint")
  local below_min = tonumber(incd(minint))
  self:assert(eqT(below_min, minint - 1.0), "minint")

  -- large numbers
  local zeros = string.rep("0", 30)
  self:assert(eqT(tonumber("1" .. zeros), 1e30), "1e30")
  self:assert(eqT(tonumber("-1" .. zeros), -1e30), "-1e30")
end
-- via main.lua
-- gsub check based on a program text from main.lua.
-- Every single space in the program text `s` is replaced by two newline
-- characters and the result is compared with the `expected` literal.
-- Both literals are whitespace-sensitive; do not re-indent or reflow them.
function tests.main(self)
-- program text; note that it starts with a space right after the opening
-- bracket, so the first character of `s` is ' ' and not a newline
local s = [=[ --
function f ( x )
local a = [[
xuxu
]]
local b = "\
xuxu\n"
if x == 11 then return 1 + 12 , 2 + 20 end --[[ test multiple returns ]]
return x + 1
--\\
end
return( f( 100 ) )
assert( a == b )
do return f( 11 ) end ]=]
-- only the first gsub result (the string) is kept; the count is dropped
s = self.gsub(s, ' ', '\n\n')
-- NOTE(review): each space becomes TWO newlines, so `s` now contains blank
-- lines wherever the program text had a space. The literal below must contain
-- the matching blank lines for the comparison to succeed -- confirm that this
-- copy of the literal has not had its blank lines stripped.
local expected = [=[
--
function
f
(
x
)
local
a
=
[[
xuxu
]]
local
b
=
"\
xuxu\n"
if
x
==
11
then
return
1
+
12
,
2
+
20
end
--[[
test
multiple
returns
]]
return
x
+
1
--\\
end
return(
f(
100
)
)
assert(
a
==
b
)
do
return
f(
11
)
end
]=]
self:assert(s == expected, "double newlines")
end
-- via utf8.lua
--- gsub checks taken from utf8.lua.
-- gsub is used to count UTF-8 characters by deleting every continuation byte
-- (0x80-0xBF); the remaining byte count is compared with `utf8.len` and with
-- the number of expected code points.
-- All checks go through `self:assert` and no global variable is written.
function tests.utf8(self)
  -- Number of bytes left after removing the continuation bytes.
  local function len(s)
    return #self.gsub(s, "[\x80-\xBF]", "")
  end

  -- `t` lists the expected code points of `s`; only its length is compared.
  local function check(s, t, label)
    local l = utf8.len(s)
    self:assert(#t == l and len(s) == l, label)
  end

  local s = "hello World"
  local t = {string.byte(s, 1, -1)}
  for i = 1, utf8.len(s) do
    self:assert(t[i] == string.byte(s, i), "ascii byte " .. i)
  end
  check(s, t, "ascii")
  check("汉字/漢字", {27721, 23383, 47, 28450, 23383,}, "han")
  check("", {}, "empty")

  -- smallest and largest encodings of 1, 2, 3 and 4 bytes
  s = "\0 \x7F\z
\xC2\x80 \xDF\xBF\z
\xE0\xA0\x80 \xEF\xBF\xBF\z
\xF0\x90\x80\x80 \xF4\x8F\xBF\xBF"
  s = self.gsub(s, " ", "")
  check(s, {0,0x7F, 0x80,0x7FF, 0x800,0xFFFF, 0x10000,0x10FFFF}, "limits")

  local x = "日本語a-4\0éó"
  check(x, {26085, 26412, 35486, 97, 45, 52, 0, 233, 243}, "mixed")
  check("𣲷𠜎𠱓𡁻𠵼ab𠺢",
    {0x23CB7, 0x2070E, 0x20C53, 0x2107B, 0x20D7C, 0x61, 0x62, 0x20EA2,}, "supplementary 1")
  check("𨳊𩶘𦧺𨳒𥄫𤓓\xF4\x8F\xBF\xBF",
    {0x28CCA, 0x29D98, 0x269FA, 0x28CD2, 0x2512B, 0x244D3, 0x10ffff}, "supplementary 2")
end
-- via strings.lua (does not work on my system: os.setlocale always fails)
--[[
function tests.strings(self)
local locales = { "ptb", "pt_BR.iso88591", "ISO-8859-1" }
local function trylocale (w)
for i = 1, #locales do
if os.setlocale(locales[i], w) then
print(string.format("'%s' locale set to '%s'", w, locales[i]))
return locales[i]
end
end
print(string.format("'%s' locale not found", w))
return false
end
if trylocale("collate") then
self:assert("alo" < "álo" and "álo" < "amo")
end
if trylocale("ctype") then
self:assert(self.gsub("áéíóú", "%a", "x") == "xxxxx")
self:assert(self.gsub("áÁéÉ", "%l", "x") == "xÁxÉ")
self:assert(self.gsub("áÁéÉ", "%u", "x") == "áxéx")
end
self:assert(false)
end]]
-- via pm.lua
--- Pattern-matching checks taken from pm.lua.
-- Covers character sets, captures and back-references, anchors, function and
-- table replacement values, error reporting, `%b` and `%f`.
-- Several subject strings contain significant whitespace (including runs of
-- two spaces); do not reflow them.
function tests.pm(self)
  -- Returns the part of `s` matched by pattern `p`. The pattern is wrapped in
  -- two position captures, so the first and last values returned by
  -- string.match delimit the match.
  local f1 = function(s, p)
    -- a "()" is inserted at the front below, so every %N must become %(N+1)
    p = self.gsub(p, "%%([0-9])", function (d)
      return "%" .. (tonumber(d)+1)
    end)
    p = self.gsub(p, "^(^?)", "%1()", 1)   -- "()" after an optional leading '^'
    p = self.gsub(p, "($?)$", "()%1", 1)   -- "()" before an optional trailing '$'
    local t = {string.match(s, p)}
    return string.sub(s, t[1], t[#t] - 1)
  end
  self:assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o", "\\0")
  self:assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3', "3443")
  self:assert(f1('=======', '^(=*)=%1$') == '=======', "equalssign")

  -- range(i, j) returns the integers i..j as multiple values
  local range
  range = function (i, j)
    if i <= j then
      return i, range(i+1, j)
    end
  end
  -- every byte value from 0 to 255, in order
  local abc = string.char(range(0, 255));
  -- strset(p) returns the concatenation of all pieces of `abc` matched by `p`
  local strset = function (p)
    local res = {s=''}
    self.gsub(abc, p, function (c) res.s = res.s .. c end)
    return res.s
  end;
  self:assert(string.len(strset('[\200-\210]')) == 11, "11")
  self:assert(strset('[a-z]') == "abcdefghijklmnopqrstuvwxyz", "lower latin")
  self:assert(strset('[a-z%d]') == strset('[%da-uu-z]'), "a-z%d")
  self:assert(strset('[a-]') == "-a", "a-")
  self:assert(strset('[^%W]') == strset('[%w]'), "^%W")
  self:assert(strset('[]%%]') == '%]', "%]")
  self:assert(strset('[a%-z]') == '-az', "-az")
  self:assert(strset('[%^%[%-a%]%-b]') == '-[]^ab', "-[]^ab")
  self:assert(strset('%Z') == strset('[\1-\255]'), "%Z")
  self:assert(strset('.') == strset('[\1-\255%z]'), ".")

  self:assert(self.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo', "xlo")
  self:assert(self.gsub('alo úlo ', ' +$', '') == 'alo úlo', "trim")
  self:assert(self.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo', "double trim")
  self:assert(self.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ', "alo alo 123")
  local t = "abç d"
  local a, b = self.gsub(t, '(.)', '%1@')
  self:assert('@'..a == self.gsub(t, '', '@'), "@str")
  self:assert(b == 6, "@6") -- adjusted to 6, because this file (unlike the ISO-8859-1 test files) is UTF-8 encoded
  a, b = self.gsub('abçd', '(.)', '%0@', 2)
  self:assert(a == 'a@b@çd' and b == 2, "a@b")
  self:assert(self.gsub('alo alo', '()[al]', '%1') == '12o 56o', "()[al]")
  self:assert(self.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
    "xyz=abc-abc=xyz", "abc=xyz")
  self:assert(self.gsub("abc", "%w", "%1%0") == "aabbcc", "aabbcc")
  self:assert(self.gsub("abc", "%w+", "%0%1") == "abcabc", "abcabc")
  self:assert(self.gsub('áéí', '$', '\0óú') == 'áéí\0óú', "áéí")
  self:assert(self.gsub('', '^', 'r') == 'r', "r")
  self:assert(self.gsub('', '$', 'r') == 'r', "r")
  self:assert(self.gsub("a b cd", " *", "-") == "-a-b-c-d-", "-a-b-c-d-")
  self:assert(self.gsub("um (dois) tres (quatro)", "(%(%w+%))", string.upper) ==
    "um (DOIS) tres (QUATRO)", "um tres")

  do
    local env = {}
    local set = function (n,v) env[n] = v end
    self.gsub("a=roberto,roberto=a", "(%w+)=(%w%w*)", set)
    self:assert(env.a=="roberto" and env.roberto=="a", "env assign")
  end

  local f = function (a,b) return self.gsub(a,'.',b) end
  self:assert(self.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
    "trocar tudo em bbbbb é alalalalalal", "alalalalalal")

  -- runs `s` as a chunk in the global environment; a chunk that returns
  -- nothing yields "" so the matched text is deleted
  local dostring = function (s) return load(s, "")() or "" end
  -- "$a='x'$" is replaced by "", which leaves the space before it and the
  -- space after it side by side: the expected value has TWO spaces after "alo"
  self:assert(self.gsub("alo $a='x'$ novamente $return a$",
    "$([^$]*)%$",
    dostring) == "alo  novamente x", "novamente")
  local x = self.gsub("$x=string.gsub('alo', '.', string.upper)$ assim vai para $return x$",
    "$([^$]*)%$", dostring)
  self:assert(x == ' assim vai para ALO', "assim")

  local t = {}
  -- two spaces before "joao" are required: the check below expects the word
  -- to start at position 13
  local s = 'a alo jose  joao'
  local r = self.gsub(s, '()(%w+)()', function (a,w,b)
    self:assert(string.len(w) == b-a, "b-a");
    t[a] = b-a;
  end)
  self:assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[13] == 4, "s==r")

  -- balanced when nothing but non-parenthesis text is left after removing
  -- every balanced "(...)" group
  local isbalanced = function (s)
    return string.find(self.gsub(s, "%b()", ""), "[()]") == nil
  end
  self:assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a"), "isbalanced")
  self:assert(not isbalanced("(9 ((8) 7) a b (\0 c) a"), "not isbalanced")
  self:assert(self.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo', "alo oi")

  local t = {"apple", "orange", "lime"; n=0}
  self:assert(self.gsub("x and x and x", "x", function () t.n=t.n+1; return t[t.n] end)
    == "apple and orange and lime", "x and x and x")
  t = {n=0}
  self.gsub("first second word", "%w%w*", function (w) t.n=t.n+1; t[t.n] = w end)
  self:assert(t[1] == "first" and t[2] == "second" and t[3] == "word" and t.n == 3, "t[1]==first")
  t = {n=0}
  self:assert(self.gsub("first second word", "%w+",
    function (w) t.n=t.n+1; t[t.n] = w end, 2) == "first second word", "first 2nd word")
  self:assert(t[1] == "first" and t[2] == "second" and t[3] == nil, "first second nil")

  -- calls f(...) and requires it to fail with a message matching `msg`
  local checkerror = function (msg, f, ...)
    local s, err = pcall(f, ...)
    self:assert(not s and string.find(err, msg), "checkerror " .. msg)
  end
  checkerror("invalid replacement value %(a table%)",
    self.gsub, "alo", ".", {a = {}})
  checkerror("invalid capture index %%2", self.gsub, "alo", ".", "%2")
  checkerror("invalid capture index %%0", self.gsub, "alo", "(%0)", "a")
  checkerror("invalid capture index %%1", self.gsub, "alo", "(%1)", "a")
  checkerror("invalid use of '%%'", self.gsub, "alo", ".", "%x")

  -- bug since 2.5 (C-stack overflow)
  do
    local f = function (size)
      local s = string.rep("a", size)
      local p = string.rep(".?", size)
      return pcall(string.match, s, p)
    end
    local r, m = f(80)
    self:assert(r and #m == 80, "r and #m == 80")
    r, m = f(200000)
    self:assert(not r and string.find(m, "too complex"), "too complex")
  end
  local a = string.rep('a', 300000)
  self:assert(string.find(a, '^a*.?$'), "^a*.?$")
  self:assert(not string.find(a, '^a*.?b$'), "^a*.?b$")
  self:assert(string.find(a, '^a-.?$'), "^a-.?$")

  -- bug in 5.1.2
  a = string.rep('a', 10000) .. string.rep('b', 10000)
  self:assert(not pcall(self.gsub, a, 'b'), "rep")

  -- recursive nest of gsubs
  -- (declared local so the helper does not leak into the global table)
  local function rev (s)
    return self.gsub(s, "(.)(.+)", function (c,s1) return rev(s1)..c end)
  end
  local x = "abcdef"
  self:assert(rev(rev(x)) == x, "rev")

  -- gsub with tables
  self:assert(self.gsub("alo alo", ".", {}) == "alo alo", "aloalo{}")
  self:assert(self.gsub("alo alo", "(.)", {a="AA", l=""}) == "AAo AAo", "AAo AAo 1")
  self:assert(self.gsub("alo alo", "(.).", {a="AA", l="K"}) == "AAo AAo", "AAo AAo 2")
  self:assert(self.gsub("alo alo", "((.)(.?))", {al="AA", o=false}) == "AAo AAo", "AAo AAo 3")
  self:assert(self.gsub("alo alo", "().", {'x','yy','zzz'}) == "xyyzzz alo", "xyyzzz alo")
  t = {}; setmetatable(t, {__index = function (t,s) return string.upper(s) end})
  self:assert(self.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI", "a ALO b HI")

  -- tests for `%f' (`frontiers')
  self:assert(self.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x", "xaa xa x xaa x")
  self:assert(self.gsub("[[]] [][] [[[[", "%f[[].", "x") == "x[]] x]x] x[[[", "x[]] x]x] x[[[")
  self:assert(self.gsub("01abc45de3", "%f[%d]", ".") == ".01abc.45de.3", ".01abc.45de.3")
  self:assert(self.gsub("01abc45 de3x", "%f[%D]%w", ".") == "01.bc45 de3.", "01.bc45 de3.")
  self:assert(self.gsub("function", "%f[\1-\255]%w", ".") == ".unction", ".unction")
  self:assert(self.gsub("function", "%f[^\1-\255]", ".") == "function.", "function.")
end
--- gsub examples in the style of the string library tutorial.
-- Every call goes through `self.gsub`, so the checks exercise the same
-- function as the other test groups. Callback examples record the captures
-- they receive so that these can be verified instead of printed.
function tests.string_library_doc(self)
  local a, b = self.gsub("Hello banana", "banana", "Lua user")
  self:assert(a == "Hello Lua user" and b == 1, "plain replacement")

  a, b = self.gsub("banana", "a", "A", 2) -- limit substitutions made to 2
  self:assert(a == "bAnAna" and b == 2, "limited replacement")

  a, b = self.gsub("banana", "(an)", "%1-") -- capture any occurrences of "an" and replace
  self:assert(a == "ban-an-a" and b == 2, "capture reuse")

  a, b = self.gsub("banana", "a(n)", "a(%1)") -- brackets around n's which follow a's
  self:assert(a == "ba(n)a(n)a" and b == 2, "brackets")

  a, b = self.gsub("banana", "(a)(n)", "%2%1") -- reverse any "an"s
  self:assert(a == "bnanaa" and b == 2, "swap captures")

  -- a callback that returns nothing leaves the subject unchanged
  local words = {}
  a, b = self.gsub("Hello Lua user", "(%w+)", function (w) words[#words + 1] = w end)
  self:assert(a == "Hello Lua user" and b == 3, "callback without result")
  self:assert(table.concat(words, ",") == "Hello,Lua,user", "callback words")

  a, b = self.gsub("Hello Lua user", "(%w+)", function (w) return string.len(w) end) -- replace with lengths
  self:assert(a == "5 3 4" and b == 3, "word lengths")

  a, b = self.gsub("banana", "(a)", string.upper) -- make all "a"s found uppercase
  self:assert(a == "bAnAnA" and b == 3, "uppercase a")

  a, b = self.gsub("banana", "(a)(n)", function (x, y) return y .. x end) -- reverse any "an"s
  self:assert(a == "bnanaa" and b == 2, "swap captures via function")

  -- lazy ".-" stops at the first closing brace: three separate matches
  local lazy = {}
  a, b = self.gsub("The big {brown} fox jumped {over} the lazy {dog}.", "{(.-)}",
    function (inner) lazy[#lazy + 1] = inner end)
  self:assert(a == "The big {brown} fox jumped {over} the lazy {dog}." and b == 3, "lazy braces")
  self:assert(table.concat(lazy, ",") == "brown,over,dog", "lazy captures")

  -- greedy ".*" runs to the last closing brace: a single match
  local greedy = {}
  a, b = self.gsub("The big {brown} fox jumped {over} the lazy {dog}.", "{(.*)}",
    function (inner) greedy[#greedy + 1] = inner end)
  self:assert(a == "The big {brown} fox jumped {over} the lazy {dog}." and b == 1, "greedy braces")
  self:assert(#greedy == 1 and greedy[1] == "brown} fox jumped {over} the lazy {dog", "greedy capture")
end
--- Additional `%b()` (balanced match) checks.
-- Note that deleting a parenthesised group keeps the spaces on both sides of
-- it, so the expected strings of the first two checks contain runs of two and
-- three spaces respectively.
function tests.custom(self)
  -- TODO https://www.lua.org/manual/5.3/manual.html#6.4.1
  -- TODO https://en.wikibooks.org/wiki/Lua_Programming/Standard_libraries#String_matching
  -- TODO http://lua-users.org/wiki/StringLibraryTutorial
  local a, b = self.gsub("hello (hi) world", "%b()", "")
  self:assert(a == "hello  world" and b == 1, "remove one group")
  a, b = self.gsub("hello (hi) (ho) world", "%b()", "")
  self:assert(a == "hello   world" and b == 2, "remove two groups")
  a, b = self.gsub("hello (hi) (ho) world", "%b()", "1")
  self:assert(a == "hello 1 1 world" and b == 2, "replace two groups")
  a, b = self.gsub("hello (h(i)) (ho) world", "%b()", "1")
  self:assert(a == "hello 1 1 world" and b == 2, "nested group")
  a, b = self.gsub("hello (h(i())) (ho) world", "%b()", "1")
  self:assert(a == "hello 1 1 world" and b == 2, "doubly nested group")
  -- the surplus ')' is not part of any balanced group and stays in place
  a, b = self.gsub("hello (h(^C))) (ho) world", "%b()", "1")
  self:assert(a == "hello 1) 1 world" and b == 2, "surplus closing paren")
end
--[[
actually run these test cases
]]
-- Run every test group stored in `tests`. `pairs` visits keys in an
-- unspecified order, so the group names are collected and sorted first; this
-- keeps the run order (and therefore the running test numbers shown in
-- failure messages) the same on every run.
do
  local names = {}
  for name, candidate in pairs(tests) do
    -- `gsub` and `assert` are functions too, but they are fixtures, not tests
    if type(candidate) == "function" and name ~= "gsub" and name ~= "assert" then
      names[#names + 1] = name
    end
  end
  table.sort(names)
  for _, name in ipairs(names) do
    print(("Running %s tests"):format(name))
    tests[name](tests)
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment