-- Lua string.gsub tests extracted from the official Lua test suite
-- Shared test harness.
--   gsub   : the implementation under test (replace it to exercise another one)
--   testID : number of assertions evaluated so far, used in failure messages
local tests = {
  gsub = string.gsub,
  testID = 0,
}
--- Count one assertion and raise an error when it does not hold.
-- @param self the harness table (holds the running `testID` counter)
-- @param cond value that must be truthy for the check to pass
-- @param identifier short label naming the check; shown in the failure message
function tests.assert(self, cond, identifier)
  self.testID = self.testID + 1
  if cond then
    return
  end
  -- The message is only built on failure; level 2 makes the error point at
  -- the line of the failing check instead of at this helper.
  local message = "test #" .. tostring(self.testID) .. " (" .. tostring(identifier) .. " test) failed"
  error(message, 2)
end
-- via api.lua
--- Plain-string and function replacements on short inputs.
function tests.api(self)
  local gsub = self.gsub
  -- Only the resulting string is checked here; the match count is dropped.
  local a = gsub("alo alo", "(a)", function (x) return x .. 'b' end)
  self:assert(a == 'ablo ablo', 'ablo')
  a = gsub("alo.alo.uhuh.", "%.", "//")
  self:assert(a == "alo//alo//uhuh//", "alo alo")
  a = gsub("alo.alo.uhuh.", "alo", "//")
  self:assert(a == "//.//.uhuh.", "uhuh")
  a = gsub("", "alo", "//")
  self:assert(a == "", "alo")
  a = gsub("...", "%.", "/.")
  self:assert(a == "/././.", "dot")
  a = gsub("...", "%.%.%.", "")
  self:assert(a == "", "ellipsis")
end
-- via constructs.lua
--- Every run of whitespace in a code snippet is replaced by one newline.
function tests.constructs(self)
  local f = [[
return function ( a , b , c , d , e )
  local x = a >= b or c or ( d and e ) or nil
  return x
end , { a = 1 , b = 2 >= 1 , } or { 1 };
]]
  -- The expected result is one token per line; building it from a list keeps
  -- it independent of how this file itself is indented or wrapped.
  local tokens = {
    "return", "function", "(", "a", ",", "b", ",", "c", ",", "d", ",", "e", ")",
    "local", "x", "=", "a", ">=", "b", "or", "c", "or", "(", "d", "and", "e", ")", "or", "nil",
    "return", "x",
    "end", ",", "{", "a", "=", "1", ",", "b", "=", "2", ">=", "1", ",", "}", "or", "{", "1", "};",
  }
  -- The snippet ends with a newline, so the result does too.
  local expected = table.concat(tokens, "\n") .. "\n"
  f = self.gsub(f, "%s+", "\n")
  self:assert(f == expected, "force a SETLINE between opcodes")
end
-- via coroutine.lua
--- A function replacement that doubles the single matched character.
function tests.coroutine(self)
  local result, count = self.gsub("a", ".", function (c) return c .. c end)
  self:assert(result == "aa", "doubled char")
  self:assert(count == 1, "doubled char count")
end
-- via db.lua
--- Uses the second return value of gsub (the match count) to count lines.
function tests.db(self)
  local function countlines(s)
    local _, count = self.gsub(s, "\n", "")
    return count
  end
  -- these tests are contrived, the original testcases use some tracebacks
  self:assert(countlines("hello\nworld\n") == 2, "hello world line count")
  self:assert(countlines("\n\n\n\n\n") == 5, "empty lines count")
end
-- via errors.lua
--- Prefixes every punctuation character with a literal '%'.
function tests.errors(self)
  -- In the replacement, "%%" is a literal percent sign and "%1" the capture.
  local escaped, count = self.gsub("<eof>", "(%p)", "%%%1")
  self:assert(escaped == "%<eof%>", "escaped punctuation")
  self:assert(count == 2, "escaped punctuation count")
end
-- via gc.lua
--- Many short-lived gsub calls, followed by one substitution on a 10000-byte string.
function tests.gc(self)
  local limit = 5000
  local a = "a"
  local contCreate = 0
  while contCreate <= limit do
    -- The results are deliberately thrown away: the loop only produces
    -- temporary strings through gsub.
    a = contCreate .. "b"
    a = self.gsub(a, '(%d%d*)', string.upper)
    a = "a"
    contCreate = contCreate + 1
  end
  self:assert(a == "a", "a")
  self:assert(contCreate == 5001, "contCreate")

  -- All names below are locals so the test does not leak globals.
  local x = string.rep("0123456789", 8)
  self:assert(string.len(x) == 80, "length")
  local k = math.min(300, (math.maxinteger // 80) // 2)
  local s = string.rep(x, k)
  self:assert(string.len(s) == k * 80, "length 2")
  s = string.sub(s, 1, 10000)
  -- 10000 digits removed four at a time: 2500 matches and nothing left over.
  local i
  s, i = self.gsub(s, '(%d%d%d%d)', '')
  self:assert(s == "", "s")
  self:assert(i == 2500, "#matches")
end
-- via literals.lua
--- Rewrites the line endings of a small program with gsub and runs each variant.
function tests.literals(self)
  -- The program runs in its own environment so `a`, `b`, `x` and `y` do not
  -- become globals of the test script.
  local env = { require = require }
  local dostring = function (x) return assert(load(x, "", "t", env))() end
  -- The program text must start at column 0: `x` and `y` are compared
  -- against exact string values below.
  local prog = [[
a = 1 -- a comment
b = 2
x = [=[
hi
]=]
y = "\
hello\r\n\
"
return require"debug".getinfo(1).currentline
]]
  for _, n in ipairs{"\n", "\r", "\n\r", "\r\n"} do
    -- `nn` is the number of replaced newlines, which is also the line number
    -- of the final `return` statement.
    local converted, nn = self.gsub(prog, "\n", n)
    self:assert(dostring(converted) == nn, "dostring")
    self:assert(env.x == "hi\n" and env.y == "\nhello\r\n\n", "x&y")
  end
end
-- via math.lua
--- Uses gsub to bump the last digit of an integer's decimal text, then checks
-- that `tonumber` yields the matching float.
function tests.math(self)
  -- Equal in value and in numeric subtype (integer vs. float).
  local function eqT(a, b)
    return a == b and math.type(a) == math.type(b)
  end
  local minint = math.mininteger
  local maxint = math.maxinteger
  -- Returns the decimal text of `n` with its last digit increased by one.
  -- The last digit must not be '9', since no carry is performed.
  local function incd(n)
    local s = string.format("%d", n)
    s = self.gsub(s, "%d$", function (d)
      self:assert(d ~= '9', "9")
      return string.char(string.byte(d) + 1)
    end)
    return s
  end
  -- 'tonumber' with overflow by 1
  self:assert(eqT(tonumber(incd(maxint)), maxint + 1.0), "maxint")
  self:assert(eqT(tonumber(incd(minint)), minint - 1.0), "minint")
  -- large numbers
  self:assert(eqT(tonumber("1" .. string.rep("0", 30)), 1e30), "1e30")
  self:assert(eqT(tonumber("-1" .. string.rep("0", 30)), -1e30), "-1e30")
end
-- via main.lua
-- Replaces every space of a small program text with two newlines and
-- compares the result against a literal expected string.
-- NOTE(review): the trailing " | |" on each line, and the single spaces /
-- missing blank lines inside both long strings, look like residue from
-- scraping a rendered web page. Both literals depend on exact whitespace, so
-- they presumably need to be restored from the original file before this
-- test can pass -- TODO confirm against the upstream source.
function tests.main(self) | |
local s = [=[ -- | |
function f ( x ) | |
local a = [[ | |
xuxu | |
]] | |
local b = "\ | |
xuxu\n" | |
if x == 11 then return 1 + 12 , 2 + 20 end --[[ test multiple returns ]] | |
return x + 1 | |
--\\ | |
end | |
return( f( 100 ) ) | |
assert( a == b ) | |
do return f( 11 ) end ]=] | |
-- every single space becomes two newline characters
s = self.gsub(s, ' ', '\n\n') | |
local expected = [=[ | |
-- | |
function | |
f | |
( | |
x | |
) | |
local | |
a | |
= | |
[[ | |
xuxu | |
]] | |
local | |
b | |
= | |
"\ | |
xuxu\n" | |
if | |
x | |
== | |
11 | |
then | |
return | |
1 | |
+ | |
12 | |
, | |
2 | |
+ | |
20 | |
end | |
--[[ | |
test | |
multiple | |
returns | |
]] | |
return | |
x | |
+ | |
1 | |
--\\ | |
end | |
return( | |
f( | |
100 | |
) | |
) | |
assert( | |
a | |
== | |
b | |
) | |
do | |
return | |
f( | |
11 | |
) | |
end | |
]=] | |
self:assert(s == expected, "double newlines") | |
end | |
-- via utf8.lua
--- Counts UTF-8 characters by deleting continuation bytes with gsub and
-- compares the count with `utf8.len`.
function tests.utf8(self)
  -- Bytes 0x80-0xBF are UTF-8 continuation bytes; once they are removed,
  -- one byte remains per encoded character.
  local function len(s)
    local stripped = self.gsub(s, "[\x80-\xBF]", "")
    return #stripped
  end
  -- `t` lists the expected code points; only its length is compared.
  local function check(s, t)
    local l = utf8.len(s)
    self:assert(#t == l and len(s) == l, "utf8 length")
  end
  local s = "hello World"
  local t = {string.byte(s, 1, -1)}
  for i = 1, utf8.len(s) do
    self:assert(t[i] == string.byte(s, i), "ascii byte")
  end
  check(s, t)
  check("汉字/漢字", {27721, 23383, 47, 28450, 23383,})
  check("", {})
  -- "\z" skips the line break and the indentation that follow it.
  s = "\0 \x7F\z
       \xC2\x80 \xDF\xBF\z
       \xE0\xA0\x80 \xEF\xBF\xBF\z
       \xF0\x90\x80\x80 \xF4\x8F\xBF\xBF"
  s = self.gsub(s, " ", "")
  check(s, {0,0x7F, 0x80,0x7FF, 0x800,0xFFFF, 0x10000,0x10FFFF})
  local x = "日本語a-4\0éó"
  check(x, {26085, 26412, 35486, 97, 45, 52, 0, 233, 243})
  check("𣲷𠜎𠱓𡁻𠵼ab𠺢",
    {0x23CB7, 0x2070E, 0x20C53, 0x2107B, 0x20D7C, 0x61, 0x62, 0x20EA2,})
  check("𨳊𩶘𦧺𨳒𥄫𤓓\xF4\x8F\xBF\xBF",
    {0x28CCA, 0x29D98, 0x269FA, 0x28CD2, 0x2512B, 0x244D3, 0x10ffff})
end
-- via strings.lua (disabled: does not work on my system, os.setlocale always fails)
--[[
function tests.strings(self)
  local locales = { "ptb", "pt_BR.iso88591", "ISO-8859-1" }
  local function trylocale (w)
    for i = 1, #locales do
      if os.setlocale(locales[i], w) then
        print(string.format("'%s' locale set to '%s'", w, locales[i]))
        return locales[i]
      end
    end
    print(string.format("'%s' locale not found", w))
    return false
  end
  if trylocale("collate") then
    self:assert("alo" < "álo" and "álo" < "amo")
  end
  if trylocale("ctype") then
    self:assert(self.gsub("áéíóú", "%a", "x") == "xxxxx")
    self:assert(self.gsub("áÁéÉ", "%l", "x") == "xÁxÉ")
    self:assert(self.gsub("áÁéÉ", "%u", "x") == "áxéx")
  end
  self:assert(false)
end]]
-- via pm.lua
--- Pattern-matching cases: captures, character sets, anchors, position
-- captures, %b and %f, table and function replacements, and error reporting.
function tests.pm(self)
  -- Returns the part of `s` matched by pattern `p`. The pattern is wrapped
  -- in position captures, so every back-reference is shifted up by one.
  local f1 = function (s, p)
    p = self.gsub(p, "%%([0-9])", function (d)
      return "%" .. (tonumber(d) + 1)
    end)
    p = self.gsub(p, "^(^?)", "%1()", 1)
    p = self.gsub(p, "($?)$", "()%1", 1)
    local t = {string.match(s, p)}
    return string.sub(s, t[1], t[#t] - 1)
  end
  self:assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o", "\\0")
  self:assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3', "3443")
  self:assert(f1('=======', '^(=*)=%1$') == '=======', "equalssign")

  -- Returns the integers i, i+1, ..., j as multiple values.
  local function range(i, j)
    if i <= j then
      return i, range(i + 1, j)
    end
  end
  -- One string holding every byte value from 0 to 255.
  local abc = string.char(range(0, 255))
  -- Returns all characters of `abc` matched by pattern `p`, in byte order.
  local strset = function (p)
    local matched = {}
    self.gsub(abc, p, function (c) matched[#matched + 1] = c end)
    return table.concat(matched)
  end
  self:assert(string.len(strset('[\200-\210]')) == 11, "11")
  self:assert(strset('[a-z]') == "abcdefghijklmnopqrstuvwxyz", "lower latin")
  self:assert(strset('[a-z%d]') == strset('[%da-uu-z]'), "a-z%d")
  self:assert(strset('[a-]') == "-a", "a-")
  self:assert(strset('[^%W]') == strset('[%w]'), "^%W")
  self:assert(strset('[]%%]') == '%]', "%]")
  self:assert(strset('[a%-z]') == '-az', "-az")
  self:assert(strset('[%^%[%-a%]%-b]') == '-[]^ab', "-[]^ab")
  self:assert(strset('%Z') == strset('[\1-\255]'), "%Z")
  self:assert(strset('.') == strset('[\1-\255%z]'), ".")

  self:assert(self.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo', "xlo")
  self:assert(self.gsub('alo úlo ', ' +$', '') == 'alo úlo', "trim")
  self:assert(self.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo', "double trim")
  self:assert(self.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ', "alo alo 123")
  local text = "abç d"
  local a, b = self.gsub(text, '(.)', '%1@')
  self:assert('@'..a == self.gsub(text, '', '@'), "@str")
  self:assert(b == 6, "@6") -- adjusted to 6, because this file (unlike the ISO-8859-1 test files) is UTF-8 encoded
  a, b = self.gsub('abçd', '(.)', '%0@', 2)
  self:assert(a == 'a@b@çd' and b == 2, "a@b")
  self:assert(self.gsub('alo alo', '()[al]', '%1') == '12o 56o', "()[al]")
  self:assert(self.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
    "xyz=abc-abc=xyz", "abc=xyz")
  self:assert(self.gsub("abc", "%w", "%1%0") == "aabbcc", "aabbcc")
  self:assert(self.gsub("abc", "%w+", "%0%1") == "abcabc", "abcabc")
  self:assert(self.gsub('áéí', '$', '\0óú') == 'áéí\0óú', "áéí")
  self:assert(self.gsub('', '^', 'r') == 'r', "r")
  self:assert(self.gsub('', '$', 'r') == 'r', "r")
  self:assert(self.gsub("a b cd", " *", "-") == "-a-b-c-d-", "-a-b-c-d-")
  self:assert(self.gsub("um (dois) tres (quatro)", "(%(%w+%))", string.upper) ==
    "um (DOIS) tres (QUATRO)", "um tres")

  do
    local env = {}
    local set = function (n, v) env[n] = v end
    self.gsub("a=roberto,roberto=a", "(%w+)=(%w%w*)", set)
    self:assert(env.a == "roberto" and env.roberto == "a", "env assign")
  end

  local f = function (s, repl) return self.gsub(s, '.', repl) end
  self:assert(self.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
    "trocar tudo em bbbbb é alalalalalal", "alalalalalal")

  -- Runs a chunk in the global environment; the snippets below communicate
  -- through the globals `a` and `x`.
  local dostring = function (s) return load(s, "")() or "" end
  -- The first $...$ chunk returns nothing and is replaced by "", which
  -- leaves the two spaces around it next to each other in the result.
  self:assert(self.gsub("alo $a='x'$ novamente $return a$",
    "$([^$]*)%$",
    dostring) == "alo  novamente x", "novamente")
  local x = self.gsub("$x=string.gsub('alo', '.', string.upper)$ assim vai para $return x$",
    "$([^$]*)%$", dostring)
  self:assert(x == ' assim vai para ALO', "assim")

  -- Position captures: maps the start of each word to its length. The two
  -- spaces before "joao" are needed for that word to start at position 13.
  local lengths = {}
  local s = 'a alo jose  joao'
  local r = self.gsub(s, '()(%w+)()', function (from, w, to)
    self:assert(string.len(w) == to - from, "b-a")
    lengths[from] = to - from
  end)
  self:assert(s == r and lengths[1] == 1 and lengths[3] == 3 and lengths[7] == 4 and lengths[13] == 4, "s==r")

  -- A string is balanced when no parenthesis is left after removing every
  -- balanced "(...)" group.
  local isbalanced = function (str)
    return string.find(self.gsub(str, "%b()", ""), "[()]") == nil
  end
  self:assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a"), "isbalanced")
  self:assert(not isbalanced("(9 ((8) 7) a b (\0 c) a"), "not isbalanced")
  self:assert(self.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo', "alo oi")

  local t = {"apple", "orange", "lime"; n=0}
  self:assert(self.gsub("x and x and x", "x", function () t.n = t.n + 1; return t[t.n] end)
    == "apple and orange and lime", "x and x and x")
  t = {n=0}
  self.gsub("first second word", "%w%w*", function (w) t.n = t.n + 1; t[t.n] = w end)
  self:assert(t[1] == "first" and t[2] == "second" and t[3] == "word" and t.n == 3, "t[1]==first")
  t = {n=0}
  self:assert(self.gsub("first second word", "%w+",
    function (w) t.n = t.n + 1; t[t.n] = w end, 2) == "first second word", "first 2nd word")
  self:assert(t[1] == "first" and t[2] == "second" and t[3] == nil, "first second nil")

  -- Asserts that calling f(...) raises an error whose message matches `msg`.
  local checkerror = function (msg, fn, ...)
    local ok, err = pcall(fn, ...)
    self:assert(not ok and string.find(err, msg), "checkerror " .. msg)
  end
  checkerror("invalid replacement value %(a table%)",
    self.gsub, "alo", ".", {a = {}})
  checkerror("invalid capture index %%2", self.gsub, "alo", ".", "%2")
  checkerror("invalid capture index %%0", self.gsub, "alo", "(%0)", "a")
  checkerror("invalid capture index %%1", self.gsub, "alo", "(%1)", "a")
  checkerror("invalid use of '%%'", self.gsub, "alo", ".", "%x")

  -- bug since 2.5 (C-stack overflow)
  do
    local match_optional = function (size)
      local subject = string.rep("a", size)
      local pattern = string.rep(".?", size)
      return pcall(string.match, subject, pattern)
    end
    local ok, m = match_optional(80)
    self:assert(ok and #m == 80, "r and #m == 80")
    ok, m = match_optional(200000)
    self:assert(not ok and string.find(m, "too complex"), "too complex")
  end

  local long = string.rep('a', 300000)
  self:assert(string.find(long, '^a*.?$'), "^a*.?$")
  self:assert(not string.find(long, '^a*.?b$'), "^a*.?b$")
  self:assert(string.find(long, '^a-.?$'), "^a-.?$")

  -- bug in 5.1.2
  long = string.rep('a', 10000) .. string.rep('b', 10000)
  -- the replacement argument is missing, so the call must raise an error
  self:assert(not pcall(self.gsub, long, 'b'), "rep")

  -- recursive nest of gsubs
  local function rev(str)
    return self.gsub(str, "(.)(.+)", function (c, s1) return rev(s1) .. c end)
  end
  local word = "abcdef"
  self:assert(rev(rev(word)) == word, "rev")

  -- gsub with tables
  self:assert(self.gsub("alo alo", ".", {}) == "alo alo", "aloalo{}")
  self:assert(self.gsub("alo alo", "(.)", {a="AA", l=""}) == "AAo AAo", "AAo AAo 1")
  self:assert(self.gsub("alo alo", "(.).", {a="AA", l="K"}) == "AAo AAo", "AAo AAo 2")
  self:assert(self.gsub("alo alo", "((.)(.?))", {al="AA", o=false}) == "AAo AAo", "AAo AAo 3")
  self:assert(self.gsub("alo alo", "().", {'x','yy','zzz'}) == "xyyzzz alo", "xyyzzz alo")
  t = {}; setmetatable(t, {__index = function (_, key) return string.upper(key) end})
  self:assert(self.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI", "a ALO b HI")

  -- tests for `%f' (`frontiers')
  self:assert(self.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x", "xaa xa x xaa x")
  self:assert(self.gsub("[[]] [][] [[[[", "%f[[].", "x") == "x[]] x]x] x[[[", "x[]] x]x] x[[[")
  self:assert(self.gsub("01abc45de3", "%f[%d]", ".") == ".01abc.45de.3", ".01abc.45de.3")
  self:assert(self.gsub("01abc45 de3x", "%f[%D]%w", ".") == "01.bc45 de3.", "01.bc45 de3.")
  self:assert(self.gsub("function", "%f[\1-\255]%w", ".") == ".unction", ".unction")
  self:assert(self.gsub("function", "%f[^\1-\255]", ".") == "function.", "function.")
end
--- Examples in the style of a string-library tutorial. They call `self.gsub`
-- so they exercise the same implementation as every other test case.
function tests.string_library_doc(self)
  local gsub = self.gsub
  local a, b = gsub("Hello banana", "banana", "Lua user")
  self:assert(a == "Hello Lua user" and b == 1, "plain replacement")
  a, b = gsub("banana", "a", "A", 2) -- limit substitutions made to 2
  self:assert(a == "bAnAna" and b == 2, "limit 2")
  a, b = gsub("banana", "(an)", "%1-") -- capture any occurrences of "an" and replace
  self:assert(a == "ban-an-a" and b == 2, "capture an")
  a, b = gsub("banana", "a(n)", "a(%1)") -- brackets around n's which follow a's
  self:assert(a == "ba(n)a(n)a" and b == 2, "brackets")
  a, b = gsub("banana", "(a)(n)", "%2%1") -- reverse any "an"s
  self:assert(a == "bnanaa" and b == 2, "reverse an (string)")
  -- `print` returns nothing, so every match is kept unchanged
  a, b = gsub("Hello Lua user", "(%w+)", print) -- print any words found
  self:assert(a == "Hello Lua user" and b == 3, "print words")
  a, b = gsub("Hello Lua user", "(%w+)", function (w) return string.len(w) end) -- replace with lengths
  self:assert(a == "5 3 4" and b == 3, "word lengths")
  a, b = gsub("banana", "(a)", string.upper) -- make all "a"s found uppercase
  self:assert(a == "bAnAnA" and b == 3, "upper a")
  a, b = gsub("banana", "(a)(n)", function (first, second) return second .. first end) -- reverse any "an"s
  self:assert(a == "bnanaa" and b == 2, "reverse an (function)")
  -- ".-" is the shortest match (three brace groups), ".*" the longest (one)
  a, b = gsub("The big {brown} fox jumped {over} the lazy {dog}.", "{(.-)}", function (inner) print(inner) end)
  self:assert(a == "The big {brown} fox jumped {over} the lazy {dog}." and b == 3, "lazy braces")
  a, b = gsub("The big {brown} fox jumped {over} the lazy {dog}.", "{(.*)}", function (inner) print(inner) end)
  self:assert(a == "The big {brown} fox jumped {over} the lazy {dog}." and b == 1, "greedy braces")
end
--- Additional `%b()` (balanced parentheses) cases.
function tests.custom(self)
  -- TODO https://www.lua.org/manual/5.3/manual.html#6.4.1
  -- TODO https://en.wikibooks.org/wiki/Lua_Programming/Standard_libraries#String_matching
  -- TODO http://lua-users.org/wiki/StringLibraryTutorial
  -- Removing a group keeps the spaces on both sides of it, so the expected
  -- strings of the first two checks contain runs of spaces.
  local a, b = self.gsub("hello (hi) world", "%b()", "")
  self:assert(a == "hello  world" and b == 1, "remove one group")
  a, b = self.gsub("hello (hi) (ho) world", "%b()", "")
  self:assert(a == "hello   world" and b == 2, "remove two groups")
  a, b = self.gsub("hello (hi) (ho) world", "%b()", "1")
  self:assert(a == "hello 1 1 world" and b == 2, "replace two groups")
  a, b = self.gsub("hello (h(i)) (ho) world", "%b()", "1")
  self:assert(a == "hello 1 1 world" and b == 2, "nested group")
  a, b = self.gsub("hello (h(i())) (ho) world", "%b()", "1")
  self:assert(a == "hello 1 1 world" and b == 2, "doubly nested group")
  -- the unmatched ")" is not part of any balanced group and stays in place
  a, b = self.gsub("hello (h(^C))) (ho) world", "%b()", "1")
  self:assert(a == "hello 1) 1 world" and b == 2, "unbalanced close")
end
--[[
actually run these test cases
]]
do
  -- `pairs` visits keys in an unspecified order, so the names are collected
  -- and sorted first; run order and test numbering are then the same on
  -- every run.
  local names = {}
  for name, value in pairs(tests) do
    if type(value) == "function" and name ~= "gsub" and name ~= "assert" then
      names[#names + 1] = name
    end
  end
  table.sort(names)
  for _, name in ipairs(names) do
    print(("Running %s tests"):format(name))
    tests[name](tests)
  end
end
-- (web page footer text from the hosting site removed; it was not part of the test script)