Created
September 9, 2019 15:37
-
-
Save meisterluk/9cdf7ad78253ebcb434c19358507690f to your computer and use it in GitHub Desktop.
lua string.gsub test extracted from the official lua testsuite
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Shared state for the suite: every test reaches the function under test
-- through `self.gsub`, and `testID` counts the assertions made so far.
local tests = {
  gsub = string.gsub,
  testID = 0,
}
--- Count one assertion and raise an error when it does not hold.
-- @param self        the `tests` table; its `testID` counter is incremented
-- @param cond        value under test; `nil` or `false` counts as a failure
-- @param identifier  optional label naming the check in the failure message
function tests.assert(self, cond, identifier)
  self.testID = self.testID + 1
  if cond then
    return
  end
  -- The message is only built on failure, and the label is left out when the
  -- caller supplied none (instead of reporting a "(nil test)").
  local label = ""
  if identifier ~= nil then
    label = " (" .. tostring(identifier) .. " test)"
  end
  -- Level 0 keeps the message free of position information, exactly like the
  -- message produced by the built-in assert.
  error("test #" .. tostring(self.testID) .. label .. " failed", 0)
end
-- via api.lua | |
--- Basic gsub substitutions (adapted from api.lua): a function replacement,
-- escaped dots, plain-text patterns and an empty subject.
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.api(self)
  -- Each case: subject, pattern, replacement, expected result, label.
  local cases = {
    { "alo alo", "(a)", function (x) return x .. 'b' end, 'ablo ablo', 'ablo' },
    { "alo.alo.uhuh.", "%.", "//", "alo//alo//uhuh//", "alo alo" },
    { "alo.alo.uhuh.", "alo", "//", "//.//.uhuh.", "uhuh" },
    { "", "alo", "//", "", "alo" },
    { "...", "%.", "/.", "/././.", "dot" },
    { "...", "%.%.%.", "", "", "ellipsis" },
  }
  for _, case in ipairs(cases) do
    -- Only the substituted string is checked; the match count is ignored.
    local got = self.gsub(case[1], case[2], case[3])
    self:assert(got == case[4], case[5])
  end
end
-- via constructs.lua | |
--- Whitespace normalisation (adapted from constructs.lua): every run of
-- whitespace in a chunk's source text is replaced by a single newline.
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.constructs(self)
  -- The literal must start flush left: leading whitespace would turn into a
  -- leading newline after the substitution below.
  local source = [[
return function ( a , b , c , d , e )
local x = a >= b or c or ( d and e ) or nil
return x
end , { a = 1 , b = 2 >= 1 , } or { 1 };
]]

  -- One token per line is what the substitution has to produce.
  local tokens = {
    "return", "function", "(", "a", ",", "b", ",", "c", ",", "d", ",", "e", ")",
    "local", "x", "=", "a", ">=", "b", "or", "c", "or",
    "(", "d", "and", "e", ")", "or", "nil",
    "return", "x",
    "end", ",", "{", "a", "=", "1", ",", "b", "=", "2", ">=", "1", ",", "}",
    "or", "{", "1", "};",
  }
  local expected = table.concat(tokens, "\n") .. "\n"

  local rewritten = self.gsub(source, "%s+", "\n")
  self:assert(rewritten == expected, "force a SETLINE between opcodes")
end
-- via coroutine.lua | |
--- Function replacement on a one-character subject (adapted from
-- coroutine.lua): the match is doubled and exactly one substitution is made.
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.coroutine(self)
  local function double(ch)
    return ch .. ch
  end

  local doubled, count = self.gsub("a", ".", double)
  self:assert(doubled == "aa")
  self:assert(count == 1)
end
-- via db.lua | |
--- Line counting through gsub's second result (adapted from db.lua).
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.db(self)
  -- Number of "\n" characters in `text`, taken from the substitution count.
  local function countlines(text)
    local _, newlines = self.gsub(text, "\n", "")
    return newlines
  end

  -- these tests are contrived, the original testcases use some tracebacks
  self:assert(countlines("hello\nworld\n") == 2, "hello world line count")
  self:assert(countlines("\n\n\n\n\n") == 5, "empty lines count")
end
-- via errors.lua | |
--- Escaping punctuation with a literal percent sign (adapted from errors.lua).
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.errors(self)
  -- "%%" in the replacement is a literal '%'; "%1" is the captured character.
  local escaped, count = self.gsub("<eof>", "(%p)", "%%%1")
  self:assert(escaped == "%<eof%>")
  self:assert(count == 2)
end
-- via gc.lua | |
--- Stress checks adapted from gc.lua: many short-lived gsub results, then one
-- substitution that removes 2500 four-digit groups from a 10000-byte string.
-- All working variables are local, so running the test leaves no globals
-- behind.
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.gc(self)
  local limit = 5000
  local a = "a"
  local contCreate = 0
  while contCreate <= limit do
    -- The gsub result is discarded on purpose: the loop only churns strings.
    a = contCreate .. "b"
    a = self.gsub(a, '(%d%d*)', string.upper)
    a = "a"
    contCreate = contCreate + 1
  end
  self:assert(a == "a")
  self:assert(contCreate == 5001)

  local x = "01234567890123456789012345678901234567890123456789012345678901234567890123456789"
  self:assert(string.len(x) == 80, "length")

  local k = math.min(300, (math.maxinteger // 80) // 2)
  -- string.rep builds the same k-fold repetition as appending x in a loop,
  -- without the quadratic copying.
  local s = string.rep(x, k)
  self:assert(string.len(s) == k * 80, "length 2")

  s = string.sub(s, 1, 10000)
  local i
  s, i = self.gsub(s, '(%d%d%d%d)', '')
  self:assert(s == "", "s")
  self:assert(i == 2500, "#matches")
end
-- via literals.lua | |
-- Line-ending check adapted from literals.lua: the newlines of a small chunk
-- are rewritten with gsub into four different line-ending styles, and each
-- variant is loaded and executed.
-- NOTE(review): every line of this listing ends in " | |", which looks like
-- page-extraction residue rather than Lua, and the chunk literal below is
-- sensitive to line layout -- confirm against the pristine file.
function tests.literals(self) | |
-- environment handed to load(); the chunk's globals (a, b, x, y) land here
local env = { ["require"] = require } | |
-- compiles x as a text chunk ("t") with env as its environment, raises if
-- compilation fails, then runs it and returns its results
local dostring = function (x) return assert(load(x, "", "t", env))() end | |
local prog = [[ | |
a = 1 -- a comment | |
b = 2 | |
x = [=[ | |
hi | |
]=] | |
y = "\ | |
hello\r\n\ | |
" | |
return require"debug".getinfo(1).currentline | |
]] | |
for _, n in pairs{"\n", "\r", "\n\r", "\r\n"} do | |
-- nn is the number of "\n" replaced; the chunk returns the line number of its
-- final statement, which the first assertion expects to equal nn
local prog, nn = self.gsub(prog, "\n", n) | |
self:assert(dostring(prog) == nn, "dostring") | |
-- the expected values are the same for every line-ending style
self:assert(env.x == "hi\n" and env.y == "\nhello\r\n\n", "x&y") | |
end | |
end | |
-- via math.lua | |
--- Integer-overflow parsing checks adapted from math.lua: decimal strings one
-- past the integer limits, and 31-digit numbers, must convert to floats.
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.math(self)
  local minint, maxint = math.mininteger, math.maxinteger

  -- Equal in value and in numeric subtype (integer vs. float).
  local function eqT(lhs, rhs)
    return lhs == rhs and math.type(lhs) == math.type(rhs)
  end

  -- Decimal text of n with its last digit raised by one; the digit must not
  -- be '9', because no carry is performed.
  local function incd(n)
    local digits = string.format("%d", n)
    local bumped = self.gsub(digits, "%d$", function (last)
      self:assert(last ~= '9', "9")
      return string.char(string.byte(last) + 1)
    end)
    return bumped
  end

  -- 'tonumber' with overflow by 1
  self:assert(eqT(tonumber(incd(maxint)), maxint + 1.0), "maxint")
  self:assert(eqT(tonumber(incd(minint)), minint - 1.0), "minint")

  -- large numbers
  local thirty_zeros = string.rep("0", 30)
  self:assert(eqT(tonumber("1" .. thirty_zeros), 1e30), "1e30")
  self:assert(eqT(tonumber("-1" .. thirty_zeros), -1e30), "-1e30")
end
-- via main.lua | |
-- Space-to-newline check adapted from main.lua: every single space in the
-- chunk source `s` is replaced by two newlines and the result is compared
-- with the literal `expected`.
-- NOTE(review): each ' ' becomes '\n\n', yet `expected` as listed here shows
-- no empty lines, and every line ends in " | |" (looks like page-extraction
-- residue). The exact spaces and blank lines of both literals appear to have
-- been lost in this listing -- confirm against the pristine file before
-- trusting or editing either string.
function tests.main(self) | |
-- chunk source used purely as text; it is never loaded or executed here
local s = [=[ -- | |
function f ( x ) | |
local a = [[ | |
xuxu | |
]] | |
local b = "\ | |
xuxu\n" | |
if x == 11 then return 1 + 12 , 2 + 20 end --[[ test multiple returns ]] | |
return x + 1 | |
--\\ | |
end | |
return( f( 100 ) ) | |
assert( a == b ) | |
do return f( 11 ) end ]=] | |
-- plain single-space pattern: each space is replaced on its own, so runs of
-- spaces are not collapsed
s = self.gsub(s, ' ', '\n\n') | |
local expected = [=[ | |
-- | |
function | |
f | |
( | |
x | |
) | |
local | |
a | |
= | |
[[ | |
xuxu | |
]] | |
local | |
b | |
= | |
"\ | |
xuxu\n" | |
if | |
x | |
== | |
11 | |
then | |
return | |
1 | |
+ | |
12 | |
, | |
2 | |
+ | |
20 | |
end | |
--[[ | |
test | |
multiple | |
returns | |
]] | |
return | |
x | |
+ | |
1 | |
--\\ | |
end | |
return( | |
f( | |
100 | |
) | |
) | |
assert( | |
a | |
== | |
b | |
) | |
do | |
return | |
f( | |
11 | |
) | |
end | |
]=] | |
-- byte-for-byte comparison of the rewritten source with the literal above
self:assert(s == expected, "double newlines") | |
end | |
-- via utf8.lua | |
--- UTF-8 length checks adapted from utf8.lua: the number of characters
-- obtained by deleting continuation bytes with gsub must agree with utf8.len
-- and with the number of expected code points.
-- Every check goes through self:assert so it is counted and labeled like the
-- rest of the suite, and no global is written.
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.utf8(self)
  -- Character count of s: its length once every byte in 0x80-0xBF is removed.
  local len = function (s)
    return #self.gsub(s, "[\x80-\xBF]", "")
  end

  -- s: string under test; t: list with one entry per expected character.
  local check = function (s, t)
    local l = utf8.len(s)
    self:assert(#t == l and len(s) == l, "utf8 length")
  end

  local s = "hello World"
  local t = {string.byte(s, 1, -1)}
  for i = 1, utf8.len(s) do
    self:assert(t[i] == string.byte(s, i), "byte " .. i)
  end
  check(s, t)

  check("汉字/漢字", {27721, 23383, 47, 28450, 23383,})
  check("", {})

  -- Shortest and longest encodings of each length; "\z" skips the line
  -- breaks and indentation inside the literal.
  s = "\0 \x7F\z
       \xC2\x80 \xDF\xBF\z
       \xE0\xA0\x80 \xEF\xBF\xBF\z
       \xF0\x90\x80\x80 \xF4\x8F\xBF\xBF"
  s = self.gsub(s, " ", "")
  check(s, {0,0x7F, 0x80,0x7FF, 0x800,0xFFFF, 0x10000,0x10FFFF})

  local x = "日本語a-4\0éó"
  check(x, {26085, 26412, 35486, 97, 45, 52, 0, 233, 243})

  check("𣲷𠜎𠱓𡁻𠵼ab𠺢",
    {0x23CB7, 0x2070E, 0x20C53, 0x2107B, 0x20D7C, 0x61, 0x62, 0x20EA2,})

  check("𨳊𩶘𦧺𨳒𥄫𤓓\xF4\x8F\xBF\xBF",
    {0x28CCA, 0x29D98, 0x269FA, 0x28CD2, 0x2512B, 0x244D3, 0x10ffff})
end
-- via strings.lua (does not work on my system; os.setlocale always fails)
--[[ | |
function tests.strings(self) | |
local locales = { "ptb", "pt_BR.iso88591", "ISO-8859-1" } | |
local function trylocale (w) | |
for i = 1, #locales do | |
if os.setlocale(locales[i], w) then | |
print(string.format("'%s' locale set to '%s'", w, locales[i])) | |
return locales[i] | |
end | |
end | |
print(string.format("'%s' locale not found", w)) | |
return false | |
end | |
if trylocale("collate") then | |
self:assert("alo" < "álo" and "álo" < "amo") | |
end | |
if trylocale("ctype") then | |
self:assert(self.gsub("áéíóú", "%a", "x") == "xxxxx") | |
self:assert(self.gsub("áÁéÉ", "%l", "x") == "xÁxÉ") | |
self:assert(self.gsub("áÁéÉ", "%u", "x") == "áxéx") | |
end | |
self:assert(false) | |
end]] | |
-- via pm.lua | |
--- Pattern-matching checks adapted from pm.lua: back-references, character
-- sets, captures, anchors, function and table replacements, `%b`, `%f`, and
-- the errors raised for malformed replacement strings.
-- Chunks executed by the tests run in a scratch environment and the recursive
-- helper is local, so the test leaves no globals behind.
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.pm(self)
  -- Wraps pattern p in position captures (after shifting every %N
  -- back-reference up by one to make room for the new first capture) and
  -- returns the part of s between the first and the last captured position.
  local f1 = function(s, p)
    p = self.gsub(p, "%%([0-9])", function (d)
      return "%" .. (tonumber(d)+1)
    end)
    p = self.gsub(p, "^(^?)", "%1()", 1)
    p = self.gsub(p, "($?)$", "()%1", 1)
    local t = {string.match(s, p)}
    return string.sub(s, t[1], t[#t] - 1)
  end

  self:assert(f1('alo alx 123 b\0o b\0o', '(..*) %1') == "b\0o b\0o", "\\0")
  self:assert(f1('axz123= 4= 4 34', '(.+)=(.*)=%2 %1') == '3= 4= 4 3', "3443")
  self:assert(f1('=======', '^(=*)=%1$') == '=======', "equalssign")

  -- Returns the integers i..j as multiple values.
  local range
  range = function (i, j)
    if i <= j then
      return i, range(i+1, j)
    end
  end

  -- All 256 byte values, in order.
  local abc = string.char(range(0, 255));

  -- Concatenation of every byte of abc matched by pattern p.
  local strset = function (p)
    local res = {s=''}
    self.gsub(abc, p, function (c) res.s = res.s .. c end)
    return res.s
  end;

  self:assert(string.len(strset('[\200-\210]')) == 11, "11")
  self:assert(strset('[a-z]') == "abcdefghijklmnopqrstuvwxyz", "lower latin")
  self:assert(strset('[a-z%d]') == strset('[%da-uu-z]'), "a-z%d")
  self:assert(strset('[a-]') == "-a", "a-")
  self:assert(strset('[^%W]') == strset('[%w]'), "^%W")
  self:assert(strset('[]%%]') == '%]', "%]")
  self:assert(strset('[a%-z]') == '-az', "-az")
  self:assert(strset('[%^%[%-a%]%-b]') == '-[]^ab', "-[]^ab")
  self:assert(strset('%Z') == strset('[\1-\255]'), "%Z")
  self:assert(strset('.') == strset('[\1-\255%z]'), ".")

  self:assert(self.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo', "xlo")
  self:assert(self.gsub('alo úlo ', ' +$', '') == 'alo úlo', "trim")
  self:assert(self.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo', "double trim")
  self:assert(self.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ', "alo alo 123")

  local t = "abç d"
  local a, b = self.gsub(t, '(.)', '%1@')
  self:assert('@'..a == self.gsub(t, '', '@'), "@str")
  self:assert(b == 6, "@6") -- adjusted to 6, because this file (unlike the ISO-8859-1 test files) is UTF-8 encoded
  a, b = self.gsub('abçd', '(.)', '%0@', 2)
  self:assert(a == 'a@b@çd' and b == 2, "a@b")
  self:assert(self.gsub('alo alo', '()[al]', '%1') == '12o 56o', "()[al]")
  self:assert(self.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") ==
    "xyz=abc-abc=xyz", "abc=xyz")
  self:assert(self.gsub("abc", "%w", "%1%0") == "aabbcc", "aabbcc")
  self:assert(self.gsub("abc", "%w+", "%0%1") == "abcabc", "abcabc")
  self:assert(self.gsub('áéí', '$', '\0óú') == 'áéí\0óú', "áéí")
  self:assert(self.gsub('', '^', 'r') == 'r', "r")
  self:assert(self.gsub('', '$', 'r') == 'r', "r")
  self:assert(self.gsub("a b cd", " *", "-") == "-a-b-c-d-", "-a-b-c-d-")
  self:assert(self.gsub("um (dois) tres (quatro)", "(%(%w+%))", string.upper) ==
    "um (DOIS) tres (QUATRO)", "um tres")

  do
    local env = {}
    local set = function (n,v) env[n] = v end
    self.gsub("a=roberto,roberto=a", "(%w+)=(%w%w*)", set)
    self:assert(env.a=="roberto" and env.roberto=="a", "env assign")
  end

  local f = function (a,b) return self.gsub(a,'.',b) end
  self:assert(self.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) ==
    "trocar tudo em bbbbb é alalalalalal", "alalalalalal")

  -- Chunks run against a scratch environment that falls back to the real
  -- globals, so their assignments (`a`, `x`) do not leak into _G.
  local sandbox = setmetatable({}, {__index = _G})
  local dostring = function (s) return load(s, "", "t", sandbox)() or "" end

  -- The first $...$ chunk returns nothing and is replaced by "", so the space
  -- before it and the space after it both survive: two spaces in a row. The
  -- expected value is spelled as two pieces to keep that visible.
  self:assert(self.gsub("alo $a='x'$ novamente $return a$",
    "$([^$]*)%$",
    dostring) == "alo " .. " novamente x", "novamente")
  local x = self.gsub("$x=string.gsub('alo', '.', string.upper)$ assim vai para $return x$",
    "$([^$]*)%$", dostring)
  self:assert(x == ' assim vai para ALO', "assim")

  local t = {}
  local s = 'a alo jose joao'
  local r = self.gsub(s, '()(%w+)()', function (a,w,b)
    self:assert(string.len(w) == b-a, "b-a");
    t[a] = b-a;
  end)
  self:assert(s == r and t[1] == 1 and t[3] == 3 and t[7] == 4 and t[13] == 4, "s==r")

  -- True when no parenthesis is left after removing every balanced group.
  local isbalanced = function (s)
    return string.find(self.gsub(s, "%b()", ""), "[()]") == nil
  end
  self:assert(isbalanced("(9 ((8))(\0) 7) \0\0 a b ()(c)() a"), "isbalanced")
  self:assert(not isbalanced("(9 ((8) 7) a b (\0 c) a"), "not isbalanced")
  self:assert(self.gsub("alo 'oi' alo", "%b''", '"') == 'alo " alo', "alo oi")

  local t = {"apple", "orange", "lime"; n=0}
  self:assert(self.gsub("x and x and x", "x", function () t.n=t.n+1; return t[t.n] end)
    == "apple and orange and lime", "x and x and x")
  t = {n=0}
  self.gsub("first second word", "%w%w*", function (w) t.n=t.n+1; t[t.n] = w end)
  self:assert(t[1] == "first" and t[2] == "second" and t[3] == "word" and t.n == 3, "t[1]==first")
  t = {n=0}
  self:assert(self.gsub("first second word", "%w+",
    function (w) t.n=t.n+1; t[t.n] = w end, 2) == "first second word", "first 2nd word")
  self:assert(t[1] == "first" and t[2] == "second" and t[3] == nil, "first second nil")

  -- Calls f(...) under pcall and requires it to fail with an error message
  -- matching the pattern msg.
  local checkerror = function (msg, f, ...)
    local s, err = pcall(f, ...)
    self:assert(not s and string.find(err, msg), "checkerror " .. msg)
  end
  checkerror("invalid replacement value %(a table%)",
    self.gsub, "alo", ".", {a = {}})
  checkerror("invalid capture index %%2", self.gsub, "alo", ".", "%2")
  checkerror("invalid capture index %%0", self.gsub, "alo", "(%0)", "a")
  checkerror("invalid capture index %%1", self.gsub, "alo", "(%1)", "a")
  checkerror("invalid use of '%%'", self.gsub, "alo", ".", "%x")

  -- bug since 2.5 (C-stack overflow)
  do
    local f = function (size)
      local s = string.rep("a", size)
      local p = string.rep(".?", size)
      return pcall(string.match, s, p)
    end
    local r, m = f(80)
    self:assert(r and #m == 80, "r and #m == 80")
    r, m = f(200000)
    self:assert(not r and string.find(m, "too complex"), "too complex")
  end

  local a = string.rep('a', 300000)
  self:assert(string.find(a, '^a*.?$'), "^a*.?$")
  self:assert(not string.find(a, '^a*.?b$'), "^a*.?b$")
  self:assert(string.find(a, '^a-.?$'), "^a-.?$")

  -- bug in 5.1.2
  a = string.rep('a', 10000) .. string.rep('b', 10000)
  self:assert(not pcall(self.gsub, a, 'b'), "rep")

  -- recursive nest of gsubs
  -- (a local function, so the helper does not become a global)
  local function rev (s)
    return self.gsub(s, "(.)(.+)", function (c,s1) return rev(s1)..c end)
  end
  local x = "abcdef"
  self:assert(rev(rev(x)) == x, "rev")

  -- gsub with tables
  self:assert(self.gsub("alo alo", ".", {}) == "alo alo", "aloalo{}")
  self:assert(self.gsub("alo alo", "(.)", {a="AA", l=""}) == "AAo AAo", "AAo AAo 1")
  self:assert(self.gsub("alo alo", "(.).", {a="AA", l="K"}) == "AAo AAo", "AAo AAo 2")
  self:assert(self.gsub("alo alo", "((.)(.?))", {al="AA", o=false}) == "AAo AAo", "AAo AAo 3")
  self:assert(self.gsub("alo alo", "().", {'x','yy','zzz'}) == "xyyzzz alo", "xyyzzz alo")
  t = {}; setmetatable(t, {__index = function (t,s) return string.upper(s) end})
  self:assert(self.gsub("a alo b hi", "%w%w+", t) == "a ALO b HI", "a ALO b HI")

  -- tests for `%f' (`frontiers')
  self:assert(self.gsub("aaa aa a aaa a", "%f[%w]a", "x") == "xaa xa x xaa x", "xaa xa x xaa x")
  self:assert(self.gsub("[[]] [][] [[[[", "%f[[].", "x") == "x[]] x]x] x[[[", "x[]] x]x] x[[[")
  self:assert(self.gsub("01abc45de3", "%f[%d]", ".") == ".01abc.45de.3", ".01abc.45de.3")
  self:assert(self.gsub("01abc45 de3x", "%f[%D]%w", ".") == "01.bc45 de3.", "01.bc45 de3.")
  self:assert(self.gsub("function", "%f[\1-\255]%w", ".") == ".unction", ".unction")
  self:assert(self.gsub("function", "%f[^\1-\255]", ".") == "function.", "function.")
end
--- Worked gsub examples with string, capture and function replacements.
-- They call `self.gsub` like every other test in this table, so they follow
-- whatever implementation the table's `gsub` field points to.
-- Two kinds of cases write to standard output through `print`.
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.string_library_doc(self)
  local a, b = self.gsub("Hello banana", "banana", "Lua user")
  self:assert(a == "Hello Lua user" and b == 1, "plain replacement")

  a, b = self.gsub("banana", "a", "A", 2) -- limit substitutions made to 2
  self:assert(a == "bAnAna" and b == 2, "limit")

  a, b = self.gsub("banana", "(an)", "%1-") -- capture any occurrences of "an" and replace
  self:assert(a == "ban-an-a" and b == 2, "capture")

  a, b = self.gsub("banana", "a(n)", "a(%1)") -- brackets around n's which follow a's
  self:assert(a == "ba(n)a(n)a" and b == 2, "brackets")

  a, b = self.gsub("banana", "(a)(n)", "%2%1") -- reverse any "an"s
  self:assert(a == "bnanaa" and b == 2, "swap captures")

  -- print returns nothing, so every match is kept unchanged
  a, b = self.gsub("Hello Lua user", "(%w+)", print) -- print any words found
  self:assert(a == "Hello Lua user" and b == 3, "print words")

  a, b = self.gsub("Hello Lua user", "(%w+)", function(w) return string.len(w) end) -- replace with lengths
  self:assert(a == "5 3 4" and b == 3, "lengths")

  a, b = self.gsub("banana", "(a)", string.upper) -- make all "a"s found uppercase
  self:assert(a == "bAnAnA" and b == 3, "upper")

  a, b = self.gsub("banana", "(a)(n)", function(a,b) return b..a end) -- reverse any "an"s
  self:assert(a == "bnanaa" and b == 2, "swap with function")

  -- lazy ".-" stops at the first closing brace: three separate matches
  a, b = self.gsub("The big {brown} fox jumped {over} the lazy {dog}.","{(.-)}", function(a) print(a) end )
  self:assert(a == "The big {brown} fox jumped {over} the lazy {dog}." and b == 3, "lazy braces")

  -- greedy ".*" runs to the last closing brace: a single match
  a, b = self.gsub("The big {brown} fox jumped {over} the lazy {dog}.","{(.*)}", function(a) print(a) end )
  self:assert(a == "The big {brown} fox jumped {over} the lazy {dog}." and b == 1, "greedy braces")
end
--- Additional `%b()` (balanced parentheses) checks.
-- Removing a parenthesised group keeps the spaces on both sides of it, so the
-- expected strings contain runs of spaces; they are spelled out piece by
-- piece to keep the exact count visible.
-- @param self  the `tests` table; supplies `gsub` and `assert`
function tests.custom(self)
  -- TODO https://www.lua.org/manual/5.3/manual.html#6.4.1
  -- TODO https://en.wikibooks.org/wiki/Lua_Programming/Standard_libraries#String_matching
  -- TODO http://lua-users.org/wiki/StringLibraryTutorial
  local a, b = self.gsub("hello (hi) world", "%b()", "")
  -- "hello " + "" + " world": two spaces remain
  self:assert(a == "hello " .. " world" and b == 1, "one group removed")

  a, b = self.gsub("hello (hi) (ho) world", "%b()", "")
  -- "hello " + "" + " " + "" + " world": three spaces remain
  self:assert(a == "hello " .. " " .. " world" and b == 2, "two groups removed")

  a, b = self.gsub("hello (hi) (ho) world", "%b()", "1")
  self:assert(a == "hello 1 1 world" and b == 2, "two groups replaced")

  a, b = self.gsub("hello (h(i)) (ho) world", "%b()", "1")
  self:assert(a == "hello 1 1 world" and b == 2, "nested group")

  a, b = self.gsub("hello (h(i())) (ho) world", "%b()", "1")
  self:assert(a == "hello 1 1 world" and b == 2, "doubly nested group")

  -- the surplus ')' is not part of any balanced group and stays in place
  a, b = self.gsub("hello (h(^C))) (ho) world", "%b()", "1")
  self:assert(a == "hello 1) 1 world" and b == 2, "unbalanced closing parenthesis")
end
--[[
actually run these test cases
]]
-- pairs() visits keys in an unspecified order, which would make the order of
-- the tests (and therefore the test numbers in failure messages) change from
-- run to run. Collect the test names first and run them sorted.
local test_names = {}
for name, candidate in pairs(tests) do
  -- `gsub` and `assert` are helpers stored in the same table, not tests
  if type(candidate) == "function" and name ~= "gsub" and name ~= "assert" then
    test_names[#test_names + 1] = name
  end
end
table.sort(test_names, function (lhs, rhs)
  return tostring(lhs) < tostring(rhs)
end)
for _, name in ipairs(test_names) do
  print(("Running %s tests"):format(name))
  tests[name](tests)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment