Created
April 23, 2011 07:58
-
-
Save EmmanuelOga/938457 to your computer and use it in GitHub Desktop.
Lua subsequence match with a c extension.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local function bench(name, count, cbk) | |
local start = os.clock() | |
for i = 1, count do cbk() end | |
print(name, os.clock() - start) | |
end | |
local function ssm(pattern, str, start) | |
local pidx, pb, sb = start or 1, {pattern:byte(1, #pattern)}, {str:byte(1, #str)} | |
for i = 1, #str do | |
if sb[i] == pb[pidx] then pidx = pidx + 1 end | |
if pidx > #pb then return true end | |
end | |
return pidx | |
end | |
local ssmC = require('subsequence').subsequenceMatch | |
local LONG = string.rep("z", 100).."app/models/user.rb" | |
function test(cbk) | |
cbk("" , "bananas" ) | |
cbk("as" , "bananas" ) | |
cbk("bas", "bananas" ) | |
cbk("x" , "bananas" ) | |
cbk("caf", "bananas" ) | |
cbk("bfa", "bananas" ) | |
cbk("bfa", "fanatic" ) | |
cbk("bfa", "fanatic" , 2 ) | |
cbk("bax", "bananasx" , 3 ) | |
cbk("bax", "bananas" , 3 ) | |
cbk(string.rep("a/m/user", 10), LONG) | |
cbk("bfa123123123123123123123123123123", "bananas123123123123123123123123123123" ) | |
cbk("b123123123123123123123123123123fa", "fa123123123123123123123123123123natic" ) | |
cbk("bf123123123123123123123123123123a", "fa123123123123123123123123123123natic" , 2 ) | |
cbk("b123123123123123123123123123123ax", "b123123123123123123123123123123ananasx" , 3 ) | |
cbk("a/m/user", LONG) | |
cbk(string.rep("a/m/user", 10), LONG) | |
end | |
local TIMES = 50000 | |
bench("lua", TIMES, function() test(ssm) end) | |
bench("C ext", TIMES, function() test(ssmC) end) | |
--[[ | |
RESULTS: | |
▸ lua -v | |
Lua 5.1.4 Copyright (C) 1994-2008 Lua.org, PUC-Rio | |
▸ lua ssbench.lua | |
lua 5.94 | |
C ext 0.24 | |
▸ luajit-2.0.0-beta6 -v | |
LuaJIT 2.0.0-beta6 -- Copyright (C) 2005-2011 Mike Pall. http://luajit.org/ | |
▸ luajit-2.0.0-beta6 ssbench.lua | |
lua 1.36 | |
C ext 0.17 | |
]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Built with: | |
gcc -fPIC -shared -I/usr/local/include -O3 -Wall -Wextra subsequence.c -o subsequence.so | |
*/ | |
#include "lua.h" | |
#include "lualib.h" | |
#include "lauxlib.h" | |
#include "string.h" | |
/* returns idx of first char on the pattern that could not be found, or -1 if it found the pattern entirely. */ | |
static inline int subsequenceMatch(const char *str, const char *subseq, int idx) { | |
int lsubseq = (int)strlen(subseq); | |
if (str && subseq && idx < lsubseq) { | |
const char *pSeq = subseq + (idx < 0 ? 0 : idx); | |
const char *pSeqEnd = subseq + lsubseq; | |
while (*str) { | |
if (*str++ == *pSeq && ++pSeq >= pSeqEnd) { return -1; } | |
} | |
return pSeq - subseq; | |
} | |
return -1; | |
} | |
static int l_subsequenceMatch(lua_State *L) { | |
int idx = subsequenceMatch(luaL_checkstring(L, 2), luaL_checkstring(L, 1), lua_tonumber(L, 3) - 1); | |
if (idx == -1) { | |
lua_pushboolean(L, 1); | |
} else { | |
lua_pushnumber(L, idx + 1); | |
}; | |
return 1; | |
} | |
static const struct luaL_Reg subsequence [] = { | |
{"subsequenceMatch", l_subsequenceMatch}, | |
{NULL, NULL} /* sentinel */ | |
}; | |
int luaopen_subsequence(lua_State *L) { | |
luaL_register(L, "subsequence", subsequence); | |
return 1; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
local ssm = require('subsequence').subsequenceMatch | |
local function test_match(expected, string, pattern, idx) | |
local returned = ssm(pattern, string, idx) | |
local result = "Expected: " .. tostring(expected) .. " Returned: " .. tostring(returned) | |
print(result) | |
assert(expected == returned) | |
end | |
test_match(true , "bananas" , "") | |
test_match(true , "bananas" , "as") | |
test_match(true , "bananas" , "bas") | |
test_match(1 , "bananas" , "x") | |
test_match(1 , "bananas" , "caf") | |
test_match(2 , "bananas" , "bfa") | |
test_match(1 , "fanatic" , "bfa") | |
test_match(true , "fanatic" , "bfa", 2) | |
test_match(true, "bananasx" , "bax", 3) | |
test_match(3, "bananas" , "bax", 3) | |
test_match(true, "bananas" , "bax", 4) | |
test_match(true, "bananas" , "bax", 5) | |
print("OK") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment