Skip to content

Instantly share code, notes, and snippets.

@xiejiangzhi
Last active April 12, 2022 01:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xiejiangzhi/29aa38117dda2c4723ec67b98cacdbb4 to your computer and use it in GitHub Desktop.
Save xiejiangzhi/29aa38117dda2c4723ec67b98cacdbb4 to your computer and use it in GitHub Desktop.
local Names = {}
local CharIndex = {} -- { [index_str] = { word1, word2, word3, ...}, ... }
local CharIndexPos = {} -- { [index_str] = { char_pos1, char_pos2, char_pos3, ...}, ... }
local MaxIndexLen = 2 -- 1: start with {x}, 2: start with {xx}, 3: start with {xxx}
-- word: asdf asdf zxcv
-- fuzzy match: adfasf, aszx, asdfasdf, as as
local RandChars = 'abcdefghijklnmopqrstuvwxyz'
local function rand_word(n)
if not n then n = math.random(3, 10) end
local cs = {}
for _ = 1, n do
local idx = math.random(#RandChars)
cs[#cs + 1] = RandChars:sub(idx, idx)
end
return table.concat(cs)
end
-- init test data
for _ = 1, 1000000 do
local n = math.random(3)
local ns = {}
for _ = 1, n do
ns[#ns + 1] = rand_word()
end
Names[#Names + 1] = table.concat(ns, ' ')
end
Names[#Names + 1] = 'abcabc'
Names[#Names + 1] = 'abcxabc'
-- init index
local function index_name(name, id)
local index_data = {}
local added = {}
for i = 1, #name do
for j = 0, MaxIndexLen - 1 do
local c = name:sub(i, i + j)
if not added[c] then
added[c] = true
local cg = CharIndex[c]
local cpg = CharIndexPos[c]
if not cg then
cg = {}
CharIndex[c] = cg
cpg = {}
CharIndexPos[c] = cpg
end
cg[#cg + 1] = id
cpg[#cpg + 1] = i
end
end
end
return index_data
end
for i, name in ipairs(Names) do
index_name(name, i)
end
-- start_index_len: name must include query_str:sub(1, start_index_len), max is MaxIndexLen
local function search_str(query_str, start_index_len)
if not start_index_len or start_index_len < 1 then start_index_len = 1 end
if start_index_len > MaxIndexLen then start_index_len = MaxIndexLen end
local qidx = query_str:sub(1, start_index_len)
local last_list = CharIndex[qidx]
local last_pos = CharIndexPos[qidx]
print(#last_list)
for i = 2, #query_str do
local c = query_str:sub(i, i)
local r = {}
local rpos = {}
if c == '.' then
for wi, id in ipairs(last_list) do
r[#r + 1] = id
rpos[#rpos + 1] = last_pos[wi] + 1
end
else
for wi, id in ipairs(last_list) do
local s = Names[id]
local idx = s:find(c, last_pos[wi])
if idx then
r[#r + 1] = id
rpos[#rpos + 1] = idx + 1
end
end
end
last_list, last_pos = r, rpos
end
local r = {}
for _, id in ipairs(last_list) do
r[#r + 1] = Names[id]
end
return r
end
local function time_test(name, fn, ...)
local st = os.clock()
local r = fn(...)
local time = (os.clock() - st) * 1000
print(string.format("%s cost %.4fms", name, time))
return r
end
local test_s = {
'aa', 'aaa', 'aaaa', 'abc', 'abcd', 'abc abc',
Names[1], Names[#Names], 'abc.abc'
}
for _, str in ipairs(test_s) do
local r = time_test('search: "'..str..'"', search_str, str, 3)
if r then
local pws = {}
for i, pw in ipairs(r) do
pws[#pws + 1] = pw
if i >= 5 then
break
end
end
print(string.format('[OK] "%s" (%i): %s', str, #r, table.concat(pws, ', ')))
else
print('[FAIL] not found "'..str..'"')
end
print('')
end
@xiejiangzhi
Copy link
Author

xiejiangzhi commented Apr 11, 2022

for 1000000 items.

15565
search: "aa" cost 4.0000ms
[OK] "aa" (15565): sdwaagr lmafe, nufuesuqc gvaaq nmv, pltspecbqj ykjaraabh ytbks, fbogk chcnvws opxtaa, ipggbvqf mbj vaahizqe

15565
search: "aaa" cost 4.0000ms
[OK] "aaa" (15565): sdwaagr lmafe, nufuesuqc gvaaq nmv, pltspecbqj ykjaraabh ytbks, fbogk chcnvws opxtaa, ipggbvqf mbj vaahizqe

15565
search: "aaaa" cost 5.0000ms
[OK] "aaaa" (3533): sdwaagr lmafe, cne baaryons zavrshpcpn, jph uulaafag habcr, qpmbcq zaaabthz izoct, ifhurjcaa hsv facjeipje

15963
search: "abc" cost 4.0000ms
[OK] "abc" (3509): thmd niyvkabbcm imdcmceg, tnrndksmab aicgbh, jph uulaafag habcr, wnabvsx srhbqzchow jjxwbr, qpmbcq zaaabthz izoct

15963
search: "abcd" cost 3.0000ms
[OK] "abcd" (614): thmd niyvkabbcm imdcmceg, vtab cnaudxjmv isznqhfh, zzxffabcq glrdxmxn, tjsagoabw rwubnlozzc zudpua, abcuzxh afigibdl

15963
search: "abc abc" cost 2.0000ms
[OK] "abc abc" (8): nabc evutsbc csambcvg, abtvscfz fagkobgd rcbacn, yesabedctq abgkhpcfny gquw, tdyxpprwe xkkabcdry tarqbzftc, abxc ktltptna jyflpvbc

16124
search: "ptcritlo bzs zhjn" cost 5.0000ms
[OK] "ptcritlo bzs zhjn" (1): ptcritlo bzs zhjn

15963
search: "abcxabc" cost 5.0000ms
[OK] "abcxabc" (1): abcxabc

15963
search: "abc.abc" cost 4.0000ms
[OK] "abc.abc" (16): aaboxtfkh xwhowqxcoa ubezuc, nabc evutsbc csambcvg, abcrvlyau hvbqkdc dim, gjbekmu tnabccpla hsibhyuocp, abtvscfz fagkobgd rcbacn

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment