Skip to content

Instantly share code, notes, and snippets.

@knazarov
Created September 23, 2020 08:00
Show Gist options
  • Save knazarov/8102e558bac54cdd255e9025eee8aa11 to your computer and use it in GitHub Desktop.
Save knazarov/8102e558bac54cdd255e9025eee8aa11 to your computer and use it in GitHub Desktop.
Full text search example for Tarantool
#!/usr/bin/env tarantool
local pickle = require('pickle')
local yaml = require('yaml')
--- Build a trigram fingerprint of a string.
-- The input is lowercased; every run of three consecutive bytes is combined
-- into one number, and that number modulo 64 selects one bit of the result.
-- @param str string to fingerprint
-- @return the bitmask packed with pickle format 'Q', or "" when the string
--         is shorter than three bytes (it contains no trigram at all)
function trivec(str)
    local lowered = string.lower(str)
    local last_start = #lowered - 2
    if last_start < 1 then
        return ""
    end

    local mask = 0
    for pos = 1, last_start do
        -- Fetch the three bytes of the trigram starting at `pos` in one call.
        local b1, b2, b3 = string.byte(lowered, pos, pos + 2)
        local bit_no = (b1 * 10000 + b2 * 100 + b3) % 64
        -- 1ULL makes the shift a 64-bit operation, so all 64 bits are usable.
        mask = bit.bor(mask, bit.lshift(1ULL, bit_no))
    end
    return pickle.pack('Q', mask)
end
-- Configure the instance with an empty option table.
box.cfg({})

-- Create the 'account' space unless it already exists.
box.schema.space.create('account', { if_not_exists = true })

local account = box.space.account

-- Tuple layout: numeric id, display name, packed trigram fingerprint.
account:format({
    { name = 'id', type = 'unsigned' },
    { name = 'name', type = 'string' },
    { name = 'trivec', type = 'string' },
})

-- Unique primary key on the id field.
account:create_index('primary', {
    unique = true,
    parts = { { field = 'id', type = 'unsigned' } },
    if_not_exists = true,
})

-- Non-unique BITSET index over the third field (the fingerprint).
account:create_index('hash', {
    unique = false,
    type = 'BITSET',
    parts = { 3, type = 'string' },
    if_not_exists = true,
})
--- Store an account together with the trigram fingerprint of its name.
-- @param id   account identifier
-- @param name account name; also the input to trivec()
-- @return whatever box.space.account:put() returns
function put_user(id, name)
    local fingerprint = trivec(name)
    local tuple = { id, name, fingerprint }
    return box.space.account:put(tuple)
end
--- Find accounts whose name contains the given text, ignoring case.
-- The BITSET index is used as a coarse pre-filter: it yields every tuple
-- whose fingerprint has all bits of the query fingerprint set. Different
-- trigrams can share a bit, so each candidate is re-checked against the
-- actual name before it is returned.
-- @param name_part text to look for; treated literally, not as a Lua pattern
-- @return array of matching tuples (empty table when nothing matches)
function find_user(name_part)
    -- Lowercase the query once instead of on every loop iteration.
    local needle = string.lower(name_part)
    local key = trivec(name_part)
    local res = {}
    for _, v in box.space.account.index.hash:pairs(key, {iterator='BITS_ALL_SET'}) do
        -- Plain search (4th argument true): characters such as '.', '%',
        -- '(' or '-' in the query must match themselves instead of being
        -- interpreted as pattern syntax.
        if string.find(string.lower(v.name), needle, 1, true) then
            table.insert(res, v)
        end
    end
    return res
end
-- Demo: insert two accounts that share a first name, then search by a
-- fragment of that name and print the result as YAML.
put_user(1, "Konstantin Nazarov")
put_user(2, "Konstantin Osipov")

local matches = find_user('onst')
print(yaml.encode(matches))
@ochaton
Copy link

ochaton commented Feb 15, 2021

WTF? Why is it working?

@ochaton
Copy link

ochaton commented Feb 15, 2021

Modifying the constants in the hash function seems to change nothing!

#!/usr/bin/env tarantool
require 'strict'.on()
local utf8 = require 'utf8'

box.cfg({})

-- Schema bootstrap: creates the 'accounts' space and its two indexes.
local function create_schema()
	box.schema.space.create('accounts', {
		format = {
			{ name = 'id',   type = 'unsigned' },
			{ name = 'name', type = 'string'   },
			{ name = 'bpf',  type = 'string'   },
		}
	})

	local accounts = box.space.accounts

	-- Unique primary key on the id field.
	accounts:create_index('primary', {
		unique = true,
		parts = { 'id' },
	})

	-- Non-unique BITSET index over the fingerprint field.
	accounts:create_index('bpf', {
		unique = false,
		type = 'BITSET',
		parts = { 'bpf' },
	})

	print("space accounts created")
end

-- Run the bootstrap through box.once under the key 'schema:v1'.
box.once('schema:v1', create_schema)

-- Table collecting the functions this script exposes (register, find).
local api = {}
-- Publish the table as the global `api`. rawset writes to _G directly,
-- without invoking any metamethod.
-- NOTE(review): presumably needed because strict mode is switched on at the
-- top of the script — confirm.
rawset(_G, 'api', api)

--- Build a 64-bit trigram fingerprint of a string.
-- Every run of three consecutive bytes is mixed into one number, and that
-- number modulo 64 selects one bit of the result. The multipliers are only a
-- mixing step; many trigrams share a bit, so callers are expected to re-check
-- candidates against the real text.
--
-- NOTE: rows stored with the earlier 32-bit variant of this function carry
-- different fingerprints and should be registered again.
-- @param str string to fingerprint (callers pass it already lowercased)
-- @return the bitmask packed with pickle format 'Q', or "" when the string
--         is shorter than three bytes
local function trivec(str)
	if #str < 3 then return "" end

	-- Both operands are 64-bit (ULL) on purpose: with plain Lua numbers
	-- LuaJIT's bit library works on 32 bits and masks the shift count to
	-- 0..31, so half of the bitmask would never be used and bit 31 would
	-- turn the result negative.
	local res = 0ULL
	for i = 1, #str - 2 do
		local b1, b2, b3 = str:byte(i, i + 2)
		local val = b1 * 0xffff + b2 * 0xff + b3

		res = bit.bor(res, bit.lshift(1ULL, val % 64))
	end
	return require('pickle').pack('Q', res)
end

--- Insert or overwrite an account.
-- The fingerprint stored in the third field is computed from the lowercased
-- name.
-- @param id   account identifier
-- @param name account name
-- @return whatever box.space.accounts:replace() returns
function api.register(id, name)
	local fingerprint = trivec(utf8.lower(name))
	return box.space.accounts:replace({ id, name, fingerprint })
end

--- Find accounts whose name contains the given text, ignoring case.
-- The BITSET index supplies candidates whose fingerprint has every bit of the
-- query fingerprint set; each candidate is then checked against its name.
-- @param name text to look for; treated literally, not as a Lua pattern
-- @return array of matching tuples
function api.find(name)
	name = utf8.lower(name)

	return box.space.accounts.index.bpf
		:pairs({ trivec(name) }, {iterator='BITS_ALL_SET'})
		-- The cap is applied before filtering: at most 1000 index
		-- candidates are inspected, not 1000 matches returned.
		:take(1000) -- hard limit
		:grep(function(account)
			-- Plain search (4th argument true) so that characters such as
			-- '.', '%', '(' or '-' in the query match themselves.
			return utf8.lower(account.name):find(name, 1, true) ~= nil
		end)
		:totable()
end

-- Start the interactive console, then exit with status 0.
local console = require('console')
console.start()
os.exit(0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment