Last active
September 12, 2016 22:47
-
-
Save itamarhaber/c1ffda42d86b314ea701 to your computer and use it in GitHub Desktop.
Redis Quadtrees in Hash - finally motivated because of http://stackoverflow.com/questions/32911604/intersection-of-two-or-more-sorted-sets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
foo@bar:~/src/quadtree$ redis-cli zcard x | |
(integer) 100000 | |
foo@bar:~/src/quadtree$ redis-cli zcard y | |
(integer) 100000 | |
foo@bar:~/src/quadtree$ redis-cli hlen qt | |
(integer) 56325 | |
foo@bar:~/src/quadtree$ redis-cli script load "`cat rqtih.lua`" | |
"84f91fddf261e9a2ef14ff4b417443eed3dc4b2b" | |
foo@bar:~/src/quadtree$ redis-benchmark -n 100 -r 1000000 evalsha 84f91fddf261e9a2ef14ff4b417443eed3dc4b2b 3 x y tmp zquery __rand_int__ __rand_int__ __rand_int__ __rand_int__ | |
====== evalsha 84f91fddf261e9a2ef14ff4b417443eed3dc4b2b 3 x y tmp zquery __rand_int__ __rand_int__ __rand_int__ __rand_int__ ====== | |
100 requests completed in 41.50 seconds | |
50 parallel clients | |
3 bytes payload | |
keep alive: 1 | |
1.00% <= 404 milliseconds | |
2.00% <= 20128 milliseconds | |
3.00% <= 20523 milliseconds | |
8.00% <= 20524 milliseconds | |
52.00% <= 20966 milliseconds | |
100.00% <= 20966 milliseconds | |
2.41 requests per second | |
foo@bar:~/src/quadtree$ redis-benchmark -n 100 -r 1000000 evalsha 84f91fddf261e9a2ef14ff4b417443eed3dc4b2b 1 qt query __rand_int__ __rand_int__ __rand_int__ __rand_int__ | |
====== evalsha 84f91fddf261e9a2ef14ff4b417443eed3dc4b2b 1 qt query __rand_int__ __rand_int__ __rand_int__ __rand_int__ ====== | |
100 requests completed in 1.36 seconds | |
50 parallel clients | |
3 bytes payload | |
keep alive: 1 | |
1.00% <= 37 milliseconds | |
2.00% <= 638 milliseconds | |
3.00% <= 665 milliseconds | |
9.00% <= 666 milliseconds | |
24.00% <= 667 milliseconds | |
43.00% <= 668 milliseconds | |
52.00% <= 679 milliseconds | |
59.00% <= 680 milliseconds | |
100.00% <= 680 milliseconds | |
73.69 requests per second |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- rqtih.lua - Redis Quadtree in Hash | |
-- | |
-- TODO: | |
-- Extend to store boxes and polygons instead of just points | |
-- More dimensions - kd-trees | |
-- Get rid of object-oriented design - flatten everything to nested loops | |
-- JSON, really? msgpck or even better - a custom bit-level encoder (otoh, readable and implemented in C...) | |
-- node capacity should be configurable as well | |
-- Currently requires declaring the qt's min/max dimensions a priori | |
-- Use HMGET and HMSET | |
-- Missing QT functionality: removePoint, exists, rebuild, ... | |
-- Add member uniqueness and then support union, intersect & diff on QTs | |
-- ... | |
-- Do the entire thing in Redis core and add this as an internal data type - win! | |
-- | |
-- Copyright Itamar Haber & Redis Labs. Provided under the 3-Clause BSD licene. | |
-- Helper functions | |
-- add a call to _trace in strategic code paths, open a MONITOR session to view | |
-- output interleaved with the actual calls to redis.call + timing | |
local function _trace(...) | |
local msg = "_trace:" | |
for i, v in ipairs(arg) do | |
msg = msg .. ' [' .. i .. ']: ' .. v | |
end | |
return redis.call('ECHO', msg) | |
end | |
-- returns true if two boxes intersect | |
local function _intersect(ax1,ay1,aw,ah, bx1,by1,bw,bh) | |
local ax2,ay2,bx2,by2 = ax1 + aw, ay1 + ah, bx1 + bw, by1 + bh | |
return not (bx1 > ax2 or bx2 < ax1 or by1 > ay2 or by2 < ay1) | |
end | |
-- returns true if a is contained in b | |
local function _contained(ax1,ay1,aw,ah, bx1,by1,bw,bh) | |
local ax2,ay2,bx2,by2 = ax1 + aw, ay1 + ah, bx1 + bw, by1 + bh | |
return bx1 <= ax1 and bx2 >= ax2 and by1 <= ay1 and by2 >= ay2 | |
end | |
-- Point class | |
-- An arbitrary point in 2D space | |
local Point = {} | |
Point.__index = Point | |
Point.__tostring = function(cls) | |
return '{ "Point" : { "x" : "' .. (cls.x or "nil") .. '", "y" : "' .. (cls.y or "nil") .. '", "data" :"' .. (cls.data or nil) .. '" } }' | |
end | |
setmetatable(Point, { | |
__call = function(cls, ...) | |
return cls.new(...) | |
end, | |
}) | |
function Point.new(x, y, data) | |
local self = setmetatable({}, Point) | |
self.x, self.y, self.data = x, y, data | |
return self | |
end | |
-- Quadtree class | |
local Quadtree = {} | |
Quadtree.__index = Quadtree | |
Quadtree.__tostring = function(cls) | |
local reply = | |
'{ "Quadtree" :' .. | |
' { "x" = "' .. (cls.x or "nil") .. | |
'", "y" = "' .. (cls.y or "nil") .. | |
'", "w" = "' .. (cls.w or "nil") .. | |
'", "h" = "' .. (cls.h or "nil") .. | |
'", "points" = [ ' | |
local spoints | |
for _, v in pairs(cls.points) do | |
if spoints then | |
spoints = spoints .. ", " .. tostring(v) | |
else | |
spoints = tostring(v) | |
end | |
end | |
reply = reply .. (spoints or "") .. " ] } }" | |
return reply | |
end | |
setmetatable(Quadtree, { | |
__call = function (cls, ...) | |
return cls.new(...) | |
end, | |
}) | |
-- Initializes the quadtree. Call with dimension arguments to create a new one | |
-- or without them to load from storage | |
function Quadtree.new(x, y, w, h, name) | |
local self = setmetatable({}, Quadtree) | |
self.name = name or (KEYS[1]) | |
self.children = {} | |
self.points = {} | |
-- load the node from storage if it exists, otherwise init | |
local storage = redis.call('HGET', KEYS[1], self.name) | |
if storage then | |
self.exists = true | |
storage = cjson.decode(storage) | |
self.x = storage.x | |
self.y = storage.y | |
self.w = storage.w | |
self.h = storage.h | |
if storage.points then | |
for _, v in pairs(storage.points) do | |
self.points[#(self.points)+1] = Point.new(v.x, v.y, v.data) | |
end | |
end | |
else | |
self.exists = false | |
self.x, self.y, self.w, self.h = x or 0, y or 0, w or 0, h or 0 | |
end | |
self.loadedChildren = false | |
self.dirty = false | |
return(self) | |
end | |
-- loads a node's children from storage | |
function Quadtree:loadChildren() | |
local children = redis.call('HEXISTS', KEYS[1], self.name .. "/NW") | |
if children ~= 0 then | |
self.children[#(self.children)+1] = Quadtree.new(0, 0, 0, 0, self.name .. "/NW") | |
self.children[#(self.children)+1] = Quadtree.new(0, 0, 0, 0, self.name .. "/NE") | |
self.children[#(self.children)+1] = Quadtree.new(0, 0, 0, 0, self.name .. "/SW") | |
self.children[#(self.children)+1] = Quadtree.new(0, 0, 0, 0, self.name .. "/SE") | |
end | |
self.loadedChildren = true | |
end | |
-- inserts a point to a quadtree node | |
function Quadtree:insertPoint(point) | |
if not _contained(point.x, point.y, 0, 0, self.x, self.y, self.w, self.h) then | |
return false | |
end | |
-- try to insert the point to this node | |
if #(self.points) < 4 then -- node capacity | |
self.points[#(self.points)+1] = point | |
self.dirty = true | |
return true | |
end | |
if not self.loadedChildren then | |
self:loadChildren() | |
end | |
-- subdivide the node to 4 children | |
if #(self.children) == 0 then | |
local hw = self.w / 2.0 | |
local hh = self.h / 2.0 | |
self.children[#(self.children)+1] = Quadtree.new(self.x, self.y, hw, hh, self.name .. "/NW") | |
self.children[#(self.children)+1] = Quadtree.new(self.x + hw, self.y, hw, hh, self.name .. "/NE") | |
self.children[#(self.children)+1] = Quadtree.new(self.x, self.y + hh, hw, hh, self.name .. "/SW") | |
self.children[#(self.children)+1] = Quadtree.new(self.x + hw, self.y + hh , hw, hh, self.name .. "/SE") | |
end | |
-- insert to relevant child | |
for _, child in pairs(self.children) do | |
if child:insertPoint(point) then | |
return true | |
end | |
end | |
return false | |
end | |
-- returns the points in the area specified | |
function Quadtree:query(x, y, w, h) | |
local reply = {} | |
if not _intersect(x, y, w, h, self.x, self.y, self.w, self.h) then | |
return reply | |
end | |
for _, point in pairs(self.points) do | |
if _contained(point.x, point.y, 0, 0, x, y, w, h) then | |
reply[#reply + 1] = point | |
end | |
end | |
if not self.loadedChildren then | |
self:loadChildren() | |
end | |
if #(self.children) ~= 0 then | |
for _, child in pairs(self.children) do | |
local r = child:query(x, y, w, h) | |
for _, p in pairs(r) do | |
reply[#reply + 1] = p | |
end | |
end | |
end | |
return reply | |
end | |
-- converts a quadtree to a table | |
function Quadtree:toTable(dirtyOnly) | |
local reply = {} | |
if not dirtyOnly or dirtyOnly and self.dirty then | |
reply[self.name] = cjson.encode({ x = self.x, y = self.y, h = self.h, w = self.w, points = self.points}) | |
end | |
for _, child in pairs(self.children) do | |
local cs = child:toTable(dirtyOnly) | |
for k, v in pairs(cs) do | |
reply[k] = v | |
end | |
end | |
return reply | |
end | |
-- saves the quadtree to storage | |
function Quadtree:dump(dirtyOnly) | |
local t = self:toTable(dirtyOnly) | |
for k, v in pairs(t) do | |
redis.call('HSET', KEYS[1], k, v) | |
end | |
end | |
local function _debug_populatetree() | |
redis.call('DEL', KEYS[1]) | |
local q = Quadtree.new(0, 0, 100, 100) | |
q:insertPoint(Point.new(1,1, 'first')) | |
q:insertPoint(Point.new(2,2, '2nd')) | |
q:insertPoint(Point.new(4,4, 'last')) | |
q:dump() | |
end | |
local function _debug_populatezsets(size, xmax, ymax) | |
redis.call('DEL', KEYS[1], KEYS[2]) | |
for i = 1, size do | |
redis.call('ZADD', KEYS[1], math.random(xmax), 'id:' .. i) | |
redis.call('ZADD', KEYS[2], math.random(ymax), 'id:' .. i) | |
end | |
end | |
local function fuzzy_test(items, queries) | |
local dim = 2 | |
local timings = {} | |
local avgt = 0.0 | |
redis.call('DEL', KEYS[1]) | |
local q = Quadtree.new(1, 1, 1000, 1000) | |
local id = 0 | |
local dataset = {} | |
for i = 1, items do | |
local vars = {} | |
for j = 1, dim do | |
table.insert(vars, math.random(1000)) | |
end | |
q:insertPoint(Point.new(vars[1], vars[2], id)) | |
table.insert(vars, id) | |
table.insert(dataset, vars) | |
id = id + 1 | |
end | |
q:dump() | |
for i = 1, queries do | |
local random = {} | |
for j = 1, dim do | |
local s = math.random(1000) | |
local e = math.random(1000) | |
if e < s then | |
s, e = e, s | |
end | |
table.insert(random, { s, e }) | |
end | |
local tq = Quadtree.new(1, 1, 1000, 1000) | |
local start_t = redis.call('TIME') | |
local res1 = tq:query(random[1][1], random[2][1], random[1][2] - random[1][1], random[2][2] - random[2][1]) | |
local end_t = redis.call('TIME') | |
for i, v in ipairs(res1) do | |
local t = {} | |
res1[i] = {v.x, v.y, v.data} | |
end | |
start_t[1], start_t[2] = tonumber(start_t[1]), tonumber(start_t[2]) | |
end_t[1], end_t[2] = tonumber(end_t[1]), tonumber(end_t[2]) | |
if end_t[2] > start_t[2] then | |
table.insert(timings, { end_t[1] - start_t[1], end_t[2] - start_t[2] }) | |
else | |
table.insert(timings, { end_t[1] - start_t[1] - 1, math.abs(end_t[2] - start_t[2]) }) | |
end | |
avgt = (avgt * (#timings - 1) + tonumber(string.format('%d.%06d', timings[#timings][1], timings[#timings][2]))) / #timings | |
local res2 = {} | |
for _, v in pairs(dataset) do | |
local included = true | |
for j = 1, dim do | |
if v[j] < random[j][1] or v[j] > random[j][2] then | |
included = false | |
end | |
end | |
if included then | |
table.insert(res2, v) | |
end | |
end | |
-- table sorting is so much FUN! | |
local function cmp(a, b, depth) | |
depth = depth or 1 | |
if depth > dim + 1 then | |
return false | |
end | |
if a[depth] < b[depth] then | |
return true | |
end | |
if a[depth] == b[depth] then | |
return cmp(a, b, depth + 1) | |
end | |
if a[depth] > b[depth] then | |
return false | |
end | |
end | |
table.sort(res1, cmp) | |
table.sort(res2, cmp) | |
for i1, r1 in ipairs(res1) do | |
for i2 = 1, dim + 1 do | |
if r1[i2] ~= res2[i1][i2] then | |
error('ERROR ' .. r1[i2] .. ' ~= ' .. res2[i1][i2]) | |
end | |
end | |
end | |
end | |
-- housekeeping drop() can't be called unless replication mode is changed | |
return({ 'fuzzily tested.', {dim, items, queries, timings}}) | |
end | |
-- Example #1 | |
--_debug_populatetree() | |
--local q = Quadtree.new() | |
--q:insertPoint(Point.new(15,5, 'for reals')) | |
--q:dump(true) | |
--return cjson.encode(q:query(3, 3, 20, 20)) | |
-- Example 2 | |
--_debug_populatezsets(100000, 1000000, 42000000) | |
local _USAGE = { | |
'commands', | |
-- ' add - adds a point, ARGV: x y data', | |
-- ' create - creates a quatree, ARGV: x y w h', | |
' query - returns points in range, KEYS: quadtree ARGV: x y w h', | |
' zcreate - creates a quadtree from the intersect of two sorted sets, KEYS: quadtree xzset yzset tempkeyname', | |
' zquery - returns points in range w/o quadtree, KEYS: xzset yzset tempkeyname ARGV: x y w h', | |
' help - this message', | |
} | |
--- "Parser" | |
if #ARGV == 0 or #KEYS == 0 then | |
return(_USAGE) | |
end | |
local cmd = ARGV[1]:lower() | |
if cmd == "help" then | |
return _USAGE | |
elseif cmd == "query" then | |
local q = Quadtree.new() | |
return cjson.encode(q:query(tonumber(ARGV[2]), tonumber(ARGV[3]), tonumber(ARGV[4]), tonumber(ARGV[5]))) | |
elseif cmd == "zcreate" then | |
redis.call('ZINTERSTORE', KEYS[4], 2, KEYS[2], KEYS[3], 'WEIGHTS', 1, 0) | |
local xmin = tonumber(redis.call('ZRANGE', KEYS[4], 0, 0, 'WITHSCORES')[2]) | |
local xmax = tonumber(redis.call('ZRANGE', KEYS[4], -1, -1, 'WITHSCORES')[2]) | |
redis.call('ZINTERSTORE', KEYS[4], 2, KEYS[2], KEYS[3], 'WEIGHTS', 0, 1) | |
local ymin = tonumber(redis.call('ZRANGE', KEYS[4], 0, 0, 'WITHSCORES')[2]) | |
local ymax = tonumber(redis.call('ZRANGE', KEYS[4], -1, -1, 'WITHSCORES')[2]) | |
redis.call('DEL', KEYS[1]) | |
local q = Quadtree.new(xmin, ymin, xmax-xmin, ymax-ymin) | |
local members = redis.call('ZRANGE', KEYS[4], 0, -1, 'WITHSCORES') | |
for i = 1, #members/2 do | |
local x = redis.call('ZSCORE', KEYS[2], members[i*2-1]) | |
q:insertPoint(Point.new(tonumber(x), tonumber(members[i*2]), members[i*2-1])) | |
end | |
redis.call('DEL', KEYS[4]) | |
q:dump() | |
elseif cmd == "zquery" then | |
redis.call('ZUNIONSTORE', KEYS[3], 1, KEYS[1], 'WEIGHTS', 1) | |
redis.call('ZREMRANGEBYSCORE', KEYS[3], '-inf', '(' .. ARGV[2]) | |
redis.call('ZREMRANGEBYSCORE', KEYS[3], '(' .. ARGV[2] + ARGV[4], '+inf') | |
redis.call('ZINTERSTORE', KEYS[3], 2, KEYS[3], KEYS[2], 'WEIGHTS', 0, 1) | |
return cjson.encode(redis.call('ZRANGEBYSCORE', KEYS[3], ARGV[3], ARGV[3] + ARGV[5])) | |
elseif cmd == "fuzzy_test" then | |
return fuzzy_test(tonumber(ARGV[2]), tonumber(ARGV[3])) | |
else | |
return "-ERR: Unknown command '" .. ARGV[1] .. "' - run with 'help' for usage" | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh | |
SHA1=$(redis-cli SCRIPT LOAD "$(cat rqtih.lua)") | |
redis-cli EVALSHA $SHA1 2 h fuzzy_test |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment