Skip to content

Instantly share code, notes, and snippets.

@itamarhaber
Last active September 12, 2016 22:47
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save itamarhaber/c1ffda42d86b314ea701 to your computer and use it in GitHub Desktop.
Save itamarhaber/c1ffda42d86b314ea701 to your computer and use it in GitHub Desktop.
Redis Quadtrees in Hash - finally motivated because of http://stackoverflow.com/questions/32911604/intersection-of-two-or-more-sorted-sets
foo@bar:~/src/quadtree$ redis-cli zcard x
(integer) 100000
foo@bar:~/src/quadtree$ redis-cli zcard y
(integer) 100000
foo@bar:~/src/quadtree$ redis-cli hlen qt
(integer) 56325
foo@bar:~/src/quadtree$ redis-cli script load "`cat rqtih.lua`"
"84f91fddf261e9a2ef14ff4b417443eed3dc4b2b"
foo@bar:~/src/quadtree$ redis-benchmark -n 100 -r 1000000 evalsha 84f91fddf261e9a2ef14ff4b417443eed3dc4b2b 3 x y tmp zquery __rand_int__ __rand_int__ __rand_int__ __rand_int__
====== evalsha 84f91fddf261e9a2ef14ff4b417443eed3dc4b2b 3 x y tmp zquery __rand_int__ __rand_int__ __rand_int__ __rand_int__ ======
100 requests completed in 41.50 seconds
50 parallel clients
3 bytes payload
keep alive: 1
1.00% <= 404 milliseconds
2.00% <= 20128 milliseconds
3.00% <= 20523 milliseconds
8.00% <= 20524 milliseconds
52.00% <= 20966 milliseconds
100.00% <= 20966 milliseconds
2.41 requests per second
foo@bar:~/src/quadtree$ redis-benchmark -n 100 -r 1000000 evalsha 84f91fddf261e9a2ef14ff4b417443eed3dc4b2b 1 qt query __rand_int__ __rand_int__ __rand_int__ __rand_int__
====== evalsha 84f91fddf261e9a2ef14ff4b417443eed3dc4b2b 1 qt query __rand_int__ __rand_int__ __rand_int__ __rand_int__ ======
100 requests completed in 1.36 seconds
50 parallel clients
3 bytes payload
keep alive: 1
1.00% <= 37 milliseconds
2.00% <= 638 milliseconds
3.00% <= 665 milliseconds
9.00% <= 666 milliseconds
24.00% <= 667 milliseconds
43.00% <= 668 milliseconds
52.00% <= 679 milliseconds
59.00% <= 680 milliseconds
100.00% <= 680 milliseconds
73.69 requests per second
-- rqtih.lua - Redis Quadtree in Hash
--
-- TODO:
-- Extend to store boxes and polygons instead of just points
-- More dimensions - kd-trees
-- Get rid of object-oriented design - flatten everything to nested loops
-- JSON, really? msgpck or even better - a custom bit-level encoder (otoh, readable and implemented in C...)
-- node capacity should be configurable as well
-- Currently requires declaring the qt's min/max dimensions a priori
-- Use HMGET and HMSET
-- Missing QT functionality: removePoint, exists, rebuild, ...
-- Add member uniqueness and then support union, intersect & diff on QTs
-- ...
-- Do the entire thing in Redis core and add this as an internal data type - win!
--
-- Copyright Itamar Haber & Redis Labs. Provided under the 3-Clause BSD licene.
-- Helper functions
-- add a call to _trace in strategic code paths, open a MONITOR session to view
-- output interleaved with the actual calls to redis.call + timing
local function _trace(...)
local msg = "_trace:"
for i, v in ipairs(arg) do
msg = msg .. ' [' .. i .. ']: ' .. v
end
return redis.call('ECHO', msg)
end
-- returns true if two boxes intersect
local function _intersect(ax1,ay1,aw,ah, bx1,by1,bw,bh)
local ax2,ay2,bx2,by2 = ax1 + aw, ay1 + ah, bx1 + bw, by1 + bh
return not (bx1 > ax2 or bx2 < ax1 or by1 > ay2 or by2 < ay1)
end
-- returns true if a is contained in b
local function _contained(ax1,ay1,aw,ah, bx1,by1,bw,bh)
local ax2,ay2,bx2,by2 = ax1 + aw, ay1 + ah, bx1 + bw, by1 + bh
return bx1 <= ax1 and bx2 >= ax2 and by1 <= ay1 and by2 >= ay2
end
-- Point class
-- An arbitrary point in 2D space
local Point = {}
Point.__index = Point
Point.__tostring = function(cls)
return '{ "Point" : { "x" : "' .. (cls.x or "nil") .. '", "y" : "' .. (cls.y or "nil") .. '", "data" :"' .. (cls.data or nil) .. '" } }'
end
setmetatable(Point, {
__call = function(cls, ...)
return cls.new(...)
end,
})
function Point.new(x, y, data)
local self = setmetatable({}, Point)
self.x, self.y, self.data = x, y, data
return self
end
-- Quadtree class
local Quadtree = {}
Quadtree.__index = Quadtree
Quadtree.__tostring = function(cls)
local reply =
'{ "Quadtree" :' ..
' { "x" = "' .. (cls.x or "nil") ..
'", "y" = "' .. (cls.y or "nil") ..
'", "w" = "' .. (cls.w or "nil") ..
'", "h" = "' .. (cls.h or "nil") ..
'", "points" = [ '
local spoints
for _, v in pairs(cls.points) do
if spoints then
spoints = spoints .. ", " .. tostring(v)
else
spoints = tostring(v)
end
end
reply = reply .. (spoints or "") .. " ] } }"
return reply
end
setmetatable(Quadtree, {
__call = function (cls, ...)
return cls.new(...)
end,
})
-- Initializes the quadtree. Call with dimension arguments to create a new one
-- or without them to load from storage
function Quadtree.new(x, y, w, h, name)
local self = setmetatable({}, Quadtree)
self.name = name or (KEYS[1])
self.children = {}
self.points = {}
-- load the node from storage if it exists, otherwise init
local storage = redis.call('HGET', KEYS[1], self.name)
if storage then
self.exists = true
storage = cjson.decode(storage)
self.x = storage.x
self.y = storage.y
self.w = storage.w
self.h = storage.h
if storage.points then
for _, v in pairs(storage.points) do
self.points[#(self.points)+1] = Point.new(v.x, v.y, v.data)
end
end
else
self.exists = false
self.x, self.y, self.w, self.h = x or 0, y or 0, w or 0, h or 0
end
self.loadedChildren = false
self.dirty = false
return(self)
end
-- loads a node's children from storage
function Quadtree:loadChildren()
local children = redis.call('HEXISTS', KEYS[1], self.name .. "/NW")
if children ~= 0 then
self.children[#(self.children)+1] = Quadtree.new(0, 0, 0, 0, self.name .. "/NW")
self.children[#(self.children)+1] = Quadtree.new(0, 0, 0, 0, self.name .. "/NE")
self.children[#(self.children)+1] = Quadtree.new(0, 0, 0, 0, self.name .. "/SW")
self.children[#(self.children)+1] = Quadtree.new(0, 0, 0, 0, self.name .. "/SE")
end
self.loadedChildren = true
end
-- inserts a point to a quadtree node
function Quadtree:insertPoint(point)
if not _contained(point.x, point.y, 0, 0, self.x, self.y, self.w, self.h) then
return false
end
-- try to insert the point to this node
if #(self.points) < 4 then -- node capacity
self.points[#(self.points)+1] = point
self.dirty = true
return true
end
if not self.loadedChildren then
self:loadChildren()
end
-- subdivide the node to 4 children
if #(self.children) == 0 then
local hw = self.w / 2.0
local hh = self.h / 2.0
self.children[#(self.children)+1] = Quadtree.new(self.x, self.y, hw, hh, self.name .. "/NW")
self.children[#(self.children)+1] = Quadtree.new(self.x + hw, self.y, hw, hh, self.name .. "/NE")
self.children[#(self.children)+1] = Quadtree.new(self.x, self.y + hh, hw, hh, self.name .. "/SW")
self.children[#(self.children)+1] = Quadtree.new(self.x + hw, self.y + hh , hw, hh, self.name .. "/SE")
end
-- insert to relevant child
for _, child in pairs(self.children) do
if child:insertPoint(point) then
return true
end
end
return false
end
-- returns the points in the area specified
function Quadtree:query(x, y, w, h)
local reply = {}
if not _intersect(x, y, w, h, self.x, self.y, self.w, self.h) then
return reply
end
for _, point in pairs(self.points) do
if _contained(point.x, point.y, 0, 0, x, y, w, h) then
reply[#reply + 1] = point
end
end
if not self.loadedChildren then
self:loadChildren()
end
if #(self.children) ~= 0 then
for _, child in pairs(self.children) do
local r = child:query(x, y, w, h)
for _, p in pairs(r) do
reply[#reply + 1] = p
end
end
end
return reply
end
-- converts a quadtree to a table
function Quadtree:toTable(dirtyOnly)
local reply = {}
if not dirtyOnly or dirtyOnly and self.dirty then
reply[self.name] = cjson.encode({ x = self.x, y = self.y, h = self.h, w = self.w, points = self.points})
end
for _, child in pairs(self.children) do
local cs = child:toTable(dirtyOnly)
for k, v in pairs(cs) do
reply[k] = v
end
end
return reply
end
-- saves the quadtree to storage
function Quadtree:dump(dirtyOnly)
local t = self:toTable(dirtyOnly)
for k, v in pairs(t) do
redis.call('HSET', KEYS[1], k, v)
end
end
local function _debug_populatetree()
redis.call('DEL', KEYS[1])
local q = Quadtree.new(0, 0, 100, 100)
q:insertPoint(Point.new(1,1, 'first'))
q:insertPoint(Point.new(2,2, '2nd'))
q:insertPoint(Point.new(4,4, 'last'))
q:dump()
end
local function _debug_populatezsets(size, xmax, ymax)
redis.call('DEL', KEYS[1], KEYS[2])
for i = 1, size do
redis.call('ZADD', KEYS[1], math.random(xmax), 'id:' .. i)
redis.call('ZADD', KEYS[2], math.random(ymax), 'id:' .. i)
end
end
local function fuzzy_test(items, queries)
local dim = 2
local timings = {}
local avgt = 0.0
redis.call('DEL', KEYS[1])
local q = Quadtree.new(1, 1, 1000, 1000)
local id = 0
local dataset = {}
for i = 1, items do
local vars = {}
for j = 1, dim do
table.insert(vars, math.random(1000))
end
q:insertPoint(Point.new(vars[1], vars[2], id))
table.insert(vars, id)
table.insert(dataset, vars)
id = id + 1
end
q:dump()
for i = 1, queries do
local random = {}
for j = 1, dim do
local s = math.random(1000)
local e = math.random(1000)
if e < s then
s, e = e, s
end
table.insert(random, { s, e })
end
local tq = Quadtree.new(1, 1, 1000, 1000)
local start_t = redis.call('TIME')
local res1 = tq:query(random[1][1], random[2][1], random[1][2] - random[1][1], random[2][2] - random[2][1])
local end_t = redis.call('TIME')
for i, v in ipairs(res1) do
local t = {}
res1[i] = {v.x, v.y, v.data}
end
start_t[1], start_t[2] = tonumber(start_t[1]), tonumber(start_t[2])
end_t[1], end_t[2] = tonumber(end_t[1]), tonumber(end_t[2])
if end_t[2] > start_t[2] then
table.insert(timings, { end_t[1] - start_t[1], end_t[2] - start_t[2] })
else
table.insert(timings, { end_t[1] - start_t[1] - 1, math.abs(end_t[2] - start_t[2]) })
end
avgt = (avgt * (#timings - 1) + tonumber(string.format('%d.%06d', timings[#timings][1], timings[#timings][2]))) / #timings
local res2 = {}
for _, v in pairs(dataset) do
local included = true
for j = 1, dim do
if v[j] < random[j][1] or v[j] > random[j][2] then
included = false
end
end
if included then
table.insert(res2, v)
end
end
-- table sorting is so much FUN!
local function cmp(a, b, depth)
depth = depth or 1
if depth > dim + 1 then
return false
end
if a[depth] < b[depth] then
return true
end
if a[depth] == b[depth] then
return cmp(a, b, depth + 1)
end
if a[depth] > b[depth] then
return false
end
end
table.sort(res1, cmp)
table.sort(res2, cmp)
for i1, r1 in ipairs(res1) do
for i2 = 1, dim + 1 do
if r1[i2] ~= res2[i1][i2] then
error('ERROR ' .. r1[i2] .. ' ~= ' .. res2[i1][i2])
end
end
end
end
-- housekeeping drop() can't be called unless replication mode is changed
return({ 'fuzzily tested.', {dim, items, queries, timings}})
end
-- Example #1
--_debug_populatetree()
--local q = Quadtree.new()
--q:insertPoint(Point.new(15,5, 'for reals'))
--q:dump(true)
--return cjson.encode(q:query(3, 3, 20, 20))
-- Example 2
--_debug_populatezsets(100000, 1000000, 42000000)
local _USAGE = {
'commands',
-- ' add - adds a point, ARGV: x y data',
-- ' create - creates a quatree, ARGV: x y w h',
' query - returns points in range, KEYS: quadtree ARGV: x y w h',
' zcreate - creates a quadtree from the intersect of two sorted sets, KEYS: quadtree xzset yzset tempkeyname',
' zquery - returns points in range w/o quadtree, KEYS: xzset yzset tempkeyname ARGV: x y w h',
' help - this message',
}
--- "Parser"
if #ARGV == 0 or #KEYS == 0 then
return(_USAGE)
end
local cmd = ARGV[1]:lower()
if cmd == "help" then
return _USAGE
elseif cmd == "query" then
local q = Quadtree.new()
return cjson.encode(q:query(tonumber(ARGV[2]), tonumber(ARGV[3]), tonumber(ARGV[4]), tonumber(ARGV[5])))
elseif cmd == "zcreate" then
redis.call('ZINTERSTORE', KEYS[4], 2, KEYS[2], KEYS[3], 'WEIGHTS', 1, 0)
local xmin = tonumber(redis.call('ZRANGE', KEYS[4], 0, 0, 'WITHSCORES')[2])
local xmax = tonumber(redis.call('ZRANGE', KEYS[4], -1, -1, 'WITHSCORES')[2])
redis.call('ZINTERSTORE', KEYS[4], 2, KEYS[2], KEYS[3], 'WEIGHTS', 0, 1)
local ymin = tonumber(redis.call('ZRANGE', KEYS[4], 0, 0, 'WITHSCORES')[2])
local ymax = tonumber(redis.call('ZRANGE', KEYS[4], -1, -1, 'WITHSCORES')[2])
redis.call('DEL', KEYS[1])
local q = Quadtree.new(xmin, ymin, xmax-xmin, ymax-ymin)
local members = redis.call('ZRANGE', KEYS[4], 0, -1, 'WITHSCORES')
for i = 1, #members/2 do
local x = redis.call('ZSCORE', KEYS[2], members[i*2-1])
q:insertPoint(Point.new(tonumber(x), tonumber(members[i*2]), members[i*2-1]))
end
redis.call('DEL', KEYS[4])
q:dump()
elseif cmd == "zquery" then
redis.call('ZUNIONSTORE', KEYS[3], 1, KEYS[1], 'WEIGHTS', 1)
redis.call('ZREMRANGEBYSCORE', KEYS[3], '-inf', '(' .. ARGV[2])
redis.call('ZREMRANGEBYSCORE', KEYS[3], '(' .. ARGV[2] + ARGV[4], '+inf')
redis.call('ZINTERSTORE', KEYS[3], 2, KEYS[3], KEYS[2], 'WEIGHTS', 0, 1)
return cjson.encode(redis.call('ZRANGEBYSCORE', KEYS[3], ARGV[3], ARGV[3] + ARGV[5]))
elseif cmd == "fuzzy_test" then
return fuzzy_test(tonumber(ARGV[2]), tonumber(ARGV[3]))
else
return "-ERR: Unknown command '" .. ARGV[1] .. "' - run with 'help' for usage"
end
#!/bin/sh
SHA1=$(redis-cli SCRIPT LOAD "$(cat rqtih.lua)")
redis-cli EVALSHA $SHA1 2 h fuzzy_test
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment