Skip to content

Instantly share code, notes, and snippets.

@mtourne
Created July 5, 2013 18:43
Show Gist options
  • Save mtourne/5936444 to your computer and use it in GitHub Desktop.
Save mtourne/5936444 to your computer and use it in GitHub Desktop.
Heavy hitter algorithm (get top-k token in a possibly infine set of token), in constant space. This version only allows a increments of 1 The design is based on "Efficient Computation of Frequent and Top-k Elements in Data Streams" by Metwally et al, available at www.cs.ucsb.edu/research/tech_reports/reports/2005-23.pdf‎
-- Copyright (C) 2013 Matthieu Tourne
-- @author Matthieu Tourne <matthieu@cloudflare.com>
local M = {}
--------------------
--- Heavy Hitter ---
--------------------
local obj_mt = { __index = M }
local function init_scores(size)
local scores = {}
-- set all scores[card] = (score, token)
-- scores[token] = (score, card)
for i=1, size do
scores[i] = { score = i, token = 'foo' .. i }
scores['foo' .. i ] = { score = i, card = i }
end
return scores
end
local function swap_score(scores, card1, card2)
local scores = scores
local t1 = scores[card1]
local t2 = scores[card2]
scores[card1] = t2
scores[card2] = t1
-- change scores[token]
scores[t1.token].card = card2
scores[t2.token].card = card1
return true
end
function M.new(self, size)
local obj = {
scores = init_scores(size),
overestimates = {},
size = size,
}
return setmetatable(obj, obj_mt)
end
local function incr_score(scores, card, incr)
local t = scores[card]
local swap
local new_score = t.score + incr
if incr == 1 then
t.score = new_score
-- bubble sort with card + 1
local next_t = scores[card + 1]
while next_t and new_score > next_t.score do
swap = swap_score(scores, card, card + 1)
card = card + 1
next_t = scores[card + 1]
end
return card, swap
else
-- TODO (mtourne) bin search between cards
end
end
function M.insert(self, token, incr)
-- TODO (support incr)
local incr = 1
local scores = self.scores
local t = scores[token]
if t then
-- token already present
return incr_score(scores, t.card, incr)
else
local t1 = scores[1]
scores[t1.token] = nil
-- set token for first card in the list
t1.token = token
scores[token] = { score = t1.score,
card = 1 }
-- increment 1 token in the cardinal list
return incr_score(scores, 1, incr)
end
end
function M.dump_ordered(self)
return unpack(self.scores)
end
return M
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment