Last active
August 29, 2015 14:16
-
-
Save vitillo/cdcfbbb94f308e20e4ee to your computer and use it in GitHub Desktop.
Bug 1134669
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This Source Code Form is subject to the terms of the Mozilla Public | |
-- License, v. 2.0. If a copy of the MPL was not distributed with this | |
-- file, You can obtain one at http://mozilla.org/MPL/2.0/. | |
--[[ | |
1) How many days do we need to look back for k% of profiles to be up-to-date? | |
Each profile is associated to the date in which we last received a submission | |
for it on our servers. Periodically, we compute for all profiles the difference | |
between the current date and the date of reception and finally plot a histogram | |
of the differences expressed in number of days. | |
2) How many days do we need to look back to observe k% of profile activity?
Each profile is associated with its last activity date. Periodically, we compute | |
for all profiles the difference from the last activity date to the current date | |
and finally plot a histogram of the differences expressed in number of days. | |
3) What’s the delay in hours between the start of the profile activity and the time we receive the submission? | |
When we receive a new submission for a profile we compute the delay from the | |
start of the profile activity to the time we received the submission on our servers. | |
Periodically, we plot a histogram of the latencies for the profiles in hours. | |
As timeseries of histograms or heatmaps are not supported by the Heka plotting facilities, | |
only the median and some other percentiles are being output. | |
Can we say something about the *daily* profile activity? | |
Given 2, we know that k% of #N profiles were active in the past d days, where #N
is the total number of tracked profiles. We would like to say something about the
number of active users on day L, where L is a day within the past d days. | |
Say #L is the number of profiles that were active on day L for which we have
received a submission so far. We can define an upper bound to the true number | |
of active profiles on that day as #L + #U where #U = #N(100 - k)/100. That is, | |
the proportion of daily active users on day L for which we have received a | |
submission so far is at least #L/(#L + #U). | |
Proof: Let’s ignore churn and new profiles and assume that #U is not an upper bound.
That means that there are some profiles that were active on day L but are not
part of #N(100-k)/100; those profiles must then have sent a submission reporting
their activity on day L within the past d days — but such a submission would
have been counted in #L, a contradiction.
--]] | |
require "circular_buffer" | |
require "table" | |
require "string" | |
require "math" | |
require "os" | |
-- Per-channel state: each *_by_channel table maps client_id -> latest value;
-- each *_history_by_channel table holds the circular buffer published by timer_event.
local seen_by_channel = {} -- client_id -> Timestamp of last received submission (ns)
local seen_history_by_channel = {}
local active_by_channel = {} -- client_id -> last activity (creation) timestamp (ns)
local active_history_by_channel = {}
local delay_by_channel = {} -- client_id -> reception delay in ns (reception - activity)
local delay_history_by_channel = {}
-- Circular-buffer geometry: number of rows and seconds covered by each row.
local rows = read_config("rows") or 1440
local sec_per_row = 2 -- TODO: read_config("sec_per_row") or 60
-- Profiles idle for more than this many days are evicted as "lost".
local LOST_PROFILE_THRESHOLD = 42 -- https://people.mozilla.org/~bsmedberg/fhr-reporting/#usage
-- Only v4 ("unified") telemetry pings are processed.
local PING_VERSION = "4"
local NSPERHOUR = 60*60*1e9 -- nanoseconds per hour
local NSPERDAY = 24*NSPERHOUR -- nanoseconds per day
local MEDIAN = 1 -- circular-buffer column index holding the median
-- Emit `message` as a "debug" text payload (Heka sandbox debugging aid).
local function log(message)
    local lines = {message}
    inject_payload("txt", "debug", table.concat(lines, "\n"))
end
-- Return t[channel], lazily creating an empty table on first access.
local function get_channel_entry(t, channel)
    local existing = t[channel]
    if existing then
        return existing
    end
    local fresh = {}
    t[channel] = fresh
    return fresh
end
-- Return the cached circular buffer for `channel`, allocating a fresh one
-- (rows x sec_per_row, single "Median" column labelled with `unit`) the
-- first time the channel is seen.
local function get_history(unit, metric_history_by_channel, channel)
    local cb = metric_history_by_channel[channel]
    if cb == nil then
        cb = circular_buffer.new(rows, 1, sec_per_row)
        cb:set_header(MEDIAN, "Median", unit, "none")
        metric_history_by_channel[channel] = cb
    end
    return cb
end
-- Record `value` for `client_id` under `channel`, creating the per-channel
-- map on demand.
local function process_client_metric(metric_by_channel, channel, client_id, value)
    get_channel_entry(metric_by_channel, channel)[client_id] = value
end
-- Heka message hook: for every v4 ("unified") ping, record the reception
-- timestamp, the activity (creation) timestamp and the reception delay for
-- the sending client, bucketed by release channel.
-- Always returns 0; pings missing the fields we key on are skipped.
function process_message ()
    local sample_id = read_message("Fields[sampleId]")
    local version = read_message("Fields[sourceVersion]")
    if version == PING_VERSION and sample_id then -- TODO sample_id == 0
        local ts = read_message("Timestamp")
        local channel = read_message("Fields[appUpdateChannel]") or "UNKNOWN"
        local client_id = read_message("Fields[clientId]")
        local creation_date = read_message("Fields[creationTimestamp]") -- exists only in new "unified" pings
        local activity_ts = creation_date
        if not activity_ts then return 0 end
        -- BUG FIX: a ping without a clientId would make client_id nil, and
        -- `t[nil] = v` raises "table index is nil" in Lua, aborting the plugin.
        if not client_id then return 0 end
        process_client_metric(seen_by_channel, channel, client_id, ts)
        process_client_metric(active_by_channel, channel, client_id, activity_ts)
        process_client_metric(delay_by_channel, channel, client_id, ts - activity_ts)
    end
    return 0
end
-- For each channel, map every tracked client's value through `calc(ns, v)`,
-- take the median of the results, store it in the channel's history buffer
-- and publish the buffer as a "cbuf" payload.
-- `calc` converts (current time in ns, recorded value) -> metric in `unit`.
local function timer_event_metric(descr, unit, metric_by_channel, metric_history_by_channel, ns, calc)
    for channel, metric in pairs(metric_by_channel) do
        -- BUG FIX: sorted_metric and median were accidental globals
        -- (missing `local`), leaking state out of this function.
        local sorted_metric = {}
        for _, v in pairs(metric) do
            sorted_metric[#sorted_metric + 1] = calc(ns, v)
        end
        table.sort(sorted_metric)
        -- Median taken as the ceil(n/2)-th smallest; nil when no clients.
        local median = sorted_metric[math.ceil(#sorted_metric/2)]
        local history = get_history(unit, metric_history_by_channel, channel)
        if median then
            history:set(ns, MEDIAN, median)
        end
        inject_payload("cbuf", channel .. " " .. descr, history)
    end
end
-- Drop every record kept for `client_id` in `channel` across the three
-- per-client metric tables.
local function remove_inactive_client(channel, client_id)
    local all_metrics = {seen_by_channel, active_by_channel, delay_by_channel}
    for _, metric_by_channel in ipairs(all_metrics) do
        metric_by_channel[channel][client_id] = nil
    end
end
-- Evict clients whose last activity is more than LOST_PROFILE_THRESHOLD
-- days older than `current_ts` (both in nanoseconds). Clearing existing
-- keys during a pairs() traversal is permitted by the Lua spec.
local function remove_inactive_clients(current_ts)
    for channel, active in pairs(active_by_channel) do
        for client_id, last_active_ts in pairs(active) do
            local idle_days = (current_ts - last_active_ts) / NSPERDAY
            if idle_days > LOST_PROFILE_THRESHOLD then
                remove_inactive_client(channel, client_id)
            end
        end
    end
end
-- Periodic Heka hook: prune lost profiles, then publish one median history
-- per channel for each of the three metrics (days up-to-date, days since
-- activity, submission delay in hours).
function timer_event(ns)
    remove_inactive_clients(ns)
    local days_since = function(now, v) return math.floor((now - v)/NSPERDAY) end
    timer_event_metric("up-to-date", "days", seen_by_channel, seen_history_by_channel, ns, days_since)
    timer_event_metric("active", "days", active_by_channel, active_history_by_channel, ns, days_since)
    timer_event_metric("delay", "hours", delay_by_channel, delay_history_by_channel, ns,
                       function(_, v) return math.floor(v/NSPERHOUR) end)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment