Skip to content

Instantly share code, notes, and snippets.

@vitillo
Last active August 29, 2015 14:16
Show Gist options
  • Save vitillo/cdcfbbb94f308e20e4ee to your computer and use it in GitHub Desktop.
Save vitillo/cdcfbbb94f308e20e4ee to your computer and use it in GitHub Desktop.
Bug 1134669
-- This Source Code Form is subject to the terms of the Mozilla Public
-- License, v. 2.0. If a copy of the MPL was not distributed with this
-- file, You can obtain one at http://mozilla.org/MPL/2.0/.
--[[
1) How many days do we need to look back for k% of profiles to be up-to-date?
Each profile is associated to the date in which we last received a submission
for it on our servers. Periodically, we compute for all profiles the difference
between the current date and the date of reception and finally plot a histogram
of the differences expressed in number of days.
2) How many days do we need to look back to observe k% of profile activity?
Each profile is associated with its last activity date. Periodically, we compute
for all profiles the difference from the last activity date to the current date
and finally plot a histogram of the differences expressed in number of days.
3) What’s the delay in hours between the start of the profile activity and the time we receive the submission?
When we receive a new submission for a profile we compute the delay from the
start of the profile activity to the time we received the submission on our servers.
Periodically, we plot a histogram of the latencies for the profiles in hours.
As timeseries of histograms or heatmaps are not supported by the Heka plotting facilities,
only the median and some other percentiles are being output.
Can we say something about the *daily* profile activity?
Given 2, we know that k% of #N profiles were active in the past d days, where #N
is the total number of active profiles. We would like to say something about the
number of active users on day L, where L is a day within the past d days.
Say #L is the number of profiles that were active on day L for which we have
received a submission so far. We can define an upper bound to the true number
of active profiles on that day as #L + #U where #U = #N(100 - k)/100. That is,
the proportion of daily active users on day L for which we have received a
submission so far is at least #L/(#L + #U).
Proof: Let’s ignore churn and new profiles and assume that #U is not an upper bound.
That means that there are some profiles that were active on day L but are not
part of #N(100-k)/100, so they must have sent a submission of their activity on
day L within the past d days which wouldn’t have been counted in #L.
--]]
require "circular_buffer"
require "table"
require "string"
require "math"
require "os"
-- Per-channel state. Each *_by_channel table maps a channel name to a table
-- keyed by client id (filled in by process_client_metric); each
-- *_history_by_channel table maps a channel name to the circular buffer
-- lazily created by get_history.
-- seen: message Timestamp of the latest submission recorded for the client.
local seen_by_channel = {}
local seen_history_by_channel = {}
-- active: creationTimestamp field of the latest submission recorded.
local active_by_channel = {}
local active_history_by_channel = {}
-- delay: message Timestamp minus creationTimestamp for the latest submission.
local delay_by_channel = {}
local delay_history_by_channel = {}
-- Dimensions handed to circular_buffer.new in get_history: number of rows and
-- seconds represented by each row.
local rows = read_config("rows") or 1440
local sec_per_row = 2 -- TODO: read_config("sec_per_row") or 60
-- Clients whose last activity is more than this many days old are dropped by
-- remove_inactive_clients.
local LOST_PROFILE_THRESHOLD = 42 -- https://people.mozilla.org/~bsmedberg/fhr-reporting/#usage
-- Only messages whose Fields[sourceVersion] equals this value are processed.
local PING_VERSION = "4"
-- Conversion factors; timestamps are divided by these to obtain hours/days,
-- i.e. they are treated as nanoseconds.
local NSPERHOUR = 60*60*1e9
local NSPERDAY = 24*NSPERHOUR
-- Column index of the median series in every history circular buffer.
local MEDIAN = 1
-- Debug helper: emits `message` as a "txt" payload named "debug".
-- The message goes through table.concat, so strings and numbers come out as
-- text and a nil message becomes the empty string.
local function log(message)
    local text = table.concat({message}, "\n")
    inject_payload("txt", "debug", text)
end
-- Returns the sub-table stored under `channel` in `t`, creating and storing
-- an empty one on first use.
-- @param t        table keyed by channel name
-- @param channel  channel name used as the key
-- @return the (possibly freshly created) table for that channel
local function get_channel_entry(t, channel)
    local existing = t[channel]
    if existing then
        return existing
    end
    local created = {}
    t[channel] = created
    return created
end
-- Returns the history circular buffer for `channel`, creating it on first use.
-- A new buffer has `rows` rows, a single column and `sec_per_row` seconds per
-- row; its only column (MEDIAN) is labelled "Median" with the given unit.
-- @param unit                       unit label for the median column
-- @param metric_history_by_channel  table caching one buffer per channel
-- @param channel                    channel name used as the cache key
-- @return the circular buffer for that channel
local function get_history(unit, metric_history_by_channel, channel)
    local cached = metric_history_by_channel[channel]
    if cached then
        return cached
    end
    local fresh = circular_buffer.new(rows, 1, sec_per_row)
    -- The last argument is presumably the aggregation method; confirm against
    -- the circular_buffer documentation.
    fresh:set_header(MEDIAN, "Median", unit, "none")
    metric_history_by_channel[channel] = fresh
    return fresh
end
-- Records `value` for `client_id` under `channel`, overwriting any value
-- previously stored for that client. The per-channel table is created on
-- demand by get_channel_entry.
local function process_client_metric(metric_by_channel, channel, client_id, value)
    get_channel_entry(metric_by_channel, channel)[client_id] = value
end
-- Sandbox entry point, invoked once per incoming message.
-- For messages whose source version matches PING_VERSION and that carry a
-- sample id, a client id and a creation timestamp, records per channel and
-- per client:
--   * when the submission was received (message Timestamp),
--   * when the reported activity started (Fields[creationTimestamp]),
--   * the delay between the two.
-- Messages that lack any of these fields are skipped.
-- @return always 0, for processed and skipped messages alike
function process_message ()
    local sample_id = read_message("Fields[sampleId]")
    local version = read_message("Fields[sourceVersion]")
    if version ~= PING_VERSION or not sample_id then -- TODO sample_id == 0
        return 0
    end

    -- A nil client id would be used as a table key further down, which raises
    -- "table index is nil"; skip such messages instead of aborting.
    local client_id = read_message("Fields[clientId]")
    if client_id == nil then
        return 0
    end

    -- creationTimestamp exists only in new "unified" pings.
    local activity_ts = read_message("Fields[creationTimestamp]")
    if not activity_ts then
        return 0
    end

    local ts = read_message("Timestamp")
    local channel = read_message("Fields[appUpdateChannel]") or "UNKNOWN"

    process_client_metric(seen_by_channel, channel, client_id, ts)
    process_client_metric(active_by_channel, channel, client_id, activity_ts)
    process_client_metric(delay_by_channel, channel, client_id, ts - activity_ts)
    return 0
end
-- Computes, for every channel, the median of a per-client metric and publishes
-- the channel's history buffer.
-- @param descr                      suffix of the payload name ("<channel> <descr>")
-- @param unit                       unit label used when a history buffer is created
-- @param metric_by_channel          channel -> (client id -> raw value)
-- @param metric_history_by_channel  channel -> history circular buffer
-- @param ns                         current timer timestamp
-- @param calc                       function(ns, raw_value) giving the value to rank
local function timer_event_metric(descr, unit, metric_by_channel, metric_history_by_channel, ns, calc)
    for channel, metric in pairs(metric_by_channel) do
        -- Both temporaries are local so they do not leak into the global
        -- environment between timer events.
        local sorted_metric = {}
        for _, v in pairs(metric) do
            sorted_metric[#sorted_metric + 1] = calc(ns, v)
        end
        table.sort(sorted_metric)

        -- Lower median; for an empty channel the index is 0 and the lookup
        -- yields nil, in which case nothing is written for this tick.
        local median = sorted_metric[math.ceil(#sorted_metric/2)]
        local history = get_history(unit, metric_history_by_channel, channel)
        if median then
            history:set(ns, MEDIAN, median)
        end
        inject_payload("cbuf", channel .. " " .. descr, history)
    end
end
-- Forgets everything recorded for `client_id` on `channel`: its last-seen
-- timestamp, its last-activity timestamp and its submission delay.
-- All three per-channel tables must already exist for `channel`.
local function remove_inactive_client(channel, client_id)
    local seen = seen_by_channel[channel]
    seen[client_id] = nil
    local active = active_by_channel[channel]
    active[client_id] = nil
    local delay = delay_by_channel[channel]
    delay[client_id] = nil
end
-- Drops every client whose last recorded activity is more than
-- LOST_PROFILE_THRESHOLD days older than `current_ts`.
-- Entries are cleared while the table is being traversed; Lua permits
-- clearing existing fields during a pairs() traversal.
local function remove_inactive_clients(current_ts)
    for channel, last_active_by_client in pairs(active_by_channel) do
        for client_id, last_active_ts in pairs(last_active_by_client) do
            local idle_days = (current_ts - last_active_ts)/NSPERDAY
            if idle_days > LOST_PROFILE_THRESHOLD then
                remove_inactive_client(channel, client_id)
            end
        end
    end
end
-- Sandbox entry point, invoked periodically with the current timestamp `ns`.
-- First purges clients considered lost, then publishes the median of each of
-- the three tracked metrics per channel.
function timer_event(ns)
    remove_inactive_clients(ns)

    -- Whole days elapsed between a stored timestamp and now.
    local function days_since(now, ts)
        return math.floor((now - ts)/NSPERDAY)
    end

    -- Stored delays are already differences; only convert them to whole hours.
    local function delay_in_hours(_, delay)
        return math.floor(delay/NSPERHOUR)
    end

    timer_event_metric("up-to-date", "days", seen_by_channel, seen_history_by_channel, ns, days_since)
    timer_event_metric("active", "days", active_by_channel, active_history_by_channel, ns, days_since)
    timer_event_metric("delay", "hours", delay_by_channel, delay_history_by_channel, ns, delay_in_hours)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment