Quick and dirty Telemetry analysis of slow-shutdown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
def map(k, d, v, cx):
    """Emit ``(addon_id, 1)`` for every add-on listed in one submission.

    k, d -- unused here; part of the mapper signature.
    v    -- the submission as a JSON string.
    cx   -- job context; results are handed to ``cx.write(key, value)``.

    Submissions without an ``info.addons`` entry (missing, or null) are
    skipped silently.
    """
    j = json.loads(v)
    try:
        addons = j['info']['addons']
    except (KeyError, TypeError):
        # KeyError: "info" or "addons" is absent.
        # TypeError: "info" (or the payload itself) is not a mapping.
        return
    if addons is None:
        return
    for addon in addons.split(','):
        # Entries look like "id:version"; keep only the id.  The id is also
        # stripped so the keys agree with parse_addons() in the generic job.
        cx.write(addon.split(':')[0].strip(), 1)
def reduce(k, v, cx):
    """Write one CSV row ``count,addon_id`` for a single add-on key.

    k  -- the add-on id emitted by map().
    v  -- iterable of the per-submission counts emitted for that id.
    cx -- job context; the row is handed to ``cx.writecsv``.
    """
    # The count goes first so the output can be sorted on column 1.
    cx.writecsv([sum(v), k])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Runs both jobs and assembles the CSV files consumed by the R analysis.
#
# Abort on the first failing step: without this a failed job would still be
# followed by sort/cat over stale or missing output.
set -euo pipefail

# Step 1: count add-on occurrences, then order rows by descending count.
# generic.py reads addons.csv assuming the largest count is on the first line.
./run.sh addons
sort -r -V -k 1,1 /mnt/telemetry/addons.out > addons.csv

# Step 2: run the generic job on the locally cached data; generic.py writes
# header.txt when it is loaded, which is then prepended to the job output.
./run_local.sh generic
cat header.txt /mnt/telemetry/generic.out > ./generic.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "version": 1,
  "dimensions": [
    {
      "field_name": "reason",
      "allowed_values": ["saved-session"]
    },
    {
      "field_name": "appName",
      "allowed_values": "Firefox"
    },
    {
      "field_name": "appUpdateChannel",
      "allowed_values": ["release"]
    },
    {
      "field_name": "appVersion",
      "allowed_values": "28.0"
    },
    {
      "field_name": "appBuildID",
      "allowed_values": "*"
    },
    {
      "field_name": "submission_date",
      "allowed_values": ["20140420"]
    }
  ]
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
# Fields copied from each submission's "simpleMeasurements" section.
simple_keys = [
    "shutdownDuration",
    "failedProfileLockCount",
    "maximalNumberOfConcurrentThreads",
    "uptime",
    "pingsOverdue",
    "savedPings",
    "sessionRestored",
]
# Fields copied from each submission's "info" section.
system_keys = [
    "OS",
    "cpucount",
    "memsize",
    "arch",
    "version",
    "adapterVendorID",
    "flashVersion",
]
# Add-on ids that each get their own presence column; filled in below.
addon_keys = []


def _load_addon_keys(path, limit=100, min_share=0.01):
    """Return the most frequent add-on ids listed in the CSV at *path*.

    The file holds ``count,addon_id`` rows sorted by descending count.  The
    count on the first row is used as the reference total and that row itself
    is not returned.  Of the following rows at most *limit* are considered,
    and reading stops at the first one whose count is below
    ``min_share * total``.

    Raises ValueError when the file contains no rows.
    """
    with open(path) as f:
        # Ignore blank lines (e.g. a trailing newline) instead of crashing on them.
        lines = [line for line in f if line.strip()]
    if not lines:
        raise ValueError("%s is empty; run the addons job first" % path)

    # NOTE(review): presumably the top entry is present in (nearly) every
    # submission, which makes its count a proxy for the number of submissions
    # -- confirm against the data.
    total = float(lines[0].split(',', 1)[0])

    keys = []
    for line in lines[1:limit + 1]:
        count, name = line.split(',', 1)
        if float(count) < min_share * total:
            # Rows are sorted, so every remaining row is below the cut-off too.
            break
        keys.append(name.strip())
    return keys


# Get top addons
addon_keys.extend(_load_addon_keys('../addons.csv'))
if not addon_keys:
    # Raised explicitly instead of asserted so the check survives "python -O".
    raise ValueError("no add-on in ../addons.csv reaches the frequency threshold")
# Write the CSV header for the job output.  The column order mirrors the row
# built by map() (simple, system, add-on columns) plus the count that
# reduce() appends.  This runs every time the module is loaded.
with open('../header.txt', 'w') as f:
    f.write(','.join(simple_keys + system_keys + addon_keys + ['multiplicity']) + '\n')
def add(keys, dic, add_bool=False):
    """Build one output column per entry of *keys*.

    keys     -- column names, in output order.
    dic      -- container queried with ``key in dic``; must also support
                ``dic[key]`` unless *add_bool* is true (so a set is fine then).
    add_bool -- when true, emit 1 for a present key instead of its value.

    Returns a list the same length as *keys*; absent keys yield ``None``.
    """
    if add_bool:
        return [1 if key in dic else None for key in keys]
    return [dic[key] if key in dic else None for key in keys]
def parse_addons(raw):
    """Return the set of add-on ids found in a raw ``addons`` field.

    raw -- comma-separated ``id:version`` entries, or ``None``.

    Versions are dropped and surrounding whitespace is stripped.  An empty
    string yields ``{''}`` because splitting always produces one entry;
    ``None`` yields an empty set.
    """
    if raw is None:
        return set()
    return {entry.split(':')[0].strip() for entry in raw.split(',')}
def map(k, d, v, cx):
    """Emit one ``(row, 1)`` pair describing a single submission.

    k, d -- unused here; part of the mapper signature.
    v    -- the submission as a JSON string; it must contain the
            "simpleMeasurements" and "info" sections (KeyError otherwise).
    cx   -- job context; the pair is handed to ``cx.write``.

    The row is a tuple holding the simple_keys, system_keys and addon_keys
    columns in that order.  Add-on columns are 1 when the add-on is listed
    and None otherwise.
    """
    j = json.loads(v)
    info = j['info']
    out = add(simple_keys, j['simpleMeasurements'])
    out += add(system_keys, info)
    # Always emit the add-on columns.  Skipping them when "addons" is missing
    # would produce a short row whose count ends up under an add-on column
    # once the header is prepended.
    raw_addons = info.get('addons')
    installed = parse_addons(raw_addons) if raw_addons is not None else set()
    out += add(addon_keys, installed, True)
    cx.write(tuple(out), 1)
def reduce(k, v, cx):
    """Write one CSV row: the columns of *k* followed by its multiplicity.

    k  -- the row tuple emitted by map().
    v  -- iterable of the counts emitted for that row.
    cx -- job context; the row is handed to ``cx.writecsv``.
    """
    cx.writecsv(list(k) + [sum(v)])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Usage: ./run.sh JOB
#
# Runs the mapreduce job defined in JOB.py (located next to this script)
# with the inputs selected by filter.json, and writes the result to
# /mnt/telemetry/JOB.out.
set -euo pipefail

if [ "$#" -lt 1 ]; then
    echo "usage: $0 JOB" >&2
    exit 1
fi
job="$1"

# The job module is started from inside the telemetry-server checkout, hence
# the ../ prefixes on the job and filter paths.
cd telemetry-server
python -m mapreduce.job "../${job}.py" \
    --input-filter ../filter.json \
    --num-mappers 16 \
    --num-reducers 4 \
    --data-dir /mnt/telemetry/work \
    --work-dir /mnt/telemetry/work \
    --output "/mnt/telemetry/${job}.out" \
    --bucket "telemetry-published-v1"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Usage: ./run_local.sh JOB
#
# Same as run.sh, but passes --local-only and points --data-dir at
# /mnt/telemetry/work/cache.
# NOTE(review): presumably that directory holds the files fetched by an
# earlier run.sh invocation -- confirm against telemetry-server.
set -euo pipefail

if [ "$#" -lt 1 ]; then
    echo "usage: $0 JOB" >&2
    exit 1
fi
job="$1"

# The job module is started from inside the telemetry-server checkout, hence
# the ../ prefixes on the job and filter paths.
cd telemetry-server
python -m mapreduce.job "../${job}.py" \
    --input-filter ../filter.json \
    --num-mappers 16 \
    --num-reducers 4 \
    --data-dir /mnt/telemetry/work/cache \
    --work-dir /mnt/telemetry/work \
    --output "/mnt/telemetry/${job}.out" \
    --bucket "telemetry-published-v1" \
    --local-only
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(dplyr) | |
library(GGally) | |
library(vcd) | |
library(pwr) | |
# Effect size of the association in a contingency table.
#
# table: two-way table (matrix) of counts.
# Returns Cramer's V as reported by vcd::assocstats(), as a plain numeric.
effect_table <- function(table) {
  # Select the component by name rather than by its position in the result.
  as.numeric(assocstats(table)$cramer)
}
# Power of a chi-squared test on `table` for a given effect size.
#
# table:     contingency table; its grand total is used as the sample size.
# test:      result of chisq.test() on that table; supplies the degrees of
#            freedom. These are NA when the test was run with
#            simulate.p.value = TRUE, so pass a non-simulated test.
# sig.level: significance level (default 0.05).
# w:         effect size to detect (default 0.1).
# Returns the object produced by pwr::pwr.chisq.test().
power_chisq <- function(table, test, sig.level = 0.05, w = 0.1) {
  pwr.chisq.test(N = sum(table), df = as.numeric(test$parameter),
                 sig.level = sig.level, w = w)
}
# Replace extreme values of a numeric vector by NA.
#
# x:     numeric vector.
# na.rm: passed to quantile(); with the default TRUE, NAs in x are ignored
#        when the cut-offs are computed.
# ...:   further arguments passed to quantile().
# probs: lower and upper quantile used as cut-offs (default 0.5% / 99.5%).
# Returns x with every value strictly outside the two cut-offs set to NA;
# existing NAs stay NA.
remove_outliers <- function(x, na.rm = TRUE, ..., probs = c(.005, .995)) {
  qnt <- quantile(x, probs = probs, na.rm = na.rm, ...)
  y <- x
  y[!is.na(x) & (x < qnt[1] | x > qnt[2])] <- NA
  y
}
# Strip leading and trailing whitespace from each element of a character vector.
trim <- function(x) {
  gsub("^\\s+|\\s+$", "", x)
}
# Major component of a dotted version string, e.g. "13.0.0.182" -> "13".
#
# str: vector of versions; coerced with as.character() first.
# Returns a character vector with everything from the first "." removed.
get_flash_version <- function(str) {
  sub("\\..*", "", as.character(str))
}
# Prepare a raw submissions data frame for the analysis below.
#
# df: data frame read from the job's CSV output; must contain the columns
#     referenced below.
# Returns a data frame in which
#   * multiplicity, pingsOverdue and savedPings are dropped,
#   * shutdownDuration / uptime outliers are removed (rows whose value was
#     blanked by remove_outliers() are filtered out),
#   * only rows with 1, 2, 4, 8 or 12 cores are kept,
#   * cpucount, version, adapterVendorID and flashMainVersion are factors,
#   * failedProfileLockCount becomes a TRUE/FALSE factor ("at least one"),
#   * memsize is binned, each bin labelled with its upper bound.
#
# NOTE(review): %.% is the chaining operator of early dplyr releases; newer
# releases provide %>% instead -- confirm which dplyr version is in use.
clean <- function(df) {
  df %.%
    select(-multiplicity, -pingsOverdue, -savedPings) %.%
    mutate(shutdownDuration = remove_outliers(shutdownDuration),
           uptime = remove_outliers(uptime),
           flashMainVersion = as.factor(get_flash_version(flashVersion))) %.%
    filter(!is.na(uptime),
           !is.na(shutdownDuration),
           cpucount %in% c(1, 2, 4, 8, 12)) %.%
    mutate(cpucount = as.factor(cpucount),
           failedProfileLockCount = as.factor(failedProfileLockCount >= 1),
           version = as.factor(version),
           adapterVendorID = as.factor(adapterVendorID),
           # Intervals are right-closed, so a memsize of exactly 0 falls
           # outside the first bin and becomes NA.
           memsize = cut(memsize,
                         breaks=c(0, 1024, 2048, 4096, 8192, 16384, Inf),
                         labels=c("1024", "2048", "4096", "8192", "16384", "Inf")))
}
# Histogram of shutdownDuration on a log10 x axis.
#
# df: data frame with a shutdownDuration column.
# Returns a ggplot object. Bar heights are each bin's share of all
# observations (count / total count); the axis is labelled "Density".
plot_shutdown <- function(df) {
  ggplot(aes(x=shutdownDuration), data=df) +
    geom_histogram(aes(y=..count../sum(..count..)), fill="blue", alpha=0.7, binwidth=0.01) +
    scale_x_log10(name="Shutdown Duration") +
    scale_y_continuous(name="Density") +
    theme_bw()
}
# Drop the first two characters of each string (keeps character 3 onwards).
rem <- function(param) {
  substring(param, 3)
}
# Per-column counts extracted from the columns of df from position 11 on.
#
# df: cleaned submissions data frame.
# Returns an integer vector with one entry per selected column.
#
# NOTE(review): presumably columns 11.. are the add-on presence columns --
# verify against the header of the CSV actually loaded, since the position
# is hard-coded.
# NOTE(review): the count is parsed out of the second row of summary()'s
# printed table (a "label:count" cell); presumably that row is the "add-on
# present" level -- confirm on real data.
adistrib <- function(df) {
  addons <- df[, c(11:length(df))]
  addons <- summary(sapply(addons, as.factor))[2,]
  addons <- sapply(addons, rem)    # drop the leading 2-character label
  addons <- sapply(addons, trim)   # drop the padding around the number
  addons <- sapply(addons, strtoi)
  # Return visibly: ending on the assignment made a bare top-level call
  # print nothing.
  addons
}
#######################################################################################
#######################################################################################

# Let's have a look first at a representative sample of the population for release 28
df <- read.csv("generic_big.csv", row.names=NULL)
df[is.na(df)] <- 0 # we can safely impute to 0

# Let's clean up the data
df <- clean(df)

# Let's see if we can spot some pattern
#ggpairs(df[, c(1:9)])
# Can't see anything clearly correlated to shutdownDuration, besides a small correlation with
# cpucount and memsize

# Plot shutdownDuration distribution
plot_shutdown(df)
# We got a nice spike from 5000 to 8000

# Plot shutdownDuration vs cpucount
ggplot(aes(y=shutdownDuration, x=cpucount), data=df) +
  geom_boxplot(fill="blue", alpha=0.7) +
  scale_x_discrete(name="Cores") +
  scale_y_continuous(name="Shutdown Duration") +
  coord_cartesian(ylim = c(-500, 10000)) +
  theme_bw()
# The number of cores influences negatively shutdownDuration;
# Since memsize is correlated to the number of cores there is no need for another plot

# Investigate submissions with a shutdown duration of 5000-8000
df_com <- df %.% filter(shutdownDuration <= 5000)
df_slow <- df %.% filter(shutdownDuration > 5000 & shutdownDuration < 8000)

# Is the distribution of threads different?
wilcox.test(df_com$maximalNumberOfConcurrentThreads, df_slow$maximalNumberOfConcurrentThreads)
summary(df_com$maximalNumberOfConcurrentThreads)
summary(df_slow$maximalNumberOfConcurrentThreads) # Looks like the distribution is shifted to the right by 1

# Let's have a look at the cpu count
table <- rbind(summary(df_com$cpucount), summary(df_slow$cpucount))
chisq.test(table, simulate.p.value = TRUE)
# df_slow contains machines with less cores, news at 11

# Let's have a look at the memory
summary(df_com$memsize)
summary(df_slow$memsize)
table <- rbind(summary(df_com$memsize), summary(df_slow$memsize))
effect_table(table) # small but still interesting
chisq.test(table, simulate.p.value=TRUE)
# df_slow contains machines with less memory, news at 11

# Let's have a look at the flash version
summary(df_com$flashMainVersion)
summary(df_slow$flashMainVersion)
table <- rbind(summary(df_com$flashMainVersion), summary(df_slow$flashMainVersion))
effect_table(table)
chisq.test(table, simulate.p.value=TRUE) # Nothing really interesting here

# Let's compare the failed profile lock count
table <- rbind(summary(df_com$failedProfileLockCount), summary(df_slow$failedProfileLockCount))
effect_table(table) # small, still worth investigating though
chisq.test(table, simulate.p.value=TRUE)
# Ratio of the second to the first factor level (TRUE / FALSE) in each group
com <- summary(df_com$failedProfileLockCount)[2] / summary(df_com$failedProfileLockCount)[1]
slow <- summary(df_slow$failedProfileLockCount)[2] / summary(df_slow$failedProfileLockCount)[1]
# Trailing newlines keep the two messages on separate lines
cat("slow has", slow/com, "times more locks than com\n")
cat("% of locked profiles for slow:", slow*100, "\n")
# Note that even though slow has significantly more locks than com (6.5 times), the chance of having a lock
# in the first place is quite low (1-2%)

# Is the distribution of sessionRestored different?
wilcox.test(df_com$sessionRestored, df_slow$sessionRestored)
summary(df_com$sessionRestored)
summary(df_slow$sessionRestored)

# Let's compare the distribution of add-ons
addons <- rbind(adistrib(df_com), adistrib(df_slow))
addons[is.na(addons)] <- 0
# Let's just use the most frequent ones, if we can't spot a pattern there then all hopes are lost
# (at most 20 columns; fewer when the table is narrower than that)
top_addons <- addons[, seq_len(min(20, ncol(addons))), drop = FALSE]
# NOTE(review): the effect size is computed on the full table while the test
# below uses top_addons -- confirm this is intended.
effect_table(addons) # very small
test <- chisq.test(top_addons)
# Couldn't find any difference, do we have enough power (0.8) for our smallest significant effect size (0.1)?
power_chisq(top_addons, test) # We do
#######################################################################################
#######################################################################################

# Let's have a look now only at a "big" sample of submissions with failedProfileLockCount >=1
df_lock <- read.csv("lockcount.csv", row.names=NULL)
df_lock[is.na(df_lock)] <- 0 # we can safely impute to 0
df_lock <- clean(df_lock)

# Let's look at the OS
summary(df_lock$OS) # There is only WINNT

# Restrict both samples to the same set of OS versions and drop unused factor levels
df <- df %.%
  filter(version %in% c(5.1, 5.2, 6.1, 6.2, 6.3)) %.%
  mutate(version = factor(version))
df_lock <- df_lock %.%
  filter(version %in% c(5.1, 5.2, 6.1, 6.2, 6.3)) %.%
  mutate(version = factor(version))

# Let's see if we can spot some pattern
#ggpairs(df_lock[, c(1, 3, 4, 6, 7, 8, 9)])

# Plot shutdownDuration distribution
plot_shutdown(df_lock)
# shutdownDuration in df_lock is significantly higher than in df;
# it seems to be 10 times higher when comparing medians

# Is the distribution of threads different?
wilcox.test(df_lock$maximalNumberOfConcurrentThreads, df$maximalNumberOfConcurrentThreads)
summary(df$maximalNumberOfConcurrentThreads)
summary(df_lock$maximalNumberOfConcurrentThreads) # Looks like the distribution is shifted to the right by 3

# Is the uptime distribution different?
wilcox.test(df_lock$uptime, df$uptime)
summary(df$uptime)
summary(df_lock$uptime) # whoops, uptime seems to be 5 times higher when comparing medians

# Is the OS version different? (only WINNT data entries show up in the df_lock)
table <- rbind(summary(df$version), summary(df_lock$version))
chisq.test(table) # Yep, let's see what our effect size is
effect_table(table) # Medium (0.35), so it's significant
# Seems like less users here are on 6.3, while the distribution is the same for the other versions

# Is the CPU count distribution different?
table <- rbind(summary(df$cpucount), summary(df_lock$cpucount))
chisq.test(table) # Yep, let's see what our effect size is
effect_table(table) # Medium (0.35), so it's significant
# CPU distribution is shifted slightly to the left, i.e. lower end machines

# Let's have a look at the flash version
summary(df$flashMainVersion)
summary(df_lock$flashMainVersion)
table <- rbind(summary(df$flashMainVersion), summary(df_lock$flashMainVersion))
effect_table(table)
chisq.test(table, simulate.p.value=TRUE) # There is a clear difference here

# Is the distribution of sessionRestored different?
wilcox.test(df_lock$sessionRestored, df$sessionRestored)
summary(df$sessionRestored)
summary(df_lock$sessionRestored)

# Is the distribution for the top add-ons different?
# print() makes sure the counts are shown even if adistrib() returns invisibly
print(adistrib(df_lock)) # Looks like there isn't an add-on that correlates with all those submissions so don't bother
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment