Created
January 31, 2017 10:45
-
-
Save Dexterp37/30b9ce2e127d0c90a36de93679540c4e to your computer and use it in GitHub Desktop.
Bug 1333806 - Investigate pings with missing activePlugins sections
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# Bug 1333806 - Investigate pings with missing activePlugins sections | |
# In[1]: | |
import ujson as json | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
import numpy as np | |
import plotly.plotly as py | |
from plotly.graph_objs import * | |
from moztelemetry import get_pings_properties, get_one_ping_per_client | |
from moztelemetry.dataset import Dataset | |
get_ipython().magic(u'matplotlib inline') | |
# In[5]: | |
# Sample 10% of the January 2017 "main" pings (v4) sent by Firefox Nightly.
pings = (
    Dataset.from_source("telemetry")
    .where(appName='Firefox')
    .where(docType='main')
    .where(submissionDate=lambda x: "20170101" <= x <= "20170131")
    .where(appUpdateChannel="nightly")
    .where(sourceVersion="4")
    .records(sc, sample=0.1)
)
# ... and extract only the attributes we need from the Telemetry submissions:
# In[6]:
subset = get_pings_properties(
    pings,
    [
        "clientId",
        "environment/system/os/name",
        "environment/addons",
    ],
)
# In[24]:
# Total number of sampled pings.
ping_count = subset.count()
# Let's try to understand what's going on with the activePlugins section coming from the client pings. | |
# In[13]: | |
def to_os_plugins(x):
    """Classify the state of the activePlugins section of a single ping.

    Args:
        x: Mapping of ping properties. Reads the keys
            "environment/system/os/name" and "environment/addons".

    Returns:
        A ``((os_name, status), 1)`` pair suitable for counting by key.
        ``os_name`` is "Unknown" when the OS name key is absent, and
        ``status`` is a short description of what was found.
    """
    os_name = x.get("environment/system/os/name", "Unknown")
    env_addons = x.get("environment/addons")

    if env_addons is None:
        status = "No env_addons section"
    elif not isinstance(env_addons, dict):
        status = "Env_addons is not a dict"
    else:
        # Use .get() rather than indexing: an addons section that lacks the
        # "activePlugins" key must be counted, not abort the job with KeyError.
        active_plugins = env_addons.get("activePlugins")
        if active_plugins is None:
            status = "No activePlugins"
        elif isinstance(active_plugins, dict):
            status = "activePlugins is a dict, not a list"
        elif not isinstance(active_plugins, list):
            status = "activePlugins is not a list either!"
        elif not active_plugins:
            status = "empty activePlugins list"
        else:
            status = "activePlugins should be fine"

    return ((os_name, status), 1)
# Tag every ping with its (OS, activePlugins status) key.
plugins_errors = subset.map(to_os_plugins)
# In[25]:
# Tally the pings per (OS, status) key and display the result.
error_counts = plugins_errors.countByKey()
error_counts
# It looks like we're receiving *dicts* instead of *lists* for activePlugins in some pings. Here's the breakdown, per platform: | |
# | |
# * Darwin - 4769 over 14499 (ratio 0.33)
# * Linux - 13910 over 27714 (ratio 0.50) | |
# * Windows - 90009 over 340591 (ratio 0.26) | |
# | |
# Let's also check if the dicts contain any key. | |
# In[21]: | |
def filter_valid_dicts(p):
    """Tell whether a ping's activePlugins section is a dict.

    Args:
        p: Mapping of ping properties. Reads the key "environment/addons".

    Returns:
        True only when the addons section is a dict whose "activePlugins"
        value is itself a dict; False in every other case (section missing,
        key missing, None, a list, or any other type).
    """
    env_addons = p.get("environment/addons")
    # A non-dict addons section cannot be indexed by key; reject it instead
    # of raising TypeError.
    if not isinstance(env_addons, dict):
        return False
    # .get() so that a missing "activePlugins" key is rejected rather than
    # raising KeyError. Requiring an actual dict (not merely "not a list")
    # lets callers safely treat the kept values as dicts.
    return isinstance(env_addons.get("activePlugins"), dict)
# Keep only the pings selected by filter_valid_dicts.
plugins_dicts = subset.filter(filter_valid_dicts)
# In[22]:
# Map each remaining ping to the number of keys in its activePlugins dict.
dict_keys_counts = plugins_dicts.map(
    lambda p: len(p.get("environment/addons", {}).get("activePlugins").keys())
)
# In[23]:
# Histogram of key counts: how many pings have 0 keys, 1 key, ...
dict_keys_counts.countByValue()
# Since we're here, also check if we're receiving pings with empty *lists* as activePlugins (which is what we expect). | |
# In[29]: | |
def filter_empty_lists(p):
    """Tell whether a ping's activePlugins section is an empty list.

    Args:
        p: Mapping of ping properties. Reads the key "environment/addons".

    Returns:
        True only when the addons section is a dict whose "activePlugins"
        value is a list with no elements; False in every other case.
    """
    env_addons = p.get("environment/addons")
    # A non-dict addons section cannot be indexed by key; reject it instead
    # of raising TypeError.
    if not isinstance(env_addons, dict):
        return False
    # .get() so that a missing "activePlugins" key is rejected rather than
    # raising KeyError.
    active_plugins = env_addons.get("activePlugins")
    # Require a real list: other empty values (e.g. "") are not empty lists,
    # and unsized values would make len() raise.
    return isinstance(active_plugins, list) and not active_plugins
# Keep only the pings selected by filter_empty_lists.
plugin_lists = subset.filter(filter_empty_lists)
# In[30]:
# Number of such pings.
plugin_lists.count()
# Oh, that's weird. It looks like every empty *activePlugins* section contains {} rather than [].
# In[ ]: | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment