Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dexterp37/30b9ce2e127d0c90a36de93679540c4e to your computer and use it in GitHub Desktop.
Save Dexterp37/30b9ce2e127d0c90a36de93679540c4e to your computer and use it in GitHub Desktop.
Bug 1333806 - Investigate pings with missing activePlugins sections
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# Bug 1333806 - Investigate pings with missing activePlugins sections
# In[1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
from plotly.graph_objs import *
from moztelemetry import get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset
get_ipython().magic(u'matplotlib inline')
# In[5]:
# Select the pings to analyse: Firefox "main" pings on the nightly channel
# with submission dates in January 2017 (bounds inclusive, compared as
# YYYYMMDD strings) and sourceVersion "4".
# `sc` is not defined in this file -- presumably the SparkContext provided by
# the notebook environment; `sample=0.1` presumably keeps ~10% of the matching
# records (confirm against the moztelemetry Dataset API).
pings = (
    Dataset.from_source("telemetry")
    .where(appName='Firefox')
    .where(docType='main')
    .where(submissionDate=lambda x: "20170101" <= x <= "20170131")
    .where(appUpdateChannel="nightly")
    .where(sourceVersion="4")
    .records(sc, sample=0.1)
)
# Extract only the attributes we need from the Telemetry submissions:
# In[6]:
# Reduce every ping to the three properties this analysis reads.
subset = get_pings_properties(
    pings,
    [
        "clientId",
        "environment/system/os/name",
        "environment/addons",
    ],
)
# In[24]:
# Total number of pings in the reduced dataset.
ping_count = subset.count()
# Let's try to understand what's going on with the activePlugins section coming from the client pings.
# In[13]:
def to_os_plugins(x):
    """Classify the state of a ping's ``activePlugins`` section.

    Args:
        x: Mapping of extracted ping properties. Only the
            "environment/system/os/name" and "environment/addons" entries
            are read.

    Returns:
        A ``((os_name, label), 1)`` tuple. ``os_name`` is the value stored
        under "environment/system/os/name" ("Unknown" when that key is
        absent) and ``label`` is a human-readable description of what was
        found for ``activePlugins``.
    """
    os_name = x.get("environment/system/os/name", "Unknown")
    env_addons = x.get("environment/addons", None)

    if env_addons is None:
        label = "No env_addons section"
    elif not isinstance(env_addons, dict):
        label = "Env_addons is not a dict"
    else:
        # Use .get() rather than indexing: an addons section that lacks the
        # "activePlugins" key must be reported as "No activePlugins" instead
        # of raising KeyError.
        active_plugins = env_addons.get("activePlugins")
        if active_plugins is None:
            label = "No activePlugins"
        elif isinstance(active_plugins, dict):
            label = "activePlugins is a dict, not a list"
        elif not isinstance(active_plugins, list):
            label = "activePlugins is not a list either!"
        elif len(active_plugins) < 1:
            label = "empty activePlugins list"
        else:
            label = "activePlugins should be fine"

    return ((os_name, label), 1)
# Turn every record of `subset` into the ((os_name, label), 1) pair produced
# by to_os_plugins.
plugins_errors = subset.map(to_os_plugins)
# In[25]:
# Count the records per (os_name, label) key. Presumably this is the Spark
# RDD countByKey action, which returns the counts to the driver as a
# dict-like object -- confirm.
error_counts = plugins_errors.countByKey()
# Bare expression: shows the counts as the notebook cell's output.
error_counts
# It looks like we're receiving *dicts* instead of *lists* for activePlugins in some pings. Here's the breakdown, per platform:
#
# * Darwin - 4769 over 14499 (ratio 0.32)
# * Linux - 13910 over 27714 (ratio 0.50)
# * Windows - 90009 over 340591 (ratio 0.26)
#
# Let's also check if the dicts contain any keys.
# In[21]:
def filter_valid_dicts(p):
    """Return True when the ping's ``activePlugins`` section is a dict.

    Args:
        p: Mapping of extracted ping properties. Only the
            "environment/addons" entry is read.

    Returns:
        True only if "environment/addons" is a dict whose "activePlugins"
        value is itself a dict. Returns False when the addons section is
        missing or not a dict, when "activePlugins" is missing or None, and
        when it is a list or any other non-dict value.
    """
    env_addons = p.get("environment/addons", None)
    # A missing or malformed addons section cannot hold a dict of plugins;
    # checking the type here also avoids indexing into a non-dict value.
    if not isinstance(env_addons, dict):
        return False
    # .get() tolerates an absent "activePlugins" key (no KeyError), and the
    # isinstance check accepts only real dicts, so callers can safely treat
    # every accepted value as a dict.
    return isinstance(env_addons.get("activePlugins"), dict)
# Keep only the pings accepted by filter_valid_dicts.
plugins_dicts = subset.filter(filter_valid_dicts)
# In[22]:
# Map each remaining ping to the number of keys in its activePlugins value.
# NOTE(review): the .keys() call assumes that value is a dict; any other type
# that gets through the filter above would raise AttributeError here.
dict_keys_counts = plugins_dicts.map(lambda p: len(p.get("environment/addons", {}).get("activePlugins").keys()))
# In[23]:
# Count how many pings share each key count (shown as the cell's output).
dict_keys_counts.countByValue()
# Since we're here, also check if we're receiving pings with empty *lists* as activePlugins (which is what we expect).
# In[29]:
def filter_empty_lists(p):
    """Return True when the ping's ``activePlugins`` section is an empty list.

    Args:
        p: Mapping of extracted ping properties. Only the
            "environment/addons" entry is read.

    Returns:
        True only if "environment/addons" is a dict whose "activePlugins"
        value is a list with no elements. Returns False when the addons
        section is missing or not a dict, when "activePlugins" is missing or
        None, and when it is a dict, a non-empty list, or any other type.
    """
    env_addons = p.get("environment/addons", None)
    # A missing or malformed addons section cannot hold a plugin list;
    # checking the type here also avoids indexing into a non-dict value.
    if not isinstance(env_addons, dict):
        return False
    # .get() tolerates an absent "activePlugins" key (no KeyError).
    active_plugins = env_addons.get("activePlugins")
    # Require an actual list: unsized values would make len() fail, and other
    # empty containers (e.g. "") are not the empty list being counted.
    return isinstance(active_plugins, list) and not active_plugins
# Keep only the pings accepted by filter_empty_lists.
plugin_lists = subset.filter(filter_empty_lists)
# In[30]:
# Number of such pings (shown as the cell's output).
plugin_lists.count()
# Oh, that's weird. It looks like every empty *activePlugins* section contains {} rather than [].
# In[ ]:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment