Dexterp37/Bug 1333806 - Investigate pings with missing activePlugins sections.ipynb Secret

## Bug 1333806 - Investigate pings with missing activePlugins sections.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              Bug 1333806 - Investigate pings with missing activePlugins sections.ipynb
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Bug 1333806 - Investigate pings with missing activePlugins sections.py

# coding: utf-8

# Bug 1333806 - Investigate pings with missing activePlugins sections

# In[1]:

import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py

from plotly.graph_objs import *
from moztelemetry import get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset

get_ipython().magic(u'matplotlib inline')


# In[5]:

# Firefox nightly "main" pings (sourceVersion 4) submitted between
# 2017-01-01 and 2017-01-31, fetched with sample=0.1.
pings = (
    Dataset.from_source("telemetry")
    .where(appName='Firefox')
    .where(docType='main')
    .where(submissionDate=lambda day: "20170101" <= day <= "20170131")
    .where(appUpdateChannel="nightly")
    .where(sourceVersion="4")
    .records(sc, sample=0.1)
)


# ... and extract only the attributes we need from the Telemetry submissions:

# In[6]:

# Keep only the client id, the OS name and the add-ons section of each ping.
subset = get_pings_properties(
    pings,
    [
        "clientId",
        "environment/system/os/name",
        "environment/addons",
    ],
)


# In[24]:

# Number of records in `subset`.
ping_count = subset.count()


# Let's try to understand what's going on with the activePlugins section coming from the client pings.

# In[13]:

def to_os_plugins(x):
    """Classify the state of the activePlugins section of one ping.

    Args:
        x: Mapping holding the "environment/system/os/name" and
            "environment/addons" entries of a ping.

    Returns:
        A ``((os_name, description), 1)`` pair, so that the results can be
        counted by key.
    """
    os_name = x.get("environment/system/os/name", "Unknown")
    env_addons = x.get("environment/addons", None)

    if env_addons is None:
        return ((os_name, "No env_addons section"), 1)

    if not isinstance(env_addons, dict):
        return ((os_name, "Env_addons is not a dict"), 1)

    # Use .get(): an add-ons section that lacks the key altogether must be
    # reported as "No activePlugins" instead of raising KeyError.
    active_plugins = env_addons.get("activePlugins")
    if active_plugins is None:
        return ((os_name, "No activePlugins"), 1)

    if isinstance(active_plugins, dict):
        return ((os_name, "activePlugins is a dict, not a list"), 1)

    if not isinstance(active_plugins, list):
        return ((os_name, "activePlugins is not a list either!"), 1)

    if not active_plugins:
        return ((os_name, "empty activePlugins list"), 1)

    return ((os_name, "activePlugins should be fine"), 1)

# Tag every ping with its (OS name, activePlugins status) key.
plugins_errors = subset.map(to_os_plugins)


# In[25]:

# Count the pings under each (OS name, status) key and display the result.
error_counts = plugins_errors.countByKey()
error_counts


# It looks like we're receiving *dicts* instead of *lists* for activePlugins in some pings. Here's the breakdown, per platform:
#
# * Darwin - 4769 over 14499 (ratio 0.33)
# * Linux - 13910 over 27714 (ratio 0.50)
# * Windows - 90009 over 340591 (ratio 0.26)
#
# Let's also check if the dicts contain any key.

# In[21]:

def filter_valid_dicts(p):
    """Tell whether the activePlugins section of a ping is a dict.

    Args:
        p: Mapping holding the "environment/addons" entry of a ping.

    Returns:
        True only when the add-ons section is a dict whose "activePlugins"
        value is itself a dict; False in every other case (missing section,
        missing key, None, list, or any other type).
    """
    env_addons = p.get("environment/addons", None)
    # A missing or malformed add-ons section cannot hold a plugins dict.
    if not isinstance(env_addons, dict):
        return False

    # Check for a dict explicitly: callers go on to call .keys() on the
    # value, so "anything that is not a list" is not a safe criterion.
    return isinstance(env_addons.get("activePlugins"), dict)

# Keep only the pings accepted by filter_valid_dicts.
plugins_dicts = subset.filter(filter_valid_dicts)


# In[22]:

# Number of keys found in the activePlugins section of each remaining ping.
dict_keys_counts = plugins_dicts.map(lambda p: len(p.get("environment/addons", {}).get("activePlugins").keys()))


# In[23]:

# Tally how many pings share each key count.
dict_keys_counts.countByValue()


# Since we're here, also check if we're receiving pings with empty *lists* as activePlugins (which is what we expect).

# In[29]:

def filter_empty_lists(p):
    """Tell whether the activePlugins section of a ping is an empty list.

    Args:
        p: Mapping holding the "environment/addons" entry of a ping.

    Returns:
        True only when the add-ons section is a dict whose "activePlugins"
        value is a list with no elements; False in every other case.
    """
    env_addons = p.get("environment/addons", None)
    # A missing or malformed add-ons section has no plugins list at all.
    if not isinstance(env_addons, dict):
        return False

    active_plugins = env_addons.get("activePlugins")
    # Require a real list: other empty values (e.g. "" or {}) must not be
    # counted, and unsized values must not reach len().
    return isinstance(active_plugins, list) and not active_plugins

# Keep only the pings accepted by filter_empty_lists.
plugin_lists = subset.filter(filter_empty_lists)


# In[30]:

# How many pings carry an empty activePlugins list.
plugin_lists.count()


# Oh, that's weird. It looks like every empty *activePlugins* section contains {} rather than [].

# In[ ]:

	# coding: utf-8

	# Bug 1333806 - Investigate pings with missing activePlugins sections

	# In[1]:

	import ujson as json
	import matplotlib.pyplot as plt
	import pandas as pd
	import numpy as np
	import plotly.plotly as py

	from plotly.graph_objs import *
	from moztelemetry import get_pings_properties, get_one_ping_per_client
	from moztelemetry.dataset import Dataset

	get_ipython().magic(u'matplotlib inline')


	# In[5]:

	# Firefox nightly "main" pings (sourceVersion 4) submitted between
	# 2017-01-01 and 2017-01-31, fetched with sample=0.1.
	pings = (
	    Dataset.from_source("telemetry")
	    .where(appName='Firefox')
	    .where(docType='main')
	    .where(submissionDate=lambda day: "20170101" <= day <= "20170131")
	    .where(appUpdateChannel="nightly")
	    .where(sourceVersion="4")
	    .records(sc, sample=0.1)
	)


	# ... and extract only the attributes we need from the Telemetry submissions:

	# In[6]:

	# Keep only the client id, the OS name and the add-ons section of each ping.
	subset = get_pings_properties(
	    pings,
	    [
	        "clientId",
	        "environment/system/os/name",
	        "environment/addons",
	    ],
	)


	# In[24]:

	# Number of records in `subset`.
	ping_count = subset.count()


	# Let's try to understand what's going on with the activePlugins section coming from the client pings.

	# In[13]:

	def to_os_plugins(x):
	    """Classify the state of the activePlugins section of one ping.

	    Args:
	        x: Mapping holding the "environment/system/os/name" and
	            "environment/addons" entries of a ping.

	    Returns:
	        A ``((os_name, description), 1)`` pair, so that the results can be
	        counted by key.
	    """
	    os_name = x.get("environment/system/os/name", "Unknown")
	    env_addons = x.get("environment/addons", None)

	    if env_addons is None:
	        return ((os_name, "No env_addons section"), 1)

	    if not isinstance(env_addons, dict):
	        return ((os_name, "Env_addons is not a dict"), 1)

	    # Use .get(): an add-ons section that lacks the key altogether must be
	    # reported as "No activePlugins" instead of raising KeyError.
	    active_plugins = env_addons.get("activePlugins")
	    if active_plugins is None:
	        return ((os_name, "No activePlugins"), 1)

	    if isinstance(active_plugins, dict):
	        return ((os_name, "activePlugins is a dict, not a list"), 1)

	    if not isinstance(active_plugins, list):
	        return ((os_name, "activePlugins is not a list either!"), 1)

	    if not active_plugins:
	        return ((os_name, "empty activePlugins list"), 1)

	    return ((os_name, "activePlugins should be fine"), 1)

	# Tag every ping with its (OS name, activePlugins status) key.
	plugins_errors = subset.map(to_os_plugins)


	# In[25]:

	# Count the pings under each (OS name, status) key and display the result.
	error_counts = plugins_errors.countByKey()
	error_counts


	# It looks like we're receiving dicts instead of lists for activePlugins in some pings. Here's the breakdown, per platform:
	#
	# * Darwin - 4769 over 14499 (ratio 0.33)
	# * Linux - 13910 over 27714 (ratio 0.50)
	# * Windows - 90009 over 340591 (ratio 0.26)
	#
	# Let's also check if the dicts contain any key.

	# In[21]:

	def filter_valid_dicts(p):
	    """Tell whether the activePlugins section of a ping is a dict.

	    Args:
	        p: Mapping holding the "environment/addons" entry of a ping.

	    Returns:
	        True only when the add-ons section is a dict whose "activePlugins"
	        value is itself a dict; False in every other case (missing section,
	        missing key, None, list, or any other type).
	    """
	    env_addons = p.get("environment/addons", None)
	    # A missing or malformed add-ons section cannot hold a plugins dict.
	    if not isinstance(env_addons, dict):
	        return False

	    # Check for a dict explicitly: callers go on to call .keys() on the
	    # value, so "anything that is not a list" is not a safe criterion.
	    return isinstance(env_addons.get("activePlugins"), dict)

	# Keep only the pings accepted by filter_valid_dicts.
	plugins_dicts = subset.filter(filter_valid_dicts)


	# In[22]:

	# Number of keys found in the activePlugins section of each remaining ping.
	dict_keys_counts = plugins_dicts.map(lambda p: len(p.get("environment/addons", {}).get("activePlugins").keys()))


	# In[23]:

	# Tally how many pings share each key count.
	dict_keys_counts.countByValue()


	# Since we're here, also check if we're receiving pings with empty lists as activePlugins (which is what we expect).

	# In[29]:

	def filter_empty_lists(p):
	    """Tell whether the activePlugins section of a ping is an empty list.

	    Args:
	        p: Mapping holding the "environment/addons" entry of a ping.

	    Returns:
	        True only when the add-ons section is a dict whose "activePlugins"
	        value is a list with no elements; False in every other case.
	    """
	    env_addons = p.get("environment/addons", None)
	    # A missing or malformed add-ons section has no plugins list at all.
	    if not isinstance(env_addons, dict):
	        return False

	    active_plugins = env_addons.get("activePlugins")
	    # Require a real list: other empty values (e.g. "" or {}) must not be
	    # counted, and unsized values must not reach len().
	    return isinstance(active_plugins, list) and not active_plugins

	# Keep only the pings accepted by filter_empty_lists.
	plugin_lists = subset.filter(filter_empty_lists)


	# In[30]:

	# How many pings carry an empty activePlugins list.
	plugin_lists.count()


	# Oh, that's weird. It looks like every empty activePlugins section contains {} rather than [].

	# In[ ]: