Skip to content

Instantly share code, notes, and snippets.

@bsmedberg
Created March 8, 2017 23:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bsmedberg/1af70857106bfe29128a0e8d0b656804 to your computer and use it in GitHub Desktop.
Save bsmedberg/1af70857106bfe29128a0e8d0b656804 to your computer and use it in GitHub Desktop.
downgrade-analysis
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# coding: utf-8
# Downgrade and Channel Switching Analysis
# ========================================
#
# authors:
# - bsmedberg
#
# last run: 2017-03-08
#
# ### Longitudinal Dataset Tutorial
# In[1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
get_ipython().magic(u'pylab inline')
# In[2]:
# Bare expression: evaluated only so the notebook displays the value.
# NOTE(review): `sc` is not defined in this file; presumably it is the
# SparkContext injected by the notebook environment -- confirm.
sc.defaultParallelism
# The longitudinal dataset can be accessed as a Spark [DataFrame](https://spark.apache.org/docs/1.5.2/api/python/pyspark.sql.html#pyspark.sql.DataFrame), which is a distributed collection of data organized into named columns. It is conceptually equivalent to a table in a relational database or a data frame in R/Python.
# In[3]:
# Load the columns used by this analysis. The code visible in this file reads
# only `settings` and `build` from each row; `client_id` and
# `profile_subsession_counter` are selected but not read here.
# NOTE(review): `sqlContext` is also expected to come from the environment.
frame = sqlContext.sql("SELECT client_id, profile_subsession_counter, build, settings FROM longitudinal")
# In[4]:
# Bare expression: shows the context object as the cell output.
sc
# In[10]:
def get_version(v):
    """Return the major version number parsed from a version string.

    Args:
        v: A dotted version string such as ``"52.0.1"``, or ``None``.

    Returns:
        The text before the first ``"."`` converted to ``int``, or ``None``
        when ``v`` is ``None`` or that leading component is not a valid
        integer (for example ``""`` or ``"abcd"``).
    """
    if v is None:
        return None
    # Only the component before the first dot matters for this analysis.
    major = v.partition(".")[0]
    try:
        return int(major)
    except ValueError:
        # Empty or non-numeric leading component: treat as "unknown".
        return None
# In[6]:
# Spot-check get_version on an empty string, a dotted version, a bare
# integer and a non-numeric string. Expected output: None, 12, 12, None.
# The parenthesised single-argument form prints the same text under
# Python 2 and is also valid Python 3.
print(get_version(""))
print(get_version("12.3"))
print(get_version("12"))
print(get_version("abcd"))
# In[11]:
# Accumulators updated by mapper() and read after frame.rdd.foreach(mapper).
# Rows whose `settings` column is not None.
count_total = sc.accumulator(0)
# Rows where an older session reported a lower major version than a newer one.
count_backwardsversion = sc.accumulator(0)
# Rows that reported more than one distinct update channel.
count_channelswitch = sc.accumulator(0)
# Backwards-version rows whose every session reported the "release" channel.
count_backwardsversion_releaseonly = sc.accumulator(0)


def mapper(row):
    """Classify one longitudinal row and update the module-level accumulators.

    A row is skipped entirely (not even counted in ``count_total``) when its
    ``settings`` column is ``None``.

    Args:
        row: A row exposing ``settings`` (iterable of objects with
            ``update.channel``) and ``build`` (iterable of objects with
            ``version``).

    Returns:
        None. Results are reported only through the accumulators.
    """
    if row.settings is None:
        return

    # Channel scan: every channel is compared against the first one seen, so
    # any differing value marks the profile as having switched channels.
    channel_switch = False
    release_only = True
    first_channel = None
    for settings in row.settings:
        channel = settings.update.channel
        if channel != "release":
            release_only = False
        if first_channel is None:
            first_channel = channel
        elif first_channel != channel:
            channel_switch = True

    # sessions are sorted by subsessionStartDate and then
    # profileSubsessionCounter, newest-first. Walking newest -> oldest, the
    # major version should never increase; an increase means the profile later
    # moved to an older version.
    backwardsversion = False
    # None means "no parseable version seen yet". A numeric sentinel such as
    # 99 would wrongly flag any profile whose newest major version exceeds it.
    newer_version = None
    # A missing build column is treated as "no builds" instead of raising.
    for build in row.build or ():
        version = get_version(build.version)
        if version is None:
            # Unparseable versions are ignored and do not reset the comparison.
            continue
        if newer_version is not None and version > newer_version:
            backwardsversion = True
        newer_version = version

    count_total.add(1)
    if backwardsversion:
        count_backwardsversion.add(1)
        if release_only:
            count_backwardsversion_releaseonly.add(1)
    if channel_switch:
        count_channelswitch.add(1)
# Run mapper over every row; its only output is the accumulator updates.
frame.rdd.foreach(mapper)

# Float denominator so the ratios use true division under Python 2 as well.
total = float(count_total.value)
if total == 0:
    # Nothing was counted; avoid a ZeroDivisionError in the percentages below.
    print("no users with settings data were counted")
else:
    print("users that switched channels at all: {:.2f}%".format(
        count_channelswitch.value / total * 100))
    print("users that reverted to an older version: {:.2f}%".format(
        count_backwardsversion.value / total * 100))
    # The "%" suffix was missing from this line, unlike the two above.
    print("users that reverted to an older version, staying on the release channel: {:.2f}%".format(
        count_backwardsversion_releaseonly.value / total * 100))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment