Skip to content

Instantly share code, notes, and snippets.

@maurodoglio
Created July 8, 2016 18:43
Show Gist options
  • Save maurodoglio/c4bc5f67baeaeb58121f62eb46515148 to your computer and use it in GitHub Desktop.
Save maurodoglio/c4bc5f67baeaeb58121f62eb46515148 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from pyspark.sql import SQLContext\n",
"from pyspark.sql import functions as sql_fn\n",
"from pyspark.sql.types import *\n",
"import pandas as pd\n",
"import plotly.plotly as py\n",
"from plotly.graph_objs import *\n",
"\n",
"\n",
"# S3 location of the Parquet dataset analysed in this notebook.\n",
"bucket = \"telemetry-parquet\"\n",
"prefix = \"main_summary/v2\"\n",
"dataset_path = \"s3://{}/{}\".format(bucket, prefix)\n",
"\n",
"# NOTE(review): sqlContext is not created anywhere in this notebook;\n",
"# presumably the PySpark kernel provides it -- confirm before running elsewhere.\n",
"dataset = sqlContext.read.load(dataset_path, format=\"parquet\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Compute both daily metrics in a single Spark aggregation. Calling sum()\n",
"# and count() separately and converting each with toPandas() runs one Spark\n",
"# job per conversion over the same grouped data.\n",
"by_day = dataset.groupBy('submission_date')\n",
"daily_totals = by_day.agg(\n",
"    sql_fn.sum('subsession_length').alias('sum(subsession_length)'),\n",
"    sql_fn.count(sql_fn.lit(1)).alias('count'),\n",
").toPandas()\n",
"\n",
"# Expose the two frames, with the column names the plotting cells read.\n",
"tot_usage = daily_totals[['submission_date', 'sum(subsession_length)']]\n",
"num_pings = daily_totals[['submission_date', 'count']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number of pings per day"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plot.ly/~mozilla/852.embed\" height=\"525\" width=\"100%\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Order the rows by date and parse the date column in one vectorized call\n",
"# instead of converting element by element with apply().\n",
"num_pings = num_pings.sort_values(by='submission_date')\n",
"ping_dates = pd.to_datetime(num_pings['submission_date'])\n",
"ping_data = Data([Scatter(x=ping_dates, y=num_pings['count'])])\n",
"\n",
"# Title and axis labels so the chart can be read on its own.\n",
"ping_layout = Layout(\n",
"    title='Number of pings per day',\n",
"    xaxis=dict(title='Submission date'),\n",
"    yaxis=dict(title='Pings'),\n",
")\n",
"py.iplot(Figure(data=ping_data, layout=ping_layout))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number of usage hours per day"
]
},
{
"cell_type": "code",
"execution_count": 138,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<iframe id=\"igraph\" scrolling=\"no\" style=\"border:none;\"seamless=\"seamless\" src=\"https://plot.ly/~mozilla/860.embed\" height=\"525\" width=\"100%\"></iframe>"
],
"text/plain": [
"<plotly.tools.PlotlyDisplay object>"
]
},
"execution_count": 138,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Float divisor so the conversion to hours is true division under this\n",
"# notebook's Python 2 kernel.\n",
"# Assumes subsession_length is recorded in seconds -- TODO confirm against the schema.\n",
"SECONDS_PER_HOUR = 3600.0\n",
"\n",
"# Order the rows by date and parse the date column in one vectorized call\n",
"# instead of converting element by element with apply().\n",
"tot_usage = tot_usage.sort_values(by='submission_date')\n",
"usage_dates = pd.to_datetime(tot_usage['submission_date'])\n",
"usage_hours = tot_usage['sum(subsession_length)'] / SECONDS_PER_HOUR\n",
"usage_data = Data([Scatter(x=usage_dates, y=usage_hours)])\n",
"\n",
"# Title and axis labels so the chart can be read on its own.\n",
"usage_layout = Layout(\n",
"    title='Number of usage hours per day',\n",
"    xaxis=dict(title='Submission date'),\n",
"    yaxis=dict(title='Usage hours'),\n",
")\n",
"py.iplot(Figure(data=usage_data, layout=usage_layout))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
},
"widgets": {
"state": {},
"version": "1.1.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment