Last active
February 25, 2016 16:08
-
-
Save Dexterp37/c2e1c1d4de4ba22bc4cf to your computer and use it in GitHub Desktop.
Investigate the biggest fields in opt-out Telemetry Release pings, but first distribute the pings in different buckets depending on their size.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Biggest fields in opt-out Telemetry Release pings\n",
    "\n",
    "Investigate the biggest fields in opt-out Telemetry Release pings, but first distribute the pings in different buckets depending on their size.\n",
    "\n",
    "**Note:** the stored outputs predate the fix of the misspelled `environent/system` paths in `extract_fields_size`, which is why `environment/system` and `environment/system/gfx` are missing from the tables below. Re-run the notebook to refresh them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 120,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "import copy as cp\n",
    "import datetime as dt\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import ujson as json\n",
    "\n",
    "from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Some utility functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "def optout_ping_filter(p):\n",
    "    \"\"\"Return True when `p` should be kept as an opt-out ping.\n",
    "\n",
    "    A ping is rejected when environment/settings/telemetryEnabled is True or\n",
    "    the string \"false\" (which would skew the results), or when it carries\n",
    "    payload/simpleMeasurements/UITelemetry (such pings are only there due to\n",
    "    other bugs).\n",
    "    \"\"\"\n",
    "    telemetry_enabled = p.get(\"environment\", {}).get(\"settings\", {}).get(\"telemetryEnabled\", False)\n",
    "    undesired_fields = p.get(\"payload\", {}).get(\"simpleMeasurements\", {}).get(\"UITelemetry\", None)\n",
    "\n",
    "    return undesired_fields is None and telemetry_enabled not in (True, \"false\")\n",
    "\n",
    "\n",
    "def get_optout_pings(pings):\n",
    "    \"\"\"Keep only the pings accepted by `optout_ping_filter`.\"\"\"\n",
    "    return pings.filter(optout_ping_filter)\n",
    "\n",
    "\n",
    "def get_size_bucket(pings, min_size, max_size=None):\n",
    "    \"\"\"Keep the pings whose meta/Size lies in [min_size, max_size).\n",
    "\n",
    "    With `max_size=None` the bucket has no upper bound.\n",
    "    \"\"\"\n",
    "    def in_bucket(p):\n",
    "        size = p[\"meta\"][\"Size\"]\n",
    "        return size >= min_size and (max_size is None or size < max_size)\n",
    "\n",
    "    return pings.filter(in_bucket)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the bulk of the pings for Firefox 41, Release"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "collapsed": true,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "build_ids = (\"20150917150946\", \"20151014143721\")\n",
    "versions = (\"41.0\", \"41.0.2\")\n",
    "# A single submission day, exactly one week before the notebook is run, so\n",
    "# the counts below depend on the run date.\n",
    "last_weeks = (dt.datetime.now() - dt.timedelta(weeks=1)).strftime(\"%Y%m%d\")\n",
    "# NOTE(review): `sc` is not defined in this notebook -- presumably the\n",
    "# SparkContext injected by the hosting environment; confirm before reuse.\n",
    "main_pings = get_pings(sc,\n",
    "                       app=\"Firefox\",\n",
    "                       channel=\"release\",\n",
    "                       build_id=build_ids,\n",
    "                       version=versions,\n",
    "                       submission_date=last_weeks,\n",
    "                       doc_type=\"main\",\n",
    "                       schema=\"v4\",\n",
    "                       fraction=1.0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "806852"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "main_pings.count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Only keep a subset of the pings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "filteredSubset = get_optout_pings(main_pings)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 125,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "592253"
      ]
     },
     "execution_count": 125,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "filteredSubset.count()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Field size extraction\n",
    "\n",
    "Define the functions to extract the fields from each ping and map them to their JSON length in bytes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# \"/\"-separated paths of the ping sections whose serialized size is measured.\n",
    "FIELD_PATHS = [\n",
    "    \"payload\",\n",
    "    \"payload/simpleMeasurements\",\n",
    "    \"payload/simpleMeasurements/UITelemetry\",\n",
    "    \"payload/histograms\",\n",
    "    \"payload/keyedHistograms\",\n",
    "    \"payload/info\",\n",
    "    \"payload/log\",\n",
    "    \"payload/addonDetails\",\n",
    "    \"payload/addonHistograms\",\n",
    "    \"payload/UIMeasurements\",\n",
    "    \"payload/webrtc\",\n",
    "    \"payload/childPayloads\",\n",
    "    \"payload/chromeHangs\",\n",
    "    \"payload/threadHangStats\",\n",
    "    \"payload/fileIOReports\",\n",
    "    \"payload/lateWrites\",\n",
    "    \"payload/slowSQL\",\n",
    "    \"payload/slowSQLstartup\",\n",
    "    \"environment\",\n",
    "    \"environment/addons\",\n",
    "    \"environment/addons/activeAddons\",\n",
    "    \"environment/addons/activePlugins\",\n",
    "    \"environment/addons/activeGMPlugins\",\n",
    "    \"environment/addons/persona\",\n",
    "    \"environment/addons/theme\",\n",
    "    \"environment/system\",\n",
    "    \"environment/system/gfx\",\n",
    "    \"environment/settings\",\n",
    "    \"environment/settings/userPrefs\",\n",
    "    \"environment/partner\",\n",
    "]\n",
    "\n",
    "\n",
    "def get_from_ping(ping, path):\n",
    "    \"\"\"Follow the \"/\"-separated `path` through `ping` and return what it finds.\n",
    "\n",
    "    Returns None when a step of the path is missing or when an intermediate\n",
    "    value cannot be indexed (e.g. it is None or a string).\n",
    "    \"\"\"\n",
    "    node = ping\n",
    "    try:\n",
    "        for key in path.split(\"/\"):\n",
    "            node = node[key]\n",
    "    except (KeyError, IndexError, TypeError):\n",
    "        return None\n",
    "    return node\n",
    "\n",
    "\n",
    "def extract_fields_size(ping):\n",
    "    \"\"\"Return a list of (field_path, json_size) tuples for one ping.\n",
    "\n",
    "    `json_size` is the length of the field once serialized with `json.dumps`.\n",
    "    Fields that are missing or empty (falsy) are skipped. The ping is only\n",
    "    read, never modified.\n",
    "    \"\"\"\n",
    "    sizes = []\n",
    "    for path in FIELD_PATHS:\n",
    "        value = get_from_ping(ping, path)\n",
    "        if value:\n",
    "            sizes.append((path, len(json.dumps(value))))\n",
    "    return sizes\n",
    "\n",
    "\n",
    "def group_median(field_size_tuple):\n",
    "    \"\"\"Map a (field_path, sizes) group to (field_path, median_size, count).\"\"\"\n",
    "    field, sizes = field_size_tuple\n",
    "    values = list(sizes)\n",
    "    return (field, np.median(values), len(values))\n",
    "\n",
    "\n",
    "def get_bucket_stats(pings, topN):\n",
    "    \"\"\"Return the `topN` fields with the largest median size, largest first.\n",
    "\n",
    "    Each entry is a (field_path, median_size, count) tuple, where `count` is\n",
    "    the number of pings in which the field was present and non-empty.\n",
    "    \"\"\"\n",
    "    return (pings.flatMap(extract_fields_size)\n",
    "                 .groupByKey()\n",
    "                 .map(group_median)\n",
    "                 .takeOrdered(topN, key=lambda x: -x[1]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the pings in the maximum bucket (Size >= 15.5 KB)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "34939"
      ]
     },
     "execution_count": 127,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_maximum_bucket(pings):\n",
    "    \"\"\"Keep the pings with Size >= 15500.\"\"\"\n",
    "    return get_size_bucket(pings, 15500)\n",
    "\n",
    "maximumBucket = get_maximum_bucket(filteredSubset)\n",
    "maximumBucket.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('environment', 12831.0, 34939),\n",
       " ('environment/addons', 10982.0, 34939),\n",
       " ('environment/addons/activePlugins', 8412.0, 34934),\n",
       " ('payload', 4216.0, 34939),\n",
       " ('environment/addons/activeAddons', 2548.0, 30628),\n",
       " ('payload/histograms', 2041.0, 34939),\n",
       " ('payload/simpleMeasurements', 935.0, 34939),\n",
       " ('payload/info', 887.0, 34939),\n",
       " ('environment/settings', 607.0, 34939),\n",
       " ('environment/addons/theme', 277.0, 34543),\n",
       " ('payload/keyedHistograms', 211.0, 34939),\n",
       " ('environment/addons/activeGMPlugins', 166.0, 34933),\n",
       " ('environment/settings/userPrefs', 155.0, 34939),\n",
       " ('environment/partner', 19.0, 34939),\n",
       " ('environment/addons/persona', 8.0, 2839)]"
      ]
     },
     "execution_count": 128,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_bucket_stats(maximumBucket, 15)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the pings in the upper bucket (Size >= 12 KB and < 15.5 KB)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "121198"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_upper_bucket(pings):\n",
    "    \"\"\"Keep the pings with 12000 <= Size < 15500.\"\"\"\n",
    "    return get_size_bucket(pings, 12000, 15500)\n",
    "\n",
    "upperBucket = get_upper_bucket(filteredSubset)\n",
    "upperBucket.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('environment', 9137.0, 121198),\n",
       " ('environment/addons', 7329.0, 121198),\n",
       " ('environment/addons/activePlugins', 5715.0, 121191),\n",
       " ('payload', 3967.0, 121198),\n",
       " ('payload/histograms', 2014.0, 121198),\n",
       " ('environment/addons/activeAddons', 1040.0, 89311),\n",
       " ('payload/simpleMeasurements', 935.0, 121198),\n",
       " ('payload/info', 735.0, 121198),\n",
       " ('environment/settings', 597.0, 121198),\n",
       " ('environment/addons/theme', 277.0, 119153),\n",
       " ('payload/keyedHistograms', 211.0, 121198),\n",
       " ('environment/addons/activeGMPlugins', 166.0, 121145),\n",
       " ('environment/settings/userPrefs', 153.0, 121198),\n",
       " ('environment/partner', 19.0, 121198),\n",
       " ('environment/addons/persona', 8.0, 6022)]"
      ]
     },
     "execution_count": 130,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_bucket_stats(upperBucket, 15)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Get the pings in the medium bucket (Size >= 7.5 KB and < 12 KB)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "382791"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_medium_bucket(pings):\n",
    "    \"\"\"Keep the pings with 7500 <= Size < 12000.\"\"\"\n",
    "    return get_size_bucket(pings, 7500, 12000)\n",
    "\n",
    "mediumBucket = get_medium_bucket(filteredSubset)\n",
    "mediumBucket.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('environment', 5732.0, 382791),\n",
       " ('environment/addons', 3985.0, 382791),\n",
       " ('payload', 3285.0, 382791),\n",
       " ('environment/addons/activePlugins', 2935.0, 382218),\n",
       " ('payload/histograms', 1266.0, 382791),\n",
       " ('payload/simpleMeasurements', 933.0, 382791),\n",
       " ('payload/info', 670.0, 382791),\n",
       " ('environment/addons/activeAddons', 645.0, 200384),\n",
       " ('environment/settings', 567.0, 382791),\n",
       " ('environment/addons/theme', 279.0, 375349),\n",
       " ('payload/keyedHistograms', 211.0, 382791),\n",
       " ('environment/addons/activeGMPlugins', 166.0, 382151),\n",
       " ('environment/settings/userPrefs', 115.0, 382791),\n",
       " ('environment/partner', 19.0, 382791),\n",
       " ('environment/addons/persona', 8.0, 11003)]"
      ]
     },
     "execution_count": 132,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_bucket_stats(mediumBucket, 15)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment