Skip to content

Instantly share code, notes, and snippets.

@Dexterp37
Last active February 25, 2016 16:08
Show Gist options
  • Save Dexterp37/c2e1c1d4de4ba22bc4cf to your computer and use it in GitHub Desktop.
Save Dexterp37/c2e1c1d4de4ba22bc4cf to your computer and use it in GitHub Desktop.
Investigate the biggest fields in opt-out Telemetry Release pings, but first distribute the pings in different buckets depending on their size.
Display the source blob
Display the rendered blob
Raw
{"nbformat_minor": 0, "cells": [{"execution_count": 120, "cell_type": "code", "source": "import datetime as dt\nimport ujson as json\nimport pandas as pd\nimport numpy as np\nimport copy as cp\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "Some utility functions", "cell_type": "markdown", "metadata": {}}, {"execution_count": 121, "cell_type": "code", "source": "def optout_ping_filter(p):\n    \"\"\"Return True if the ping `p` passes the opt-out filter.\n\n    A ping passes when looking up payload/simpleMeasurements/UITelemetry yields\n    None and its environment/settings/telemetryEnabled value is neither True\n    nor the string \"false\" (a missing value defaults to False and passes).\n    \"\"\"\n    # Filter out pings with telemetryEnabled equal to the string \"false\", so our\n    # results don't get skewed.\n    telemetry_enabled = p.get(\"environment\", {}).get(\"settings\", {}).get(\"telemetryEnabled\", False)\n    # Also make sure to remove those pings that are there due to other bugs.\n    undesired_fields = p.get(\"payload\", {}).get(\"simpleMeasurements\", {}).get(\"UITelemetry\", None)\n\n    # Identity test: the lookup default above is the None singleton itself.\n    return (undesired_fields is None) and (telemetry_enabled != True) and (telemetry_enabled != \"false\")\n\ndef get_optout_pings(pings):\n    \"\"\"Return the pings from `pings` that pass optout_ping_filter.\"\"\"\n    return pings.filter(optout_ping_filter)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "Get the bulk of the pings for Firefox 41, Release", "cell_type": "markdown", "metadata": {}}, {"execution_count": 122, "cell_type": "code", "source": "build_ids = (\"20150917150946\", \"20151014143721\")\nversions = (\"41.0\", \"41.0.2\")\n# Submission date (YYYYMMDD) one week before the moment this cell runs, so the\n# selected pings depend on the day the notebook is executed.\nlast_weeks = (dt.datetime.now() - dt.timedelta(weeks=1)).strftime(\"%Y%m%d\")\n# NOTE(review): `sc` is not defined in this notebook; presumably the SparkContext\n# provided by the hosting environment - confirm.\nmain_pings = get_pings(sc,\n                       app=\"Firefox\",\n                       channel=\"release\",\n                       build_id=build_ids,\n                       version=versions,\n                       submission_date=last_weeks,\n                       doc_type=\"main\",\n                       schema=\"v4\",\n                       fraction=1.0)", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 123, "cell_type": "code", "source": "main_pings.count()", "outputs": [{"execution_count": 123, "output_type": "execute_result", "data": {"text/plain": "806852"}, "metadata": {}}], "metadata": {"collapsed": 
false, "trusted": true}}, {"source": "Only keep a subset of the pings.", "cell_type": "markdown", "metadata": {}}, {"execution_count": 124, "cell_type": "code", "source": "filteredSubset = get_optout_pings(main_pings)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 125, "cell_type": "code", "source": "filteredSubset.count()", "outputs": [{"execution_count": 125, "output_type": "execute_result", "data": {"text/plain": "592253"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "Define the functions to extract the fields from each ping and map them to their json length in bytes.", "cell_type": "markdown", "metadata": {}}, {"execution_count": 126, "cell_type": "code", "source": "def get_from_ping(ping, path):\n    \"\"\"Return the value found at the \"/\"-separated `path` inside `ping`.\n\n    Every path component is used as a subscript on the value reached so far.\n    None is returned when the path cannot be followed to its end.\n    \"\"\"\n    try:\n        return reduce(lambda d, k: d[k], path.split(\"/\"), ping)\n    except (KeyError, IndexError, TypeError):\n        # TypeError covers intermediate values that cannot be subscripted with a\n        # string key (None, lists, scalars); without it the error would escape\n        # this function instead of being reported as a missing field.\n        return None\n\ndef extract_fields_size(ping):\n    \"\"\"Return a list of (field_path, json_size) tuples for one ping.\n\n    json_size is the length of the field's JSON serialization. Fields that\n    are missing from the ping, or whose value is falsy, are skipped.\n    \"\"\"\n    # NOTE: \"environment/system\" and \"environment/system/gfx\" used to be\n    # misspelled as \"environent/...\", so they never matched; results produced\n    # before that fix do not include them.\n    field_list = [\n        \"payload\",\n        \"payload/simpleMeasurements\",\n        \"payload/simpleMeasurements/UITelemetry\",\n        \"payload/histograms\",\n        \"payload/keyedHistograms\",\n        \"payload/info\",\n        \"payload/log\",\n        \"payload/addonDetails\",\n        \"payload/addonHistograms\",\n        \"payload/UIMeasurements\",\n        \"payload/webrtc\",\n        \"payload/childPayloads\",\n        \"payload/chromeHangs\",\n        \"payload/threadHangStats\",\n        \"payload/fileIOReports\",\n        \"payload/lateWrites\",\n        \"payload/slowSQL\",\n        \"payload/slowSQLstartup\",\n        \"environment\",\n        \"environment/addons\",\n        \"environment/addons/activeAddons\",\n        \"environment/addons/activePlugins\",\n        \"environment/addons/activeGMPlugins\",\n        \"environment/addons/persona\",\n        \"environment/addons/theme\",\n        \"environment/system\",\n        \"environment/system/gfx\",\n        \"environment/settings\",\n        \"environment/settings/userPrefs\",\n        \"environment/partner\",\n    ]\n    # Build a tuple (field_name, json_field_size) for each field. Each field is\n    # looked up once, and the ping is only read here, so it is not copied.\n    tuples = []\n    for e in field_list:\n        value = get_from_ping(ping, e)\n        if value:\n            tuples.append((e, len(json.dumps(value))))\n    return tuples\n\ndef group_median(field_size_tuple):\n    \"\"\"Map a (field_path, sizes) pair to (field_path, median_size, count).\"\"\"\n    values = list(field_size_tuple[1])\n    return (field_size_tuple[0], np.median(values), len(values))\n\ndef get_bucket_stats(pings, topN):\n    \"\"\"Return the `topN` fields of `pings` with the largest median JSON size.\n\n    Each entry is a (field_path, median_size, count) tuple, where count is the\n    number of sizes collected for that field.\n    \"\"\"\n    return pings.flatMap(extract_fields_size).groupByKey().map(group_median).takeOrdered(topN, key=lambda x: -x[1])", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "# Get the pings in the maximum bucket (Size >= 15.5Kb)", "cell_type": "markdown", "metadata": {}},
{"execution_count": 127, "cell_type": "code", "source": "def get_maximum_bucket(pings):\n    \"\"\"Keep the pings whose meta/Size is at least 15500.\"\"\"\n    return pings.filter(lambda p: p[\"meta\"][\"Size\"] >= 15500)\n\nmaximumBucket = get_maximum_bucket(filteredSubset)\nmaximumBucket.count()", "outputs": [{"execution_count": 127, "output_type": "execute_result", "data": {"text/plain": "34939"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 128, "cell_type": "code", "source": "get_bucket_stats(maximumBucket, 15)", "outputs": [{"execution_count": 128, "output_type": "execute_result", "data": {"text/plain": "[('environment', 12831.0, 34939),\n ('environment/addons', 10982.0, 34939),\n ('environment/addons/activePlugins', 8412.0, 34934),\n ('payload', 4216.0, 34939),\n ('environment/addons/activeAddons', 2548.0, 30628),\n ('payload/histograms', 2041.0, 34939),\n ('payload/simpleMeasurements', 935.0, 34939),\n ('payload/info', 887.0, 34939),\n ('environment/settings', 607.0, 34939),\n ('environment/addons/theme', 277.0, 34543),\n ('payload/keyedHistograms', 211.0, 34939),\n ('environment/addons/activeGMPlugins', 166.0, 34933),\n ('environment/settings/userPrefs', 155.0, 34939),\n ('environment/partner', 19.0, 34939),\n ('environment/addons/persona', 8.0, 2839)]"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "# Get the pings in the upper bucket (Size >= 12Kb && < 15.5Kb)", "cell_type": "markdown", "metadata": {}}, {"execution_count": 129, "cell_type": "code", "source": "def get_upper_bucket(pings):\n    \"\"\"Keep the pings whose meta/Size is at least 12000 and below 15500.\"\"\"\n    return pings.filter(lambda p: 12000 <= p[\"meta\"][\"Size\"] < 15500)\n\nupperBucket = get_upper_bucket(filteredSubset)\nupperBucket.count()", "outputs": [{"execution_count": 129, "output_type": "execute_result", "data": {"text/plain": "121198"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}},
{"execution_count": 130, "cell_type": "code", "source": "get_bucket_stats(upperBucket, 15)", "outputs": [{"execution_count": 130, "output_type": "execute_result", "data": {"text/plain": "[('environment', 9137.0, 121198),\n ('environment/addons', 7329.0, 121198),\n ('environment/addons/activePlugins', 5715.0, 121191),\n ('payload', 3967.0, 121198),\n ('payload/histograms', 2014.0, 121198),\n ('environment/addons/activeAddons', 1040.0, 89311),\n ('payload/simpleMeasurements', 935.0, 121198),\n ('payload/info', 735.0, 121198),\n ('environment/settings', 597.0, 121198),\n ('environment/addons/theme', 277.0, 119153),\n ('payload/keyedHistograms', 211.0, 121198),\n ('environment/addons/activeGMPlugins', 166.0, 121145),\n ('environment/settings/userPrefs', 153.0, 121198),\n ('environment/partner', 19.0, 121198),\n ('environment/addons/persona', 8.0, 6022)]"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "# Get the pings in the medium bucket (Size >= 7.5Kb && < 12Kb)", "cell_type": "markdown", "metadata": {}}, {"execution_count": 131, "cell_type": "code", "source": "def get_medium_bucket(pings):\n    \"\"\"Keep the pings whose meta/Size is at least 7500 and below 12000.\"\"\"\n    return pings.filter(lambda p: 7500 <= p[\"meta\"][\"Size\"] < 12000)\n\nmediumBucket = get_medium_bucket(filteredSubset)\nmediumBucket.count()", "outputs": [{"execution_count": 131, "output_type": "execute_result", "data": {"text/plain": "382791"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 132, "cell_type": "code", "source": "get_bucket_stats(mediumBucket, 15)", "outputs": 
[{"execution_count": 132, "output_type": "execute_result", "data": {"text/plain": "[('environment', 5732.0, 382791),\n ('environment/addons', 3985.0, 382791),\n ('payload', 3285.0, 382791),\n ('environment/addons/activePlugins', 2935.0, 382218),\n ('payload/histograms', 1266.0, 382791),\n ('payload/simpleMeasurements', 933.0, 382791),\n ('payload/info', 670.0, 382791),\n ('environment/addons/activeAddons', 645.0, 200384),\n ('environment/settings', 567.0, 382791),\n ('environment/addons/theme', 279.0, 375349),\n ('payload/keyedHistograms', 211.0, 382791),\n ('environment/addons/activeGMPlugins', 166.0, 382151),\n ('environment/settings/userPrefs', 115.0, 382791),\n ('environment/partner', 19.0, 382791),\n ('environment/addons/persona', 8.0, 11003)]"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "version": "2.7.9", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment