Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save Dexterp37/027d18b7e821e5b295a7 to your computer and use it in GitHub Desktop.
Save Dexterp37/027d18b7e821e5b295a7 to your computer and use it in GitHub Desktop.
{"nbformat_minor": 0, "cells": [{"source": "### Impact of Addons and Plugins with no desc, name or version", "cell_type": "markdown", "metadata": {}}, {"execution_count": 1, "cell_type": "code", "source": "import datetime as dt\nimport ujson as json\nimport pandas as pd\nimport numpy as np\nimport copy as cp\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Unable to parse whitelist (/home/hadoop/anaconda/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 2, "cell_type": "code", "source": "build_ids = \"20151029151421\"\nversions = \"42.0\"\nsubmission_dates = (\"20151103\", \"20151105\")\nmain_pings = get_pings(sc,\n app=\"Firefox\",\n channel=\"release\",\n build_id=build_ids,\n version=versions,\n submission_date=submission_dates,\n doc_type=\"main\",\n schema=\"v4\",\n fraction=0.1)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 3, "cell_type": "code", "source": "main_pings.count()", "outputs": [{"execution_count": 3, "output_type": "execute_result", "data": {"text/plain": "9287937"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 4, "cell_type": "code", "source": "subset = get_pings_properties(main_pings, [\"meta/sampleId\",\n \"clientId\",\n \"environment/addons/activeAddons\",\n \"environment/addons/activePlugins\",\n \"environment/addons/theme\",])", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "subset.first()", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "### See how many addons have no description", "cell_type": "markdown", "metadata": {}}, {"execution_count": 28, "cell_type": "code", "source": "def get_partial_addon_id(p):\n addons = p.get(\"environment/addons/activeAddons\", None)\n if not addons:\n return \"No addons\"\n \n for addon_id in addons:\n addon = addons[addon_id]\n\n if not \"name\" in addon:\n return addon_id\n\n if not \"description\" in addon:\n return addon_id\n \n if not \"version\" in addon:\n return addon_id\n \n return \"\"", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 29, "cell_type": "code", "source": "partial_addons = subset.map(get_partial_addon_id).collect()", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 30, "cell_type": "code", "source": "partial_addons_series = pd.Series(partial_addons)\npartial_addons_series.value_counts()", "outputs": [{"execution_count": 30, "output_type": "execute_result", "data": {"text/plain": " 4872951\nNo addons 3449103\n{4ED1F68A-5463-4931-9384-8FFF5ED91D92} 209604\ndefsearchp@gmail.com 93435\nvirtual_keyboard_07402848C2F6470194F131B0F3DE025E@kaspersky.com 92789\ncontent_blocker@kaspersky.com 41520\n4zffxtbr@www.videodownloadconverter.com 34900\nvirtual_keyboard_294FF26A1D5B455495946778FDE7CEDB@kaspersky.com 26688\njqs@sun.com 25671\nanti_banner@kaspersky.com 22558\n{D19CA586-DD6C-4a0a-96F8-14644F340D60} 13409\nLogMeInClient@logmein.com 12362\nurl_advisor@kaspersky.com 11075\nsweetsearch@gmail.com 10215\narthurj8283@gmail.com 9205\n...\nf@qphK.net 1\n4KNTcylnV@u.org 1\nqF@1.net 1\nqw@uaCDkS.org 1\nL5vnythZ@G5.edu 1\nz@U13bV.com 1\n1W@G.net 1\n_ihknpyiciophtfmmqy@ctfpkpjkfotezswt.edu 1\n{21627405-472e-42c0-8785-185f9c8ded59} 1\nvqbhcnfz_xmugt@rg_crafcsuogd.com 1\nii-iaryia@dknyeiy-.net 1\navria1iiii@tmfcmgouyy.org 1\nK@P5D8D.edu 1\nifjhfxhlixyc_uci@sxgdbxqlqybtylk_.com 1\n50c22d644779d@50c22d64477d6.com 1\nLength: 42796, dtype: int64"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 31, "cell_type": "code", "source": "partial_addons_series.describe()", "outputs": [{"execution_count": 31, "output_type": "execute_result", "data": {"text/plain": "count 9287937\nunique 42796\ntop \nfreq 4872951\ndtype: object"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "### How many clients are affected?", "cell_type": "markdown", "metadata": {}}, {"execution_count": 24, "cell_type": "code", "source": "def has_partial_activeAddon_info(p):\n addons = p.get(\"environment/addons/activeAddons\", None)\n if not addons:\n return False\n\n for addon_id in addons:\n addon = addons[addon_id]\n\n if not \"name\" in addon or not \"description\" in addon or not \"version\" in addon:\n return True\n \n return False\n\ndef has_partial_addon_info(p):\n if has_partial_activeAddon_info(p):\n return True\n \n theme = p.get(\"environment/addons/theme\", None)\n if theme and (not \"name\" in theme or not \"description\" in theme or not \"version\" in theme):\n return True\n\n plugins = p.get(\"environment/addons/activePlugins\", None)\n if plugins:\n for plugin in plugins:\n if not \"name\" in plugin or not \"description\" in plugin or not \"version\" in plugin:\n return True\n \n return False", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 25, "cell_type": "code", "source": "affected_pings = subset.filter(has_partial_addon_info)", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 26, "cell_type": "code", "source": "affected_pings.count()", "outputs": [{"execution_count": 26, "output_type": "execute_result", "data": {"text/plain": "1330639"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 27, "cell_type": "code", "source": "affected_clients = get_one_ping_per_client(affected_pings)\naffected_clients.count()", "outputs": [{"execution_count": 27, "output_type": "execute_result", "data": {"text/plain": "810093"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 32, "cell_type": "code", "source": "all_clients = get_one_ping_per_client(subset)\nall_clients.count()", "outputs": [{"execution_count": 32, "output_type": "execute_result", "data": {"text/plain": "5583717"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "version": "2.7.9", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment