Last active
December 18, 2015 12:47
-
-
Save Dexterp37/027d18b7e821e5b295a7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"nbformat_minor": 0, "cells": [{"source": "### Impact of Addons and Plugins with no desc, name or version", "cell_type": "markdown", "metadata": {}}, {"execution_count": 1, "cell_type": "code", "source": "import datetime as dt\nimport ujson as json\nimport pandas as pd\nimport numpy as np\nimport copy as cp\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Unable to parse whitelist (/home/hadoop/anaconda/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 2, "cell_type": "code", "source": "build_ids = \"20151029151421\"\nversions = \"42.0\"\nsubmission_dates = (\"20151103\", \"20151105\")\nmain_pings = get_pings(sc,\n app=\"Firefox\",\n channel=\"release\",\n build_id=build_ids,\n version=versions,\n submission_date=submission_dates,\n doc_type=\"main\",\n schema=\"v4\",\n fraction=0.1)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 3, "cell_type": "code", "source": "main_pings.count()", "outputs": [{"execution_count": 3, "output_type": "execute_result", "data": {"text/plain": "9287937"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 4, "cell_type": "code", "source": "subset = get_pings_properties(main_pings, [\"meta/sampleId\",\n \"clientId\",\n \"environment/addons/activeAddons\",\n \"environment/addons/activePlugins\",\n \"environment/addons/theme\",])", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "subset.first()", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "### See how many addons have no description", "cell_type": "markdown", "metadata": {}}, {"execution_count": 28, "cell_type": "code", "source": "def get_partial_addon_id(p):\n addons = p.get(\"environment/addons/activeAddons\", None)\n if not addons:\n return \"No addons\"\n \n for addon_id in addons:\n addon = addons[addon_id]\n\n if not \"name\" in addon:\n return addon_id\n\n if not \"description\" in addon:\n return addon_id\n \n if not \"version\" in addon:\n return addon_id\n \n return \"\"", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 29, "cell_type": "code", "source": "partial_addons = subset.map(get_partial_addon_id).collect()", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 30, "cell_type": "code", "source": "partial_addons_series = pd.Series(partial_addons)\npartial_addons_series.value_counts()", "outputs": [{"execution_count": 30, "output_type": "execute_result", "data": {"text/plain": " 4872951\nNo addons 3449103\n{4ED1F68A-5463-4931-9384-8FFF5ED91D92} 209604\ndefsearchp@gmail.com 93435\nvirtual_keyboard_07402848C2F6470194F131B0F3DE025E@kaspersky.com 92789\ncontent_blocker@kaspersky.com 41520\n4zffxtbr@www.videodownloadconverter.com 34900\nvirtual_keyboard_294FF26A1D5B455495946778FDE7CEDB@kaspersky.com 26688\njqs@sun.com 25671\nanti_banner@kaspersky.com 22558\n{D19CA586-DD6C-4a0a-96F8-14644F340D60} 13409\nLogMeInClient@logmein.com 12362\nurl_advisor@kaspersky.com 11075\nsweetsearch@gmail.com 10215\narthurj8283@gmail.com 9205\n...\nf@qphK.net 1\n4KNTcylnV@u.org 1\nqF@1.net 1\nqw@uaCDkS.org 1\nL5vnythZ@G5.edu 1\nz@U13bV.com 1\n1W@G.net 1\n_ihknpyiciophtfmmqy@ctfpkpjkfotezswt.edu 1\n{21627405-472e-42c0-8785-185f9c8ded59} 1\nvqbhcnfz_xmugt@rg_crafcsuogd.com 1\nii-iaryia@dknyeiy-.net 1\navria1iiii@tmfcmgouyy.org 1\nK@P5D8D.edu 1\nifjhfxhlixyc_uci@sxgdbxqlqybtylk_.com 1\n50c22d644779d@50c22d64477d6.com 1\nLength: 42796, dtype: int64"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 31, "cell_type": "code", "source": "partial_addons_series.describe()", "outputs": [{"execution_count": 31, "output_type": "execute_result", "data": {"text/plain": "count 9287937\nunique 42796\ntop \nfreq 4872951\ndtype: object"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "### How many clients are affected?", "cell_type": "markdown", "metadata": {}}, {"execution_count": 24, "cell_type": "code", "source": "def has_partial_activeAddon_info(p):\n addons = p.get(\"environment/addons/activeAddons\", None)\n if not addons:\n return False\n\n for addon_id in addons:\n addon = addons[addon_id]\n\n if not \"name\" in addon or not \"description\" in addon or not \"version\" in addon:\n return True\n \n return False\n\ndef has_partial_addon_info(p):\n if has_partial_activeAddon_info(p):\n return True\n \n theme = p.get(\"environment/addons/theme\", None)\n if theme and (not \"name\" in theme or not \"description\" in theme or not \"version\" in theme):\n return True\n\n plugins = p.get(\"environment/addons/activePlugins\", None)\n if plugins:\n for plugin in plugins:\n if not \"name\" in plugin or not \"description\" in plugin or not \"version\" in plugin:\n return True\n \n return False", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 25, "cell_type": "code", "source": "affected_pings = subset.filter(has_partial_addon_info)", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 26, "cell_type": "code", "source": "affected_pings.count()", "outputs": [{"execution_count": 26, "output_type": "execute_result", "data": {"text/plain": "1330639"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 27, "cell_type": "code", "source": "affected_clients = get_one_ping_per_client(affected_pings)\naffected_clients.count()", "outputs": [{"execution_count": 27, "output_type": "execute_result", "data": {"text/plain": "810093"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 32, "cell_type": "code", "source": "all_clients = get_one_ping_per_client(subset)\nall_clients.count()", "outputs": [{"execution_count": 32, "output_type": "execute_result", "data": {"text/plain": "5583717"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "version": "2.7.9", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}}} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment