Created
June 25, 2015 21:40
-
-
Save vitillo/df1b5e685d3a1be3095a to your computer and use it in GitHub Desktop.
Signed Dashboard
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Signed Dashboard\n",
    "\n",
    "Gets one ping per client who has signable add-ons installed and active. Each ping carries the list of add-ons reported for that client."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "import re\n",
    "from contextlib import closing\n",
    "from datetime import datetime, timedelta\n",
    "from urllib2 import Request, urlopen\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import ujson as json\n",
    "from IPython.display import display, display_html\n",
    "\n",
    "from moztelemetry.spark import get_pings, get_one_ping_per_client\n",
    "\n",
    "# Only the inline backend is needed; %pylab would also star-import numpy and\n",
    "# matplotlib names into the notebook namespace.\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def load_data():\n",
    "    \"\"\"Download the AMO extension list and broadcast it to the Spark workers.\n",
    "\n",
    "    Sets the global ``bc_amo_extensions`` to a Spark broadcast of a dict that\n",
    "    maps each add-on id to its ``status`` and ``disabled`` flag.\n",
    "    \"\"\"\n",
    "    global bc_amo_extensions\n",
    "    amo_extensions = dict()\n",
    "    req = Request(\"http://www.oxymoronical.com/files/allextensions.txt\", headers={'User-Agent' : \"Firefox\"})\n",
    "    # closing() releases the HTTP response even if reading it fails.\n",
    "    with closing(urlopen(req)) as response:\n",
    "        # The first three lines and the last line are skipped (presumably the\n",
    "        # table header and footer -- verify against the file).\n",
    "        lines = response.readlines()[3:-1]\n",
    "    for line in lines:\n",
    "        start, id, uid, status, disabled, end = re.sub(r'\\s*\\|\\s*', \"|\", line).split(\"|\")\n",
    "        amo_extensions[id] = {\n",
    "            \"status\": status,\n",
    "            # The column is parsed as text, and a str never equals the int 1,\n",
    "            # so compare against \"1\" (assumes the file encodes the flag as 0/1).\n",
    "            \"disabled\": disabled == \"1\",\n",
    "        }\n",
    "    bc_amo_extensions = sc.broadcast(amo_extensions)\n",
    "\n",
    "load_data()\n",
    "\n",
    "def is_amo(addon):\n",
    "    \"\"\"Return True if the add-on's id is present in the broadcast AMO list.\"\"\"\n",
    "    return addon[\"id\"] in bc_amo_extensions.value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def extract(pingstr):\n",
    "    \"\"\"Parse a raw JSON ping into a dict holding the client id, OS and add-ons.\n",
    "\n",
    "    Only active add-ons that report a ``signedState`` are kept; each one is\n",
    "    tagged with its ``id`` and an ``isAMO`` flag.\n",
    "    \"\"\"\n",
    "    data = json.loads(pingstr)\n",
    "\n",
    "    ping = {\n",
    "        \"clientID\": data[\"clientID\"],\n",
    "        \"os\": data[\"environment\"][\"system\"][\"os\"],\n",
    "        \"addons\": []\n",
    "    }\n",
    "\n",
    "    # A ping without an add-ons section, or without activeAddons inside it,\n",
    "    # contributes an empty add-on list instead of failing the whole job.\n",
    "    addons_section = data[\"environment\"].get(\"addons\") or {}\n",
    "    activeAddons = addons_section.get(\"activeAddons\") or {}\n",
    "    for (id, addon) in activeAddons.iteritems():\n",
    "        if \"signedState\" in addon:\n",
    "            addon[\"id\"] = id\n",
    "            addon[\"isAMO\"] = is_amo(addon)\n",
    "            ping[\"addons\"].append(addon)\n",
    "\n",
    "    return ping\n",
    "\n",
    "# Dates are inclusive so count from one week till yesterday\n",
    "now = datetime.now()\n",
    "start = now - timedelta(7)\n",
    "end = now - timedelta(1)\n",
    "\n",
    "pings = get_pings(sc,\n",
    "                  app=\"Firefox\",\n",
    "                  channel=\"nightly\",\n",
    "                  submission_date=(start.strftime(\"%Y%m%d\"), end.strftime(\"%Y%m%d\")),\n",
    "                  build_id=(\"20150603000000\", \"99999999999999\"),\n",
    "                  fraction=0.05)\n",
    "\n",
    "# Extracts fields\n",
    "pings = pings.map(extract)\n",
    "\n",
    "# Reduce to one per client\n",
    "pings = get_one_ping_per_client(pings)\n",
    "ostypes = pings.map(lambda p: p[\"os\"][\"name\"]).distinct().collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "anyaddons = pings.filter(lambda p: len(p[\"addons\"]) > 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "noaddons = pings.filter(lambda p: len(p[\"addons\"]) == 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def is_foreign(addon):\n",
    "    \"\"\"Return True if the add-on is flagged as a foreign install.\"\"\"\n",
    "    # Telemetry documents foreignInstall as a JSON boolean; the string form is\n",
    "    # still accepted so pings that serialise it as text keep working.\n",
    "    return addon.get(\"foreignInstall\", False) not in (False, \"false\")\n",
    "\n",
    "def is_valid(addon):\n",
    "    \"\"\"Return True if the add-on counts as signed.\n",
    "\n",
    "    signedState must be positive, and a foreign install additionally needs a\n",
    "    signedState of at least 2.\n",
    "    \"\"\"\n",
    "    if addon[\"signedState\"] <= 0:\n",
    "        return False\n",
    "    if is_foreign(addon) and addon[\"signedState\"] < 2:\n",
    "        return False\n",
    "    return True\n",
    "\n",
    "def reduce_addons(check):\n",
    "    \"\"\"Build a mapper that keeps only the add-ons of a ping accepted by ``check``.\"\"\"\n",
    "    def mapper(ping):\n",
    "        # Return a shallow copy so the input ping (and any RDD still holding\n",
    "        # it) is left untouched.\n",
    "        reduced = dict(ping)\n",
    "        reduced[\"addons\"] = [addon for addon in ping[\"addons\"] if check(addon)]\n",
    "        return reduced\n",
    "\n",
    "    return mapper\n",
    "\n",
    "def check_any_addon(check):\n",
    "    \"\"\"Build a predicate that is True when ``check`` accepts at least one add-on of a ping.\"\"\"\n",
    "    def reducer(ping):\n",
    "        return any(check(addon) for addon in ping[\"addons\"])\n",
    "\n",
    "    return reducer\n",
    "\n",
    "def check_all_addons(check):\n",
    "    \"\"\"Build a predicate that is True when ``check`` accepts every add-on of a ping.\n",
    "\n",
    "    A ping with no add-ons satisfies the predicate.\n",
    "    \"\"\"\n",
    "    def reducer(ping):\n",
    "        return all(check(addon) for addon in ping[\"addons\"])\n",
    "\n",
    "    return reducer\n",
    "\n",
    "def reduce_addon_state(check):\n",
    "    \"\"\"Build a mapper that filters a plain list of add-ons with ``check``.\"\"\"\n",
    "    def mapper(addons):\n",
    "        return [addon for addon in addons if check(addon)]\n",
    "\n",
    "    return mapper\n",
    "\n",
    "def check_any_unsigned(ping):\n",
    "    \"\"\"Return True if at least one add-on of the ping fails ``is_valid``.\"\"\"\n",
    "    return check_any_addon(lambda a: not is_valid(a))(ping)\n",
    "\n",
    "def check_any_signed(ping):\n",
    "    \"\"\"Return True if at least one add-on of the ping passes ``is_valid``.\"\"\"\n",
    "    return check_any_addon(is_valid)(ping)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "allsigned = anyaddons.filter(check_all_addons(is_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "nonesigned = anyaddons.filter(check_all_addons(lambda a: not is_valid(a)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def check_some_signed(ping):\n",
    "    \"\"\"Return True if the ping mixes signed and unsigned add-ons.\"\"\"\n",
    "    return check_any_signed(ping) and check_any_unsigned(ping)\n",
    "\n",
    "somesigned = anyaddons.filter(check_some_signed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def count_by_os(rdd):\n",
    "    \"\"\"Count the pings of ``rdd`` per OS name in ``ostypes``, plus an \"All\" total.\n",
    "\n",
    "    Returns a pandas Series indexed by OS name.\n",
    "    \"\"\"\n",
    "    # One pass over the RDD instead of one filter-and-count job per OS.\n",
    "    counts = rdd.map(lambda p: p[\"os\"][\"name\"]).countByValue()\n",
    "    base = { \"All\": 0 }\n",
    "    for os_name in ostypes:\n",
    "        base[os_name] = counts.get(os_name, 0)\n",
    "        base[\"All\"] += base[os_name]\n",
    "    return pd.Series(base)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": true, "trusted": true},
   "outputs": [],
   "source": [
    "df = pd.DataFrame({\n",
    "    \"No Add-ons\": count_by_os(noaddons),\n",
    "    \"None Signed\": count_by_os(nonesigned),\n",
    "    \"Some Signed\": count_by_os(somesigned),\n",
    "    \"All Signed\": count_by_os(allsigned),\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "# Express every category as a percentage of its row (per-OS) total.\n",
    "totals = df.sum(1)\n",
    "percent = (100.0 * df).div(totals, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "# Pass the axes explicitly: DataFrame.plot() otherwise opens its own figure,\n",
    "# leaving an empty one behind and ignoring the requested size.\n",
    "fig, ax = plt.subplots(figsize=(30, 7))\n",
    "percent.plot(kind='bar', stacked=True, ax=ax)\n",
    "ax.set_ylabel(\"% of clients\")\n",
    "ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "unsigned = anyaddons.filter(check_any_unsigned).map(reduce_addons(lambda a: not is_valid(a)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "alladdons = unsigned.flatMap(lambda p: p[\"addons\"])\n",
    "amoaddons = alladdons.filter(lambda a: a[\"isAMO\"])\n",
    "display(alladdons.count())\n",
    "display(amoaddons.count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def sorted_table(map):\n",
    "    \"\"\"Return the items of a dict as a list sorted by value, largest first.\"\"\"\n",
    "    return sorted(map.items(), key = lambda i: i[1], reverse = True)\n",
    "\n",
    "def ordered_ids(pings):\n",
    "    \"\"\"Return, per add-on id, its occurrence count as a percentage of the number of pings.\n",
    "\n",
    "    The result is a pandas Series indexed by add-on id; it is empty when\n",
    "    ``pings`` holds no elements.\n",
    "    \"\"\"\n",
    "    total = pings.count()\n",
    "    if total == 0:\n",
    "        # Nothing to report (e.g. an OS without unsigned add-ons); avoid\n",
    "        # dividing by zero.\n",
    "        return pd.Series([], dtype=\"float64\")\n",
    "    allids = pings.flatMap(lambda p: [a[\"id\"] for a in p[\"addons\"]])\n",
    "    return (100.0 / total) * pd.Series(allids.countByValue())\n",
    "\n",
    "display(ordered_ids(unsigned).nlargest(20))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "for os_name in ostypes:\n",
    "    display_html(\"<h2>\" + os_name + \"</h2>\", raw=True)\n",
    "    idcounts = unsigned.filter(lambda p: p[\"os\"][\"name\"] == os_name)\n",
    "    display(ordered_ids(idcounts).nlargest(20))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {"display_name": "Python 2", "language": "python", "name": "python2"},
  "language_info": {
   "codemirror_mode": {"name": "ipython", "version": 2},
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment