Skip to content

Instantly share code, notes, and snippets.

@vitillo
Created June 25, 2015 21:40
Show Gist options
  • Save vitillo/df1b5e685d3a1be3095a to your computer and use it in GitHub Desktop.
Save vitillo/df1b5e685d3a1be3095a to your computer and use it in GitHub Desktop.
Signed Dashboard
Display the source blob
Display the rendered blob
Raw
{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"},
  "language_info": {
   "mimetype": "text/x-python",
   "nbconvert_exporter": "python",
   "version": "2.7.9",
   "name": "python",
   "file_extension": ".py",
   "pygments_lexer": "ipython2",
   "codemirror_mode": {"version": 2, "name": "ipython"}
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Signed Dashboard\n",
    "\n",
    "Gets one ping per client. Each ping carries the list of that client's active add-ons that report a `signedState`. Clients are then grouped by whether none, some or all of those add-ons pass the signature check, and the result is broken down by OS."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "# %matplotlib inline replaces %pylab inline: it enables inline figures without\n",
    "# star-importing numpy/matplotlib over builtins such as any() and all().\n",
    "%matplotlib inline\n",
    "\n",
    "import re\n",
    "from contextlib import closing\n",
    "from datetime import datetime, timedelta\n",
    "from urllib2 import Request, urlopen\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import ujson as json\n",
    "from IPython.display import display, display_html\n",
    "\n",
    "from moztelemetry.spark import get_pings, get_one_ping_per_client"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "AMO_EXTENSIONS_URL = \"http://www.oxymoronical.com/files/allextensions.txt\"\n",
    "\n",
    "def load_data():\n",
    "    \"\"\"Download the extension listing and broadcast it to the Spark workers.\n",
    "\n",
    "    Sets the global ``bc_amo_extensions`` to a broadcast dict that maps the\n",
    "    second column of each row to its ``status`` and ``disabled`` flag.\n",
    "    \"\"\"\n",
    "    global bc_amo_extensions\n",
    "    amo_extensions = dict()\n",
    "    req = Request(AMO_EXTENSIONS_URL, headers={'User-Agent': \"Firefox\"})\n",
    "    # urllib2 responses are not context managers; closing() makes sure the\n",
    "    # connection is released even if reading fails.\n",
    "    with closing(urlopen(req)) as response:\n",
    "        # The first three lines and the last line are skipped as non-data rows.\n",
    "        lines = response.readlines()[3:-1]\n",
    "    for line in lines:\n",
    "        start, id, uid, status, disabled, end = re.sub(r'\\s*\\|\\s*', \"|\", line).split(\"|\")\n",
    "        amo_extensions[id] = {\n",
    "            \"status\": status,\n",
    "            # Every field is a string after split(), so the former comparison\n",
    "            # with the integer 1 was always False.\n",
    "            # NOTE(review): assumes the listing encodes the flag as 0/1 - confirm.\n",
    "            \"disabled\": disabled == \"1\",\n",
    "        }\n",
    "    bc_amo_extensions = sc.broadcast(amo_extensions)\n",
    "\n",
    "load_data()\n",
    "\n",
    "def is_amo(addon):\n",
    "    \"\"\"Return True if the add-on's id is a key of the broadcast extension listing.\"\"\"\n",
    "    return addon[\"id\"] in bc_amo_extensions.value"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def extract(pingstr):\n",
    "    \"\"\"Parse a JSON ping and keep the client id, the OS and the signable add-ons.\n",
    "\n",
    "    Only active add-ons that carry a ``signedState`` field are kept; each one\n",
    "    is tagged with its ``id`` and an ``isAMO`` flag.\n",
    "    \"\"\"\n",
    "    data = json.loads(pingstr)\n",
    "    environment = data[\"environment\"]\n",
    "\n",
    "    ping = {\n",
    "        \"clientID\": data[\"clientID\"],\n",
    "        \"os\": environment[\"system\"][\"os\"],\n",
    "        \"addons\": []\n",
    "    }\n",
    "\n",
    "    # Either level may be missing or null; treat that as \"no add-ons\".\n",
    "    activeAddons = (environment.get(\"addons\") or {}).get(\"activeAddons\") or {}\n",
    "    for (id, addon) in activeAddons.iteritems():\n",
    "        if \"signedState\" in addon:\n",
    "            addon[\"id\"] = id\n",
    "            addon[\"isAMO\"] = is_amo(addon)\n",
    "            ping[\"addons\"].append(addon)\n",
    "\n",
    "    return ping\n",
    "\n",
    "# Dates are inclusive so count from one week till yesterday\n",
    "now = datetime.now()\n",
    "start = now - timedelta(7)\n",
    "end = now - timedelta(1)\n",
    "\n",
    "raw_pings = get_pings(sc,\n",
    "                      app=\"Firefox\",\n",
    "                      channel=\"nightly\",\n",
    "                      submission_date=(start.strftime(\"%Y%m%d\"), end.strftime(\"%Y%m%d\")),\n",
    "                      build_id=(\"20150603000000\", \"99999999999999\"),\n",
    "                      fraction=0.05)\n",
    "\n",
    "# Extracts fields\n",
    "extracted_pings = raw_pings.map(extract)\n",
    "\n",
    "# Reduce to one per client. The result feeds every later cell, so cache it\n",
    "# instead of re-reading and re-parsing the pings for each Spark action.\n",
    "pings = get_one_ping_per_client(extracted_pings).cache()\n",
    "ostypes = pings.map(lambda p: p[\"os\"][\"name\"]).distinct().collect()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "anyaddons = pings.filter(lambda p: len(p[\"addons\"]) > 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "noaddons = pings.filter(lambda p: len(p[\"addons\"]) == 0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def is_foreign(addon):\n",
    "    \"\"\"Return True if the add-on is flagged as a foreign install.\n",
    "\n",
    "    A missing ``foreignInstall`` field counts as not foreign.\n",
    "    \"\"\"\n",
    "    value = addon.get(\"foreignInstall\", False)\n",
    "    # The former test `value != \"false\"` reported a boolean False as foreign.\n",
    "    # String values keep their old meaning; anything else uses its truthiness.\n",
    "    if isinstance(value, basestring):\n",
    "        return value != \"false\"\n",
    "    return bool(value)\n",
    "\n",
    "def is_valid(addon):\n",
    "    \"\"\"Return True if the add-on's signedState is acceptable.\n",
    "\n",
    "    A signedState <= 0 is never valid; a foreign install additionally needs\n",
    "    a signedState of at least 2.\n",
    "    \"\"\"\n",
    "    if addon[\"signedState\"] <= 0:\n",
    "        return False\n",
    "    if is_foreign(addon) and addon[\"signedState\"] < 2:\n",
    "        return False\n",
    "    return True\n",
    "\n",
    "def reduce_addons(check):\n",
    "    \"\"\"Build a mapper that keeps only the add-ons of a ping accepted by check.\"\"\"\n",
    "    def mapper(ping):\n",
    "        # Return a shallow copy so the element of the source RDD is not mutated.\n",
    "        return dict(ping, addons=[a for a in ping[\"addons\"] if check(a)])\n",
    "\n",
    "    return mapper\n",
    "\n",
    "def check_any_addon(check):\n",
    "    \"\"\"Build a predicate that is True when at least one add-on passes check.\"\"\"\n",
    "    def reducer(ping):\n",
    "        return any(check(addon) for addon in ping[\"addons\"])\n",
    "\n",
    "    return reducer\n",
    "\n",
    "def check_all_addons(check):\n",
    "    \"\"\"Build a predicate that is True when every add-on passes check.\"\"\"\n",
    "    def reducer(ping):\n",
    "        return all(check(addon) for addon in ping[\"addons\"])\n",
    "\n",
    "    return reducer\n",
    "\n",
    "def reduce_addon_state(check):\n",
    "    \"\"\"Build a mapper that filters a plain list of add-ons with check.\"\"\"\n",
    "    def mapper(addons):\n",
    "        return [a for a in addons if check(a)]\n",
    "\n",
    "    return mapper\n",
    "\n",
    "def check_any_unsigned(ping):\n",
    "    \"\"\"Return True if at least one add-on of the ping fails is_valid.\"\"\"\n",
    "    return any(not is_valid(addon) for addon in ping[\"addons\"])\n",
    "\n",
    "def check_any_signed(ping):\n",
    "    \"\"\"Return True if at least one add-on of the ping passes is_valid.\"\"\"\n",
    "    return any(is_valid(addon) for addon in ping[\"addons\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "allsigned = anyaddons.filter(check_all_addons(is_valid))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "nonesigned = anyaddons.filter(check_all_addons(lambda a: not is_valid(a)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def check_some_signed(ping):\n",
    "    \"\"\"Return True if the ping mixes valid and invalid add-ons.\"\"\"\n",
    "    return check_any_signed(ping) and check_any_unsigned(ping)\n",
    "\n",
    "somesigned = anyaddons.filter(check_some_signed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def count_by_os(rdd):\n",
    "    \"\"\"Count the pings of rdd per OS name in ostypes, plus an \"All\" total.\"\"\"\n",
    "    # A single pass over the RDD instead of one Spark job per OS.\n",
    "    counts = rdd.map(lambda p: p[\"os\"][\"name\"]).countByValue()\n",
    "    base = {\"All\": 0}\n",
    "    for os_name in ostypes:\n",
    "        base[os_name] = counts.get(os_name, 0)\n",
    "        base[\"All\"] += base[os_name]\n",
    "    return pd.Series(base)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": true, "trusted": true},
   "outputs": [],
   "source": [
    "df = pd.DataFrame({\n",
    "    \"No Add-ons\": count_by_os(noaddons),\n",
    "    \"None Signed\": count_by_os(nonesigned),\n",
    "    \"Some Signed\": count_by_os(somesigned),\n",
    "    \"All Signed\": count_by_os(allsigned),\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "# Share of each category within every OS row, in percent.\n",
    "totals = df.sum(axis=1)\n",
    "percent = 100.0 * df.div(totals, axis=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "# Draw on an explicit axes: DataFrame.plot otherwise opens its own figure and\n",
    "# the one sized here stays empty.\n",
    "fig, ax = plt.subplots(figsize=(30, 7))\n",
    "percent.plot(kind='bar', stacked=True, ax=ax)\n",
    "ax.set_xlabel(\"OS\")\n",
    "ax.set_ylabel(\"% of clients\")\n",
    "ax.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "unsigned = anyaddons.filter(check_any_unsigned).map(reduce_addons(lambda a: not is_valid(a)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "alladdons = unsigned.flatMap(lambda p: p[\"addons\"])\n",
    "amoaddons = alladdons.filter(lambda a: a[\"isAMO\"])\n",
    "display(alladdons.count())\n",
    "display(amoaddons.count())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "def sorted_table(map):\n",
    "    \"\"\"Return the items of a dict sorted by value, largest first.\"\"\"\n",
    "    return sorted(map.items(), key = lambda i: i[1], reverse = True)\n",
    "\n",
    "def ordered_ids(pings):\n",
    "    \"\"\"Return, per add-on id, its number of occurrences per 100 pings.\n",
    "\n",
    "    An empty RDD yields an empty Series instead of dividing by zero.\n",
    "    \"\"\"\n",
    "    total = pings.count()\n",
    "    if total == 0:\n",
    "        return pd.Series(dtype=float)\n",
    "    allids = pings.flatMap(lambda p: [a[\"id\"] for a in p[\"addons\"]])\n",
    "    return (100.0 / total) * pd.Series(allids.countByValue())\n",
    "\n",
    "display(ordered_ids(unsigned).nlargest(20))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {"collapsed": false, "trusted": true},
   "outputs": [],
   "source": [
    "for os_name in ostypes:\n",
    "    display_html(\"<h2>\" + os_name + \"</h2>\", raw=True)\n",
    "    os_unsigned = unsigned.filter(lambda p: p[\"os\"][\"name\"] == os_name)\n",
    "    display(ordered_ids(os_unsigned).nlargest(20))"
   ]
  }
 ]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment