Skip to content

Instantly share code, notes, and snippets.

@vitillo
Last active August 29, 2015 14:27
Show Gist options
  • Save vitillo/808e1f256063c96966bb to your computer and use it in GitHub Desktop.
Save vitillo/808e1f256063c96966bb to your computer and use it in GitHub Desktop.
Distribution job
Display the source blob
Display the rendered blob
Raw
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Distribution job\n",
    "\n",
    "Computes, for yesterday's Firefox release pings (one ping per client, telemetry enabled), the percentage of clients per configuration label and writes it to `./output/distribution.csv`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "scrolled": true,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Kept first so the cell is also valid when exported as a plain Python script:\n",
    "# __future__ imports are only legal at the top of a module.\n",
    "from __future__ import division\n",
    "\n",
    "import datetime as dt\n",
    "import operator\n",
    "import pandas as pd\n",
    "import ujson as json\n",
    "\n",
    "from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client\n",
    "\n",
    "%pylab inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Prepare dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Submission dates are passed as YYYYMMDD strings.\n",
    "# NOTE(review): this uses the local clock; confirm it matches the timezone of submission_date.\n",
    "yesterday = (dt.datetime.now() - dt.timedelta(days=1)).strftime(\"%Y%m%d\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "pings = get_pings(sc, app=\"Firefox\", channel=\"release\", submission_date=yesterday, fraction=1, schema=\"v4\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Keep only the properties used below, and only pings with telemetry enabled.\n",
    "subset = get_pings_properties(pings, [\n",
    "    \"clientId\",\n",
    "    \"application/channel\",\n",
    "    \"application/version\",\n",
    "    \"environment/system/os/name\",\n",
    "    \"environment/system/os/version\",\n",
    "    \"environment/system/cpu/count\",\n",
    "    \"environment/system/memoryMB\",\n",
    "    \"environment/system/hdd/binary/model\",\n",
    "    \"environment/system/gfx/adapters\",\n",
    "    \"environment/settings/telemetryEnabled\",\n",
    "]).filter(lambda p: p[\"environment/settings/telemetryEnabled\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compute distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "def transform(ping):\n",
    "    \"\"\"Build the configuration label for a single ping.\n",
    "\n",
    "    ping maps the property paths requested with get_pings_properties to\n",
    "    their values. The result is the space-separated string\n",
    "    \"Firefox <channel> <version> <os> <os_version> <disk> <memory> <cpucount> <gfx>\".\n",
    "    Properties whose value is None are reported as \"NA\" so that a single\n",
    "    incomplete ping cannot abort the whole job.\n",
    "    \"\"\"\n",
    "    def label(value):\n",
    "        # \" \".join() raises TypeError on None.\n",
    "        return \"NA\" if value is None else value\n",
    "\n",
    "    channel = label(ping[\"application/channel\"])\n",
    "    version = label(ping[\"application/version\"])\n",
    "\n",
    "    os = label(ping[\"environment/system/os/name\"])\n",
    "    os_version = label(ping[\"environment/system/os/version\"])\n",
    "\n",
    "    # Rounded to the nearest whole GB, counting 1000 MB per GB.\n",
    "    memory = ping[\"environment/system/memoryMB\"]\n",
    "    memory = \"NA\" if memory is None else \"{}-GB\".format(int(round(memory/1000)))\n",
    "\n",
    "    cpucount = ping[\"environment/system/cpu/count\"]\n",
    "    cpucount = \"NA\" if cpucount is None else \"{}-cores\".format(cpucount)\n",
    "\n",
    "    # Report the vendor of the first adapter flagged as active; \"NA\" if there is none.\n",
    "    vendors = {\"0x10de\": \"Nvidia-GPU\", \"0x1002\": \"AMD-GPU\", \"0x8086\": \"Intel-GPU\"}\n",
    "    gfx = \"NA\"\n",
    "    for adapter in ping[\"environment/system/gfx/adapters\"] or []:\n",
    "        if adapter.get(\"GPUActive\", False):\n",
    "            gfx = vendors.get(adapter.get(\"vendorID\", None), \"Other\")\n",
    "            break\n",
    "\n",
    "    # Classified purely by whether the model string mentions \"ssd\".\n",
    "    disk = ping[\"environment/system/hdd/binary/model\"]\n",
    "    if not disk:\n",
    "        disk = \"NA\"\n",
    "    else:\n",
    "        disk = \"SSD\" if \"ssd\" in disk.lower() else \"HDD\"\n",
    "\n",
    "    return \" \".join([\"Firefox\", channel, version, os, os_version, disk, memory, cpucount, gfx])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Count how many clients share each configuration label.\n",
    "transformed = get_one_ping_per_client(subset).map(transform)\n",
    "grouped = pd.Series(transformed.countByValue())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "!mkdir -p ./output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "trusted": true
   },
   "outputs": [],
   "source": [
    "# Written as percentages of all counted clients.\n",
    "(100*grouped/grouped.sum()).to_csv(\"./output/distribution.csv\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment