Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save bcolloran/0660e7458d065e1778a3 to your computer and use it in GitHub Desktop.
Save bcolloran/0660e7458d065e1778a3 to your computer and use it in GitHub Desktop.
clean and save v4 histories (testing).ipynb
Display the source blob
Display the rendered blob
Raw
{"nbformat_minor": 0, "cells": [{"execution_count": 4, "cell_type": "code", "source": "import ujson as json\nimport matplotlib.pyplot as plt\nimport pandas as pd\nimport numpy as np\nimport plotly.plotly as py\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\n\n%pylab inline", "outputs": [{"output_type": "stream", "name": "stdout", "text": "Populating the interactive namespace from numpy and matplotlib\n"}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 13, "cell_type": "code", "source": "numCores = sc.defaultParallelism\nnumCores", "outputs": [{"execution_count": 13, "output_type": "execute_result", "data": {"text/plain": "80"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 6, "cell_type": "code", "source": "histories = get_clients_history(sc, fraction = 0.5)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 14, "cell_type": "code", "source": "histories.count()", "outputs": [{"execution_count": 14, "output_type": "execute_result", "data": {"text/plain": "41578"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 15, "cell_type": "code", "source": "histories.getNumPartitions()", "outputs": [{"execution_count": 15, "output_type": "execute_result", "data": {"text/plain": "41578"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "h = histories.first()", "cell_type": "markdown", "metadata": {}}, {"source": "#Filter and extraction settings", "cell_type": "markdown", "metadata": {}}, {"execution_count": 7, "cell_type": "code", "source": "# in each history, keep pings with creationDate and buildId that\n# fall within the following intervals\n# (using non-strict inequalities-- keep the interval boundaries)\ncreationDateBounds = (\"2015-06-10\",\"2015-07-08\")\nbuildIdBounds = (\"20150610000000\", \"99990507000000\")\n\n# keep only the 
following paths within each ping\nv4PathsToV2fields = ['clientId',\n 'meta/appUpdateChannel',\n 'id',\n 'environment',\n 'application',\n 'version',\n 'creationDate',\n 'type',\n 'payload/info',\n 'payload/simpleMeasurements/activeTicks',\n 'payload/simpleMeasurements/totalTime',\n 'payload/simpleMeasurements/main',\n 'payload/simpleMeasurements/firstPaint',\n 'payload/simpleMeasurements/sessionRestored',\n 'payload/histograms/PLACES_PAGES_COUNT',\n 'payload/histograms/PLACES_BOOKMARKS_COUNT',\n 'payload/keyedHistograms/SEARCH_COUNTS',\n 'payload/keyedHistograms/SEARCH_DEFAULT_ENGINE']\n", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"source": "## filter functions", "cell_type": "markdown", "metadata": {}}, {"execution_count": 8, "cell_type": "code", "source": "def getDictPaths(d, paths):\n    \"\"\"Extract the values at slash-separated key paths from a nested dict.\n\n    Returns a flat dict keyed by the path strings in `paths`. A path that is\n    absent from `d`, that passes through a non-dict value, or that resolves\n    to an empty dict is reported as the string \"MISSING\".\n    \"\"\"\n    result = {}\n    for path in paths:\n        out = d\n        for key in path.split(\"/\"):\n            # A non-dict intermediate (None, a string, ...) cannot be descended\n            # into; treat the path as absent instead of raising AttributeError.\n            out = out.get(key, {}) if isinstance(out, dict) else {}\n        result[path] = \"MISSING\" if out == {} else out\n    return result\n\n\ndef getPathsForDictList(dl, paths):\n    \"\"\"Apply getDictPaths to every dict in `dl`, preserving order.\"\"\"\n    return [getDictPaths(d, paths) for d in dl]", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 9, "cell_type": "code", "source": "def dropDupePings(pingListIn):\n    \"\"\"Return the pings of `pingListIn` with repeated 'id' values removed.\n\n    The first ping carrying each id is kept and input order is preserved.\n    Seen ids are tracked in a set (so ids must be hashable), which keeps the\n    pass linear in the number of pings instead of quadratic.\n    \"\"\"\n    seenIds = set()\n    pingListOut = []\n    for ping in pingListIn:\n        pingId = ping['id']\n        if pingId not in seenIds:\n            seenIds.add(pingId)\n            pingListOut.append(ping)\n    return pingListOut", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 10, "cell_type": "code", "source": "def filterPingListByBuildAndDate(pl, build, dates):\n    \"\"\"Keep the pings whose buildId and creationDate fall within the bounds.\n\n    `build` and `dates` are (low, high) pairs; both ends are inclusive and\n    values are compared as strings. Only the first 10 characters of\n    creationDate are compared against `dates`. A ping whose \"application\"\n    entry is not a dict (for example the \"MISSING\" placeholder written by\n    getDictPaths) or carries no buildId is dropped instead of raising.\n    \"\"\"\n    def withinBounds(p):\n        application = p[\"application\"]\n        if not isinstance(application, dict):\n            return False\n        buildId = application.get(\"buildId\")\n        if buildId is None:\n            return False\n        return (build[0] <= buildId <= build[1]\n                and dates[0] <= p[\"creationDate\"][:10] <= dates[1])\n\n    return [p for p in pl if withinBounds(p)]", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"source": "hc = dropDupePings(getPathsForDictList(h,v4PathsToV2fields))", "cell_type": "markdown", "metadata": {}}, {"source": "len(hc)", "cell_type": "markdown", 
"metadata": {}}, {"source": "hcf = sorted(filterPingListByBuildAndDate(hc,buildIdBounds,creationDateBounds),\n key = lambda p:p[\"creationDate\"])", "cell_type": "markdown", "metadata": {}}, {"source": "len(hcf)", "cell_type": "markdown", "metadata": {}}, {"source": "buildIdBounds[0],u'20150604162752',buildIdBounds[0]<=u'20150604162752'\nbuildIdBounds[0]<=hc[0][\"application\"][\"buildId\"]<=buildIdBounds[1]", "cell_type": "markdown", "metadata": {}}, {"source": "hcf[0].keys()\nhcf[0]['payload/info']\nhcf[0]", "cell_type": "markdown", "metadata": {}}, {"source": "# Filter and trim the data", "cell_type": "markdown", "metadata": {}}, {"execution_count": 16, "cell_type": "code", "source": "# Per client: extract the configured paths, drop duplicate ping ids, keep the\n# pings inside the build/date bounds; then discard clients left with no pings.\nhistoriesTrimmed = (histories\n    .map(lambda h: dropDupePings(getPathsForDictList(h, v4PathsToV2fields)))\n    .map(lambda h: filterPingListByBuildAndDate(h, buildIdBounds, creationDateBounds))\n    .filter(lambda h: len(h) > 0)\n    .repartition(500)\n    .cache())\n", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "# Summarize the trimmed and filtered data", "cell_type": "markdown", "metadata": {}}, {"execution_count": 21, "cell_type": "code", "source": "historiesTrimmed.getNumPartitions()", "outputs": [{"execution_count": 21, "output_type": "execute_result", "data": {"text/plain": "500"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 17, "cell_type": "code", "source": "historyLengths = historiesTrimmed.map(len).collect()", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 30, "cell_type": "code", "source": "import collections\n\npd.Series(historyLengths).plot(kind=\"hist\", bins=100, figsize=(15, 7))\nplt.title(\"Number of fragments per client\")\nplt.ylabel(\"count\")\n{\"mean\": np.mean(historyLengths),\n \"med\": np.median(historyLengths),\n \"max\": np.max(historyLengths),\n \"mode\": max(collections.Counter(historyLengths).items(),key = lambda x:x[1])[0],\n 
\"numInSample\": len(historyLengths)}", "outputs": [{"execution_count": 30, "output_type": "execute_result", "data": {"text/plain": "{'max': 967,\n 'mean': 50.8350860571609,\n 'med': 30.0,\n 'mode': 1,\n 'numInSample': 31665}"}, "metadata": {}}, {"output_type": "display_data", "data": {"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4oAAAGxCAYAAAAkr0i/AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3X+0X3V95/vnC5AKikbURkBq0AmFzNVGuTW2Ti/RsVym\nteC0t/yYkQZFpwxjdWY6vYJ3TbF2tHRueytdXlhTfxFsi5eqZcVeRCIlnU5rTVWwSMwCvA2SaKJV\nFLVaobzvH3sf8/2enOQk5Ozv+ZzzfT7WOiv7s/f+7u/nHF+SvM/+vPc3VYUkSZIkSTOOWOwJSJIk\nSZLaYqEoSZIkSRpjoShJkiRJGmOhKEmSJEkaY6EoSZIkSRpjoShJkiRJGmOhKEk6bEmuS/Lri/j+\n703ytSR/tZ/j/yXJV5J8cdJzEyTZkeSl/fabkrxzseckSTowC0VJWob6f5jvSXLsyL7XJLl9oLes\n/mvikvwE8DLgxKp60RzHfwj4j8BpVXXipOe3kJKsT/LAYs/jMfh+NqrqbVX12sO9YJJVSR5N4r9l\nJGkA/sdVkpavI4A3TPD9siAXOfR/+D8L2FFV393P8R8CvlpVX93P+x11iO839Rr7mS1I7iRJ4ywU\nJWl5KuC3gP+U5MmzD851NybJliSX9NsXJ/mLJP9XkgeT3Jfkx5O8KskX+ruVvzDrsk9LcmuSh/pr\n/dDItU9LsjnJV5NsT/LzI8euS3JtkpuTfAtYP8d8T0yyqX/9vUle0++/BHgn8GNJvpnkylmvexlw\nK3Bif/w9SZ7Vf++vTnI/8LH+3D9K8qUkX0/yZ0nWjFznqUk+nOQbSbb2S1n/fOT4o0n+bT+3h5K8\nJclzkny8v977kzxu5PyXJ7mz/9n+RZLnjhzbkeSXk3xm5LU/kOQJwEdGvpeHkjwjyQuTfLKf2+4k\nvz1XIPq7kTuTXNEvw/3bJP9q5PgPJPmtJPf317k2yeNnvfZ/T/Il4N37eY/XJtnWz+3uJGvnOOfN\nSd43Mn5Rkr/sfxZ3Jjlz5NiW/mf5P/prfjTJU/vD/73/8+v9z2PdXHOSJD02FoqStHx9EtgC/KeD\nPH/28tEXAp8BjgduAG4EXgA8B3gl8I7sXdoa4F8DbwGeBtwJ/AFAX+BsBn4feDpwAXBNktNH3utC\n4Ner6onAX8wxt/cDXwBOAP434G1JXlJV7wYuBT5eVcdV1a+NfUNVHwP+BfDF/vir2XsH6n8BTgP+\n1378/wL/pJ/jp2fm3/u/gW8CK4ENwC+w71Lbs4DnAy8C3khXwF5Id0fzuf02SZ5PV2i9tv/Z/jdg\n00ghWcDP9/M6BXgecHFVfRs4e+R7eVJV7QauBn6nqp4MPJvuf6f9WQk8FTix/z5+L8mp/bGr+u//\nR/o/TwJ+ddZrn9J/P784+8J98X8lcFFVPQk4B/jaHHOokdecBPwJ8JaqegpdVj84UgzS/9wuBn4Q\nOJq9ef6J/s8n9z+PTxzg+5YkHSILRUlavoruH/q/lORpj+H1f1tVG6uq6IqPE+n+Qf9wVW0GvkdX\nUMz4k6r6H1X1PeD/oLvL90zg5SPXerSq7gQ+RFcMzbipqj4OUFX/MDqJJCcDPw68saq+V1WfAd5F\nV6zB/EsP93f8zVX1nZn3q6rrqurbVfUw8GvAjyQ5LsmRwM8CV1bVd6vqc8DGO
a77X6vqW1W1DbgL\n+EhV7aiqh+juBD6/P+/fAP+tqv66OtcD/0BXYM743araXVUPAh8GZu7MzfW9fA9YneRpVfX3B1Ew\n/ef+f8P/Tlccn5ckdIXrf6yqr1fVt4DfoCvqZzza/wwe3s8y39cAv1lVnwKoqs9X1RfmOG/0e3gl\ncHNV3dK/5mN0v+D46f54Ae+tqvv697xxnp+FJGmBWChK0jJWVXfT3bG5nEN/2Myeke3v9Nf7yqx9\nT5x5K2DnyPt+m+5u0ol0PYTr+qWFDyZ5EPhXdHeoZl57oAe0nAh8rb/mjC/Q3fE6HN9/zyRHJLkq\n3RLbbwB/28/raXR3GI+aNced7Gv2z2t0/F3gCf32s4BfnvXzeCbd9zlj96xrPZH9uwQ4Ffhcvyz2\npw9w7oNV9Z2R8f10d2mfBhwLfGpkTh/p98/4Sv9LgP15JvD5Axyfy7OAn5/1s3gx8IyRcw7lZyFJ\nWiAtNaNLkoZxJd1SytHetZmi61jgW/326D/OD1WAk78/SJ5It6xyF11R92dVddZjvPYXgeOTPLG/\n0wXd8se5irVDMVo4/2u6pZL/vKruT7KCrtAN8BXgEbrv797+/JM5NKPv9QXgrVX1tsOcc7ej6j66\nwpskPwd8IMnxswrCGU9JcmxV/X0/fhbwN8Df0RVha6rqSwf73rM8wPgd5oPxBeB9VfVvDvF1BzMf\nSdJh8I6iJC1zVfV54P9h5Amo/Z3BXcBFSY5M8mq63sPD8VNJXpzkaODX6foGd9Etbzw1ySuTPK7/\n+tEkp/WvO+ASwqp6APhL4Df6B648D3g1Xc/jQnki3fLPr/U9ld8v4qrqH+mWyr45yTH9vC9i/kIl\ns7Znxu8ELu0fQpMkT0jy031xPZ89wFOTPOn7F+5+rk/vh9/o5/XoAa7xa/3/Bj9Bt8Tzj/rlxe8E\n3j5zrSQnJTmU4v5ddA9PekH/ff2TjDzQaD9+H/iZJGf1OXx8/+Cc0bvF+8vHV+i+z8PNrSRpDhaK\nkjQd3kJ393C0uHkt8Ct0d5PWMP4Qmbk+F/FAhVHRPfzlSuCrdP14rwSoqm/SPejlArri9Et0/W9H\nH+C9ZrsQWEV3d/FDwK9W1Z8ewuvn+16up1uGuQv4LPDxWee8Dngy3TLIjXQP9xldhjnX+9es7QLo\ne/heC7yD7q7lvcz9cJy5Xru9f+//L8nXkpxA99Cbzyb5JvA7wAWz+zxH7AYepPs5vg/4xaq6pz/2\nRuA+4K/65beb6Za0Huh73Huw6gPAW4E/BB6i+9/pKfN8PzuBc4E3AV+mu8P4y4wXh/v7Of59/35/\n0S9bfeGB5idJOjTpfok40MWTN9A1twd4Z1VdneR4ut9sPwvYAZxXVV/vz7+C7rfE/wi8vqpu7fef\nAVwHPJ6u6X2SnwsmSdKYJL8J/GBVvWqx53KwkqynW+Z5qMtmJUlTaLA7ikn+J7oi8UfpHrX98iTP\noXugwuaqOhW4rR+T7vOqzqf7rfbZdI9On/mN4rXAJVW1mu7JbmcPNW9JkmZL8sNJntcvqXwh3S81\n/3ix5yVJ0lCGXHp6GvCJ/lHi/wj8GfBzdA8L2NifsxF4Rb99LnBD/9jtHXTLX9b1y2qOq6qt/XnX\nj7xGkqRJOA74IN2Df94P/FZVbVrcKT0mPgBGknRQhnzq6WeBt/ZLTb8L/BTdZyOtrKqZR4bvYe/j\n0U8E/mrk9TvpHn3+MONPttvF4T8SXZKkg1ZVnwRWL/Y8DkdVbaF7WqwkSfMarFCsqu19D8etdI9h\nv5Ou93D0nEqyYL/dXMhrSZIkSdJSVFUHfKL4wRj0cxSr6j3AewCSvJXuzuCeJM+oqt39stIv96fv\nYvxzqZ7Zn7+r3x7dv+sA73nYPxRpoSV5c1W9ebHnIc1mNtUy86lWmU21bKFung368RhJfrD/84eA\nn6V7ZPYmYEN/ygbgpn57E3BBkqOTnEK3x
GdrVe0GHkqyrn+4zUUjr5GWilWLPQFpP1Yt9gSkA1i1\n2BOQ9mPVYk9AGtqgdxSBDyR5Kl2f4WVV9Y0kVwE3JrmE/uMxAKpqW5IbgW3AI/35M9XwZXQfj3EM\n3cdj3DLwvCVJkiRpag36OYqTlqRceqoWJVnfP0hCaorZVMvMp1plNtWyhaqJLBQlSZIkaZlYqJpo\n0B5FSZ0k6xd7DtJczKZaZj7VKrOpaWChKEmSJEka49JTSZIkSVomXHoqSZIkSRqEhaI0AfYyqFVm\nUy0zn2qV2dQ0sFCUJEmSJI2xR1GSJEmSlgl7FCVJkiRJg7BQlCbAXga1ymyqZeZTrTKbmgYWipIk\nSZKkMfYoSpIkSdIyYY+iJEmSJGkQForSBNjLoFaZTbXMfKpVZlPTwEJRkiRJkjTGHkVJkiRJWibs\nUZQkSZIkDcJCUZoAexnUKrOplplPtcpsahpYKEqSJEmSxiy7HkU47iv7P+Phr1Z95/TJzUiSJEmS\nJmehehSXYaH4+f0c/QbwY9+p+u6xk5yTJEmSJE2KD7PZr2fv52vVIs5J085eBrXKbKpl5lOtMpua\nBsuwUJQkSZIkHY5luPR0f9/Pg8AJLj2VJEmStGy59FSSJEmSNAgLRWkC7GVQq8ymWmY+1SqzqWlg\noShJkiRJGmOPoiRJkiQtE/YoSpIkSZIGYaEoTYC9DGqV2VTLzKdaZTY1DSwUJUmSJElj7FGUJEmS\npGXCHkVJkiRJ0iAsFKUJsJdBrTKbapn5VKvMpqaBhaIkSZIkacyghWKSK5LcneSuJH+Y5AeSHJ9k\nc5J7ktyaZMWs8+9Nsj3JWSP7z+ivcW+Sq4ecszSEqtqy2HOQ5mI21TLzqVaZTU2DwQrFJKuA1wIv\nqKrnAkcCFwCXA5ur6lTgtn5MkjXA+cAa4GzgmiQzTZjXApdU1WpgdZKzh5q3JEmSJE27Ie8oPgQ8\nDByb5CjgWOCLwDnAxv6cjcAr+u1zgRuq6uGq2gHcB6xLcgJwXFVt7c+7fuQ10pJgL4NaZTbVMvOp\nVplNTYPBCsWq+hrw28AX6ArEr1fVZmBlVe3pT9sDrOy3TwR2jlxiJ3DSHPt39fslSZIkSQM4aqgL\nJ3kO8O+BVcA3gD9K8srRc6qqus8+XEgX928JsAJYC6zvx48ekWT9zLrymd8GOXY89LiqtrQ0H8eO\nHTt27Njx4Y1ntDIfx1M9XktX+MDeQuiwpWqB67SZCyfnAz9ZVa/pxxcBLwJeCrykqnanW1Z6e1Wd\nluRygKq6qj//FuBK4P7+nNP7/RcCZ1bVpXO8Z8H+vp8HgRO+U/XdYxf0G5UkSZKkRiSpqsr8Zx7Y\nkD2K24EXJTkmSYCXAduADwMb+nM2ADf125uAC5IcneQUYDWwtap2Aw8lWddf56KR10hLwuzfPkqt\nMJtqmflUq8ympsFgS0+r6jNJrgc+CTwKfBr4PeA44MYklwA7gPP687cluZGumHwEuKz23u68DLgO\nOAa4uapuGWrekiRJkjTtBlt6uhhceipJkiRpmi2FpaeSJEmSpCXIQlGaAHsZ1CqzqZaZT7XKbGoa\nWChKkiRJksbYoyhJkiRJy4Q9ipIkSZKkQVgoShNgL4NaZTbVMvOpVplNTQMLRUmSJEnSGHsUJUmS\nJGmZsEdRkiRJkjQIC0VpAuxlUKvMplpmPtUqs6lpYKEoSZIkSRpjj6IkSZIkLRP2KEqSJEmSBmGh\nKE2AvQxqldlUy8ynWmU2NQ0sFCVJkiRJY+xRlCRJkqRlwh5FSZIkSdIgLBSlCbCXQa0ym2qZ+VSr\nzKamgYWiJEmSJGmMPYqSJEmStEzYoyhJkiRJGoSFojQB9jKoVWZTLTOfapXZ1DSwUJQkSZIkjbFH\nUZIkSZKWCXsUJUmSJEmDsFCUJsBeBrXKbKpl5lOtMpuaBhaKkiRJkqQx9ihKkiRJ0jJhj6IkSZIk\naRAWi
tIE2MugVplNtcx8qlVmU9PAQlGSJEmSNMYeRUmSJElaJuxRlCRJkiQNwkJRmgB7GdQqs6mW\nmU+1ymxqGgxaKCb54SR3jHx9I8nrkxyfZHOSe5LcmmTFyGuuSHJvku1JzhrZf0aSu/pjVw85b0mS\nJEmaZhPrUUxyBLALeCHwS8DfVdV/TfJG4ClVdXmSNcAfAj8KnAR8DFhdVZVkK/C6qtqa5Gbgd6vq\nllnvYY+iJEmSpKm1FHsUXwbcV1UPAOcAG/v9G4FX9NvnAjdU1cNVtQO4D1iX5ATguKra2p93/chr\nJEmSJEkLaJKF4gXADf32yqra02/vAVb22ycCO0des5PuzuLs/bv6/dKSYC+DWmU21TLzqVaZTU2D\noybxJkmOBn4GeOPsY/2y0gVc/3oxsKrfXgGsBdb340ePSLK+qrb081rfz8GxY8eOp3I8o5X5OHY8\nOp7RynwcOx4ZrwVamo/j6R6vpSt8YG8hdNgm0qOY5Fzg31bV2f14O7C+qnanW1Z6e1WdluRygKq6\nqj/vFuBK4P7+nNP7/RcCZ1bVpbPexx5FSZIkSVMrS6xH8UL2LjsF2ARs6Lc3ADeN7L8gydFJTgFW\nA1urajfwUJJ1SQJcNPIaSZIkSdICGrxQTPIEugfZfGhk91XATya5B3hpP6aqtgE3AtuAjwCX1d5b\nnpcB7wLupXsoztgTT6WWzV5GJbXCbKpl5lOtMpuaBoP3KFbVt4Gnzdr3Nbrica7z3wa8bY79nwKe\nO8QcJUmSJEl7TexzFCfBHkVJkiRJ02yp9ShKkiRJkpYIC0VpAuxlUKvMplpmPtUqs6lpYKEoSZIk\nSRpjj6IkSZIkLRP2KEqSJEmSBmGhKE2AvQxqldlUy8ynWmU2NQ0sFCVJkiRJY+xRlCRJkqRlwh5F\nSZIkSdIgLBSlCbCXQa0ym2qZ+VSrzKamgYWiJEmSJGmMPYqSJEmStEzYoyhJkiRJGoSFojQB9jKo\nVWZTLTOfapXZ1DSwUJQkSZIkjbFHUZIkSZKWCXsUJUmSJEmDsFCUJsBeBrXKbKpl5lOtMpuaBhaK\nkiRJkqQx9ihKkiRJ0jJhj6IkSZIkaRAWitIE2MugVplNtcx8qlVmU9PAQlGSJEmSNMYeRUmSJEla\nJuxRlCRJkiQNwkJRmgB7GdQqs6mWmU+1ymxqGlgoSpIkSZLG2KMoSZIkScuEPYqSJEmSpEFYKEoT\nYC+DWmU21TLzqVaZTU0DC0VJkiRJ0hh7FCVJkiRpmbBHUZIkSZI0iMELxSQrknwgyeeSbEuyLsnx\nSTYnuSfJrUlWjJx/RZJ7k2xPctbI/jOS3NUfu3roeUsLyV4GtcpsqmXmU60ym5oGk7ijeDVwc1Wd\nDjwP2A5cDmyuqlOB2/oxSdYA5wNrgLOBa5LM3Da9FrikqlYDq5OcPYG5S5IkSdLUGbRHMcmTgTuq\n6tmz9m8HzqyqPUmeAWypqtOSXAE8WlW/2Z93C/Bm4H7gT/tikyQXAOur6tJZ17VHUZIkSdLUWio9\niqcAX0ny3iSfTvLOJE8AVlbVnv6cPcDKfvtEYOfI63cCJ82xf1e/X5IkSZK0wI6awPVfALyuqv46\nydvpl5nOqKrq7gQulIuBVf32CmAtsL4fP3pEkvVVtQX2ri937Hjo8WgvQwvzcex4diZbmY9jx6Pj\nmX2tzMex45Hx2qp6e0PzcTzd47V0hQ/sLYQO29BLT58BfLyqTunH/wy4Ang28JKq2p3kBOD26pae\nXg5QVVf1598CXEm39PT22rv09EK6pasuPdWSkOz9BYXUErOplplPtcpsqmVZCktPq2o38ECSU/td\nLwPuBj4MbOj3bQBu6rc3ARckOTrJKcBqYGt/nYfSPTE1wEUjr5Ga518mapXZVMvMp1plNjUNhl56\nCvBLwB8kORr4PPAq4EjgxiSXADuA8wCqaluSG4FtwCPAZbX3ludlwHX
AMXRPUb1lAnOXJEmSpKkz\n6NLTSXPpqVrlEhW1ymyqZeZTrTKbatmSWHoqSZIkSVp6vKMoSZIkScuEdxQlSZIkSYOwUJQmYPQz\nwaSWmE21zHyqVWZT08BCUZIkSZI0xh5FSZIkSVom7FGUJEmSJA3CQlGaAHsZ1CqzqZaZT7XKbGoa\nWChKkiRJksbYoyhJkiRJy4Q9ipIkSZKkQVgoShNgL4NaZTbVMvOpVplNTQMLRUmSJEnSGHsUJUmS\nJGmZsEdRkiRJkjQIC0VpAuxlUKvMplpmPtUqs6lpYKEoSZIkSRpjj6IkSZIkLRP2KEqSJEmSBmGh\nKE2AvQxqldlUy8ynWmU2NQ0sFCVJkiRJY+xRlCRJkqRlwh5FSZIkSdIgLBSlCbCXQa0ym2qZ+VSr\nzKamgYWiJEmSJGmMPYqSJEmStEzYoyhJkiRJGoSFojQB9jKoVWZTLTOfapXZ1DSwUJQkSZIkjbFH\nUZIkSZKWCXsUJUmSJEmDsFCUJsBeBrXKbKpl5lOtMpuaBhaKkiRJkqQx9ihKkiRJ0jJhj6IkSZIk\naRCDF4pJdiT5myR3JNna7zs+yeYk9yS5NcmKkfOvSHJvku1JzhrZf0aSu/pjVw89b2kh2cugVplN\ntcx8qlVmU9NgEncUC1hfVc+vqhf2+y4HNlfVqcBt/Zgka4DzgTXA2cA1SWZum14LXFJVq4HVSc6e\nwNwlSZIkaepMaunp7DWy5wAb++2NwCv67XOBG6rq4araAdwHrEtyAnBcVW3tz7t+5DVS86pqy2LP\nQZqL2VTLzKdaZTY1DSZ1R/FjST6Z5LX9vpVVtaff3gOs7LdPBHaOvHYncNIc+3f1+yVJkiRJC+yo\nCbzHi6vqS0meDmxOsn30YFVV97TShXIxsKrfXgGsBdb340ePSLJ+5rdAM+vLHTseejzay9DCfBw7\nnp3JVubj2PHoeGZfK/Nx7HhkvLaq3t7QfBxP93gtXeEDewuhwzbRj8dIciXwLeC1dH2Lu9MtK729\nqk5LcjlAVV3Vn38LcCVwf3/O6f3+C4Ezq+rSWdf34zHUpGTvLyiklphNtcx8qlVmUy3LUvh4jCTH\nJjmu334CcBZwF7AJ2NCftgG4qd/eBFyQ5OgkpwCrga1VtRt4KMm6JAEuGnmN1Dz/MlGrzKZaZj7V\nKrOpaTD00tOVwB93tR1HAX9QVbcm+SRwY5JLgB3AeQBVtS3JjcA24BHgstp7y/My4DrgGODmqrpl\n4LlLkiRJ0lSa6NLTobn0VK1yiYpaZTbVMvOpVplNtWxJLD2VJEmSJC093lGUJEmSpGXCO4qSJEmS\npEFYKEoTMPqZYFJLzKZaZj7VKrOpaWChKEmSJEkaM2+PYpLbquqfz7evBfYoSpIkSZpmC9WjuN/P\nUUxyDHAs8PQkx48cehJw0uG+sSRJkiSpTQdaevqLwCeBHwY+NfK1CXjH8FOTlg97GdQqs6mWmU+1\nymxqGuz3jmJVvR14e5LXV9XvTnBOkiRJkqRFdFCfo5jkx4FVjBSWVXX9cNN6bOxRlCRJkjTNBu9R\nHHmj3weeDdwJ/OPIoeYKRUmSJEnS4Zu3UATOANbUwdx6lDSnJOurastiz0OazWyqZeZTrTKbmgYH\nUyh+FjgB+OLAc5mAfzimW556YAtxq1aSJEmSlqqD+RzFLcBaYCvwD/3uqqpzhp3aoZu/R/F49n/8\n+1exUJQkSZK0JE2sRxF48+G+iSRJkiRp6Tiop54uFd5RVKvsZVCrzKZaZj7VKrOplk3yqaffYm91\ndTTwOOBbVfWkw31zSZIkSVJ7DumOYpIjgHOAF1XV5YPN6jHyjqIkSZKkabZQdxQf09LTJHdW1drD\nffOFZqEoSZIkaZpNcunpz40Mj6D7XMXvHO4bS9PEXga1ymyqZeZTrTKbmgYH89TTn2HvbbhHgB3A\nuUNNSJIkSZK0uHzq6b5XcempJEm
SpCVpoZaeHnEQb3Rykj9O8pX+64NJnnm4byxJkiRJatO8hSLw\nXmATcGL/9eF+n6SDlGT9Ys9BmovZVMvMp1plNjUNDqZQfHpVvbeqHu6/rgN+cOB5SZIkSZIWycEU\nil9NclGSI5McleSVwN8NPTFpOfHJaGqV2VTLzKdaZTY1DQ6mUHwVcB6wG/gS8PP9PkmSJEnSMnQw\nheJbgF+oqqdX1dPpisQ3DzoraZmxl0GtMptqmflUq8ympsHBFIo/UlUPzgyq6mvAC4abkiRJkiRp\nMR1MoZgkx48MjgeOHG5K0vJjL4NaZTbVMvOpVplNTYOjDuKc3wY+nuRGIHQ9im8ddFaSJEmSpEUz\n7x3Fqroe+Fngy3QPtPmX/T5JB8leBrXKbKpl5lOtMpuaBgdzR5Gquhu4e+C5SJIkSZIakKpa7Dks\nmCQF+/t+HgSOZ//Hv38VqioLOzNJkiRJGl6SWoh65mAeZnNYkhyZ5I4kH+7HxyfZnOSeJLcmWTFy\n7hVJ7k2yPclZI/vPSHJXf+zqoecsSZIkSdNs8EIReAOwjb238i4HNlfVqcBt/Zgka4DzgTXA2cA1\nSWYq4WuBS6pqNbA6ydkTmLe0YOxlUKvMplpmPtUqs6lpMGihmOSZwE8B76J7YirAOcDGfnsj8Ip+\n+1zghqp6uKp2APcB65KcABxXVVv7864feY0kSZIkaYENfUfxd4BfAR4d2beyqvb023uAlf32icDO\nkfN2AifNsX9Xv19aMvy8JbXKbKpl5lOtMpuaBgf11NPHIsnLgS9X1R37uz1fVdU9gGYhXQys6rdX\nAGuB0bffMjLe0v85e9yZmffMfwwcO3bs2LFjx44dO3bsuLHxWrrCB/YWQodtsKeeJnkbcBHwCPB4\n4EnAh4AfBdZX1e50y0pvr6rTklwOUFVX9a+/BbgSuL8/5/R+/4XAmVV16Rzv6VNP1aQk62f+Dy21\nxGyqZeZTrTKballaf+ppVb2pqk6uqlOAC4A/raqLgE3Ahv60DcBN/fYm4IIkRyc5BVgNbK2q3cBD\nSdYlCV3xeROSJEmSpEEMtvR0DjO38q4CbkxyCbADOA+gqrYluZHuCamPAJfV3tudlwHXAccAN1fV\nLROct3TY/K2jWmU21TLzqVaZTU2DwZaeLgaXnkqSJEmaZs0vPZW010zjsdQas6mWmU+1ymxqGlgo\nSpIkSZLGuPR036u49FSSJEnSkuTSU0mSJEnSICwUpQmwl0GtMptqmflUq8ympoGFoiRJkiRpjD2K\n+17FHkVJkiRJS5I9ipIkSZKkQVgoShNgL4NaZTbVMvOpVplNTQMLRUmSJEnSGHsU972KPYqSJEmS\nliR7FCVJkiRJg7BQlCbAXga1ymyqZeZTrTKbmgYWipIkSZKkMfYo7nsVexQlSZIkLUn2KEqSJEmS\nBmGhKE2AvQxqldlUy8ynWmU2NQ0sFCVJkiRJY+xR3Pcq9ihKkiRJWpLsUZQkSZIkDcJCUZoAexnU\nKrOplplPtcpsahpYKEqSJEmSxtijuO9V7FGUJEmStCTZoyhJkiRJGoSFojQB9jKoVWZTLTOfapXZ\n1DSwUJSYwYAtAAAR/0lEQVQkSZIkjbFHcd+r2KMoSZIkaUmyR1GSJEmSNAgLRWkC7GVQq8ymWmY+\n1SqzqWlgoShJkiRJGmOP4r5XsUdRkiRJ0pJkj6IkSZIkaRAWitIE2MugVplNtcx8qlVmU9PAQlGS\nJEmSNMYexX2vYo+iJEmSpCWp+R7FJI9P8okkdybZluQ3+v3HJ9mc5J4ktyZZMfKaK5Lcm2R7krNG\n9p+R5K7+2NVDzVmSJEmSNGChWFXfBV5SVWuB5wEvSfLPgMuBzVV1KnBbPybJGuB8YA1wNnBNkplK\n+FrgkqpaDaxOcvZQ85aGYC+DWmU21TLzqVaZTU2DQXsUq+rv+82jgSPp1n+eA2zs928EXtFvnwvc\
nUFUPV9UO4D5gXZITgOOqamt/3vUjr5EkSZIkLbBBC8UkRyS5E9gD3F5VdwMrq2pPf8oeYGW/fSKw\nc+TlO4GT5ti/q98vLRlVtWWx5yDNxWyqZeZTrTKbmgZHDXnxqnoUWJvkycBHk7xk1vHqHkCzkC4G\nVvXbK4C1wPqR41tGxlv6P2ePOzPLCmb+Y+DYsWPHjh07duzYsWPHjY3X0hU+sLcQOmwTe+ppkv8M\nfAd4DbC+qnanW1Z6e1WdluRygKq6qj//FuBK4P7+nNP7/RcCZ1bVpXO8h089VZOSrJ/5P7TUErOp\nlplPtcpsqmVZAk89fVr6J5omOQb4SeAOYBOwoT9tA3BTv70JuCDJ0UlOAVYDW6tqN/BQknVJAlw0\n8hpJkiRJ0gIb7I5ikufSPazmiP7rfVX1fyY5HrgR+CFgB3BeVX29f82bgFcDjwBvqKqP9vvPAK4D\njgFurqrX7+c9vaMoSZIkaWot1B3FiS09nQQLRUmSJEnTrPmlp5L2mmk8llpjNtUy86lWmU1NAwtF\nSZIkSdIYl57uexWXnkqSJElaklx6KkmSJEkahIWiNAH2MqhVZlMtM59qldnUNLBQlCRJkiSNsUdx\n36vYoyhJkiRpSbJHUZIkSZI0CAvFOSSpA30t9vy09NjLoFaZTbXMfKpVZlPT4KjFnkCbDlQLuipV\nkiRJ0vJmj+K+V5nnHHsYJUmSJLXJHkVJkiRJ0iAsFKUJsJdBrTKbapn5VKvMpqaBPYqPwXwPtHFp\nqiRJkqSlzB7Ffa8yzzn2MEqSJElqkz2KkiRJkqRBWChKE2Avg1plNtUy86lWmU1NAwtFSZIkSdIY\nexT3vco859ijKEmSJKlN9ihKkiRJkgZhoShNgL0MapXZVMvMp1plNjUNLBQlSZIkSWPsUdz3KvOc\nY4+iJEmSpDbZoyhJkiRJGoSFojQB9jKoVWZTLTOfapXZ1DSwUJQkSZIkjbFHcd+rzHOOPYqSJEmS\n2mSPoiRJkiRpEBaK0gTYy6BWmU21zHyqVWZT08BCUZIkSZI0xh7Ffa8yzzn2KEqSJElqkz2KkiRJ\nkqRBWChKE2Avg1plNtUy86lWmU1NAwtFSZIkSdKYQQvFJCcnuT3J3Uk+m+T1/f7jk2xOck+SW5Os\nGHnNFUnuTbI9yVkj+89Icld/7Ooh5y0ttKrasthzkOZiNtUy86lWmU1Ng6HvKD4M/Ieq+qfAi4B/\nl+R04HJgc1WdCtzWj0myBjgfWAOcDVyTZKYR81rgkqpaDaxOcvbAc5ckSZKkqTRooVhVu6vqzn77\nW8DngJOAc4CN/WkbgVf02+cCN1TVw1W1A7gPWJfkBOC4qtran3f9yGuk5tnLoFaZTbXMfKpVZlPT\nYGI9iklWAc8HPgGsrKo9/aE9wMp++0Rg58jLdtIVlrP37+r3S5IkSZIW2FGTeJMkTwQ+CLyhqr65\ndzUpVFV1n3+4UC4GVvXbK4C1wPqR41tGxlv6P2ePWZDjM79tmlnH7nh6x1W1paX5OHbs2LFjx44P\nbzyjlfk4nurxWrrCB/YWQoctVQtYo831BsnjgD8BPlJVb+/3bQfWV9XudMtKb6+q05JcDlBVV/Xn\n3QJcCdzfn3N6v/9C4MyqunTWexXs7/t5EDie/R///lXmOedgjh9YLcAHYEqSJEnSbElqIeqNoZ96\nGuDdwLaZIrG3CdjQb28AbhrZf0GSo5OcAqwGtlbVbuChJOv6a1408poG1QG+NI1m//ZRaoXZVMvM\np1plNjUNhl56+mLglcDfJLmj33cFcBVwY5JLgB3AeQBVtS3JjcA24BHgstp7y/My4DrgGODmqrpl\n4LlLkiRJ0lQafOnpJLWz9PTAx116KkmSJGkIS2LpqSRJkiRp6bFQlCbAXga1ymyqZeZTrTKbmgYW\nipIkSZKkMfYo7nuVec6xR1GSJElSm+xRlCRJkiQNwkJRmgB7G
dQqs6mWmU+1ymxqGlgoSpIkSZLG\n2KO471XmOcceRUmSJEltskdRkiRJkjQIC8VFkKQO9LXY89PCs5dBrTKbapn5VKvMpqbBUYs9gek0\n39JVSZIkSVo89ijue5V5zrGHUZIkSVKb7FGUJEmSJA3CQlGaAHsZ1CqzqZaZT7XKbGoaWChKkiRJ\nksbYo7jvVeY5xx5FSZIkSW1aqB5Fn3raoPk+IsNCUpIkSdKQXHrapDrAl5YiexnUKrOplplPtcps\nahpYKEqSJEmSxtijuO9V5jln8Y+79FSSJEnSXPwcRUmSJEnSICwUpQmwl0GtMptqmflUq8ympoGF\noiRJkiRpjD2K+15lnnMW/7g9ipIkSZLmYo+iJEmSJGkQFopLUJI60Ndiz0/7spdBrTKbapn5VKvM\npqbBUYs9AT0W8y1dlSRJkqTHzh7Ffa8yzzntH7eHUZIkSZpO9ihKkiRJkgZhoShNgL0MapXZVMvM\np1plNjUNLBQlSZIkSWPsUdz3KvOc0/5xexQlSZKk6WSPoiRJkiRpEBaK0gTYy6BWmU21zHyqVWZT\n02DQQjHJe5LsSXLXyL7jk2xOck+SW5OsGDl2RZJ7k2xPctbI/jOS3NUfu3rIOUuSJEnStBv6juJ7\ngbNn7bsc2FxVpwK39WOSrAHOB9b0r7kmycza2muBS6pqNbA6yexrSk2rqi2LPQdpLmZTLTOfapXZ\n1DQYtFCsqj+ne4rMqHOAjf32RuAV/fa5wA1V9XBV7QDuA9YlOQE4rqq29uddP/IaSZIkSdICW4we\nxZVVtaff3gOs7LdPBHaOnLcTOGmO/bv6/dKSYS+DWmU21TLzqVaZTU2Doxbzzauquo+0WEgXA6v6\n7RXAWmD9yPEtI+Mt/Z+zxyyL4zP/EZtZHuHYsWPHs8czWpmPY8ej4xmtzMex45HxWvp/eDUyH8fT\nPV5LV/jA3kLosA3+OYpJVgEfrqrn9uPtwPqq2p1uWentVXVakssBquqq/rxbgCuB+/tzTu/3Xwic\nWVWXzvFefo6in6MoSZIkTa0s4c9R3ARs6Lc3ADeN7L8gydFJTgFWA1urajfwUJJ1SQJcNPIaSZIk\nSdICG/rjMW4A/hL44SQPJHkVcBXwk0nuAV7aj6mqbcCNwDbgI8Bltfd252XAu4B7gfuq6pYh5y0t\ntNnLqKRWmE21zHyqVWZT02DQHsWqunA/h162n/PfBrxtjv2fAp67gFNb1jJP36dLUyVJkiQdyOA9\nipNkj+LBHbdQlCRJkpanhepRXNSnnmpxeMdRkiRJ0oEsxsNstOjqAF8agr0MapXZVMvMp1plNjUN\nLBQlSZIkSWPsUdz3KvOcs/yPu/RUkiRJWpqW8ucoSpIkSZIaZqEoTYC9DGqV2VTLzKdaZTY1DSwU\nJUmSJElj7FHc9yrznDMNxw/MHkZJkiSpTX6OogZ0eIWkJEmSpKXNpafSBNjLoFaZTbXMfKpVZlPT\nwEJRkiRJkjTGHsV9rzLPOR63R1GSJElqkz2KalJXrB+YhaYkSZLUNpeeagB1gK/pZC+DWmU21TLz\nqVaZTU0D7yjqkB3MXUNJkiRJS5c9ivteZZ5zPG6PoyRJktSmhepRdOmpJEmSJGmMhaI0AfYyqFVm\nUy0zn2qV2dQ0sEdRE+eTUSVJkqS22aO471XmOcfjwx7vzrFQlCRJkg6dPYqSJEmSpEFYKEoTYC+D\nWmU21TLzqVaZTU0DexS15NjjKEmSJA3LHsV9rzLPOR6fRI/i/PysRkmSJGm2hepR9I6iGjVfsXlg\n8911tJCUJEmS9s8eRS1TdYCvybOXQa0ym2qZ+VSrzKamgYWiJEmSJGmMPYr7XmWeczw+mR7FYedw\noKWnPixHkiRJS5U9itKgDq9HUpIkSVrKXHqqqZSk9vd1uK+f6xr2MqhVZlMtM59qldnUNPCOoqbU\n4d4x9I6jJEmSli97FPe9y
jzneHw59CguZg+kJEmSNBR7FKWGHewS1v2x0JQkSdJiskdRGsTsz268\nfWR7ruPjn/N4qD2Q0mNln41aZj7VKrOpabCkCsUkZyfZnuTeJG9c7PlIB+/OQzz/8ArJhfhayO9e\nTVu72BOQDsB8qlVmU8vekll6muRI4B3Ay4BdwF8n2VRVn1vcmUkH4+sLfL3h+0RdPjs1Viz2BKQD\nMJ9qldnUsrdkCkXghcB9VbUDIMn7gXMBC0VpEIdXjA5ZaB7MtS1UJUmSHrulVCieBDwwMt4JrNv3\ntJd+Y+6Xfy/AkxZ+WtLB2LHYE1gEQxeawxaqS8ECFdOrDvP10pBWLfYEpP1YtdgTkIa2lArFg/xH\n3+1PPvDxg/l3zXzneHzY4y3MYYjjGxf5/Q/leCtzGPL1S9/hFsMzr0+yYTHeXzoYjzWf0tDMppa7\npVQo7gJOHhmfTHdX8fv87bYkSZIkHb6l9NTTTwKrk6xKcjRwPrBpkeckSZIkScvOkrmjWFWPJHkd\n8FHgSODdPvFUkiRJkhZeqmwxkSRJkiTttZSWnu5XkrOTbE9yb5I3LvZ8NF2SnJzk9iR3J/lsktf3\n+49PsjnJPUluTbJi5DVX9HndnuSsxZu9pkGSI5PckeTD/dhsqglJViT5QJLPJdmWZJ35VAv6rN2d\n5K4kf5jkB8ymFkuS9yTZk+SukX2HnMckZ/SZvjfJ1fO975IvFJMcCbwDOBtYA1yY5PTFnZWmzMPA\nf6iqfwq8CPh3fQYvBzZX1anAbf2YJGvoemzX0OX2miRL/v+LatobgG3sfXq02VQrrgZurqrTgecB\n2zGfWmRJVgGvBV5QVc+la3m6ALOpxfNeumyNOpQ8zjzw81rgkqpaTffsl9nXHLMcQvxC4L6q2lFV\nDwPvB85d5DlpilTV7qq6s9/+FvA5us/9PIe9n4mxEXhFv30ucENVPVxVO4D76HIsLbgkzwR+CngX\nez9TxGxq0SV5MvATVfUe6J5FUFXfwHxq8T1E90vgY5McBRwLfBGzqUVSVX8OPDhr96HkcV2SE4Dj\nqmprf971I6+Z03IoFE8CHhgZ7+z3SRPX/xby+cAngJVVtac/tAdY2W+fyPhHu5hZDel3gF8BHh3Z\nZzbVglOAryR5b5JPJ3lnkidgPrXIquprwG8DX6ArEL9eVZsxm2rLoeZx9v5dzJPT5VAo+jQeNSHJ\nE4EPAm+oqm+OHqvuqVEHyqo51oJL8nLgy1V1B3vvJo4xm1pERwEvAK6pqhcA36ZfOjXDfGoxJHkO\n8O+BVXT/uH5ikleOnmM21ZKDyONjshwKxV3AySPjkxmvlqXBJXkcXZH4vqq6qd+9J8kz+uMnAF/u\n98/O7DP7fdJC+3HgnCR/C9wAvDTJ+zCbasNOYGdV/XU//gBd4bjbfGqR/c/AX1bVV6vqEeBDwI9h\nNtWWQ/m7fGe//5mz9h8wp8uhUPwkXTPmqiRH0zVvblrkOWmK9A3C7wa2VdXbRw5tAjb02xuAm0b2\nX5Dk6CSnAKuBrUgLrKreVFUnV9UpdA9i+NOqugizqQZU1W7ggSSn9rteBtwNfBjzqcW1HXhRkmP6\nv+NfRvdAMLOplhzS3+X9f3Mf6p8uHeCikdfM6ahh5j05VfVIktcBH6V7KtW7q+pzizwtTZcXA68E\n/ibJHf2+K4CrgBuTXALsAM4DqKptSW6k+0vnEeCy8gNNNRkzOTObasUvAX/Q/6L388Cr6P4uN59a\nNFX1mSTX092MeBT4NPB7wHGYTS2CJDcAZwJPS/IA8Ks8tr/LLwOuA46he+L0LQd8X3MsSZIkSRq1\nHJaeSpIkSZIWkIWiJEmSJGmMhaIkSZIkaYyFoiRJkiRpjIWiJEmSJGmMhaIkSZIkaYyFoiRJkiRp\nzP8P5ttH63UFZngAAAAASUVORK5CYII=\n", "text/plain": 
"<matplotlib.figure.Figure at 0x7f125b227c10>"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 34, "cell_type": "code", "source": "", "outputs": [{"execution_count": 34, "output_type": "execute_result", "data": {"text/plain": "u'aurora'"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 36, "cell_type": "code", "source": "historiesByChannel = historiesTrimmed.map(lambda h:h[0][\"application\"][\"channel\"]) \\\n .countByValue()", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 43, "cell_type": "code", "source": "sorted(historiesByChannel.items(), key = lambda x:x[1], reverse=True)", "outputs": [{"execution_count": 43, "output_type": "execute_result", "data": {"text/plain": "[(u'beta', 24232),\n (u'release', 5186),\n (u'aurora', 1283),\n (u'nightly', 605),\n (u'release-cck-euballot', 98),\n (u'release-cck-yandex', 59),\n (u'release-cck-mozillaonline', 44),\n (u'release-cck-yahoo', 22),\n (u'default', 17),\n (u'release-cck-mozilla14', 17),\n (u'release-cck-webde', 13),\n (u'release-cck-aol', 8),\n (u'release-cck-seznam', 7),\n (u'release-cck-rambler', 7),\n (u'release-cck-yahoode', 6),\n (u'release-cck-mozilla26', 6),\n (u'release-cck-yahooid', 5),\n (u'beta-cck-yandex', 5),\n (u'release-cck-yahootw', 4),\n (u'release-cck-gmx', 4),\n (u'beta-cck-euballot', 4),\n (u'release-cck-mozilla12', 4),\n (u'beta-cck-mozilla19', 3),\n (u'release-cck-mozilla11', 3),\n (u'release-cck-mozillataiwan', 2),\n (u'release-cck-yahoovn', 2),\n (u'release-cck-tonline', 2),\n (u'release-cck-mozilla32', 2),\n (u'release-cck-1und1', 1),\n (u'release-cck-mozilla24', 1),\n (u'release-cck-mozilla28', 1),\n (u'release-cck-yahooph', 1),\n (u'beta-cck-yahooid', 1),\n (u'release-cck-opensuse', 1),\n (u'beta-cck-mozillaonline', 1),\n (u'beta-cck-yahoo', 1),\n (u'release-cck-yahoohk', 1),\n (u'release-cck-nttresonant', 1),\n (u'release-cck-mailru', 1),\n 
(u'release-cck-mozilla34', 1),\n (u'release-cck-mozilla13', 1),\n (u'release-cck-mozilla17', 1),\n (u'release-cck-yahoofr', 1)]"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 45, "cell_type": "code", "source": "def channelSwitched(h):\n    \"\"\"Return True when the pings of history `h` report more than one channel.\"\"\"\n    channels = {p[\"application\"][\"channel\"] for p in h}\n    return len(channels) > 1", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 46, "cell_type": "code", "source": "channelSwitchers = historiesTrimmed.map(channelSwitched).countByValue()\nchannelSwitchers.items()", "outputs": [{"execution_count": 46, "output_type": "execute_result", "data": {"text/plain": "[(False, 31627), (True, 38)]"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "# Output to S3 as sequence file", "cell_type": "markdown", "metadata": {}}, {"execution_count": 24, "cell_type": "code", "source": "outBucketName = \"net-mozaws-prod-us-west-2-pipeline-analysis\"  # Substitute in your bucket name\npathToOutput = \"bcolloran/clientHistoriesTrimmed/2015-07-08/\"\n\n# One record per client: the key is the clientId of the history's first ping,\n# the value is the whole trimmed ping list serialized as JSON.\noutputRecords = historiesTrimmed.map(lambda x: (str(x[0][\"clientId\"]), json.dumps(x)))\noutputRecords.saveAsSequenceFile(\"s3n://\" + outBucketName + \"/\" + pathToOutput)", "outputs": [], "metadata": {"scrolled": true, "collapsed": false, "trusted": true}}, {"source": "# Check info about exported data", "cell_type": "markdown", "metadata": {}}, {"execution_count": 25, "cell_type": "code", "source": "import boto\nconn = boto.connect_s3()", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 27, "cell_type": "code", "source": "# Read back from the same bucket and prefix the export cell wrote to.\nmozBucket = conn.get_bucket(outBucketName)\nbl = mozBucket.list(prefix=pathToOutput)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 29, "cell_type": "code", "source": "print \"data size (G):\", sum(key.size for key in bl)/(1.0*10**9)\nlist(bl)[-5:]", "outputs": [{"output_type": "stream", "name": 
"stdout", "text": "data size (G): 11.536239693\n"}, {"execution_count": 29, "output_type": "execute_result", "data": {"text/plain": "[<Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/clientHistoriesTrimmed/2015-07-08/part-00495>,\n <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/clientHistoriesTrimmed/2015-07-08/part-00496>,\n <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/clientHistoriesTrimmed/2015-07-08/part-00497>,\n <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/clientHistoriesTrimmed/2015-07-08/part-00498>,\n <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/clientHistoriesTrimmed/2015-07-08/part-00499>]"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "version": "2.7.9", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": {"version": 2, "name": "ipython"}}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment