Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save bcolloran/880128483012a40771e5 to your computer and use it in GitHub Desktop.
Save bcolloran/880128483012a40771e5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{"nbformat_minor": 0, "cells": [{"execution_count": 1, "cell_type": "code", "source": "import ujson as json\nimport matplotlib.pyplot as plt\nimport pandas as pd\nimport numpy as np\nimport plotly.plotly as py\nimport networkx as nx\nimport collections\nimport datetime\n\nfrom moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client\n\n# %pylab inline\n\n", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 2, "cell_type": "code", "source": "sc.defaultParallelism", "outputs": [{"execution_count": 2, "output_type": "execute_result", "data": {"text/plain": "64"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"source": "#Get pings, filter them, and do some basic checks", "cell_type": "markdown", "metadata": {}}, {"execution_count": 3, "cell_type": "code", "source": "v4Pings = get_pings(sc, app=\"Firefox\",\n channel=\"nightly\",\n submission_date=(\"20150515\",\"20150610\"),\n build_id=(\"20150507000000\", \"99990507000000\"),\n fraction=1,\n doc_type=\"main\",\n schema=\"v4\")\\\n .filter(lambda p: p.get(\"payload\",{}).get(\"info\",{}).get(\"reason\", \"idle-daily\")!=\"idle-daily\")", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 4, "cell_type": "code", "source": "# p = pings.first()", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 5, "cell_type": "code", "source": "# p[\"payload\"][\"info\"]\n# p.keys()\n# p.get(\"payload\",{}).get(\"info\",{}).get(\"subsessionId\",False)\n# {k:p[k] for k in p.keys() if k!=\"main\"} ", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "# info = get_pings_properties(v4Pings, [\"id\",\n# \"clientId\",\n# \"type\",\n# \"payload/info\",\n# \"environment/build/buildId\"])", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 11, "cell_type": "code", "source": "# #note 
that we have to filter out the 'meta' entry, b/c this can contain things like intake timestamps,\n# #which should be expected to change if the same ping is sent twice\n\n# pingsByPingId = pings \\\n# .map(lambda p: (p.get(\"id\",\"MISSING\"),\n# [{k:p[k] for k in p.keys() if k!=\"meta\"}]) ) \\\n# .reduceByKey(lambda l1,l2: l1+l2)\n# pingsByPingId.cache()", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"source": "# Group pings by clientId \nGet the \"payload/info\" section of recent pings from builds newer than 20150507000000 that are not idle-daily pings, and group them by clientId", "cell_type": "markdown", "metadata": {}}, {"execution_count": 7, "cell_type": "code", "source": "info = get_pings_properties(v4Pings, [\"id\",\n \"clientId\",\n \"type\",\n \"payload/info\",\n \"environment/build/buildId\"])\n\n# subsess = info.filter(\n# lambda p: ((p[\"payload/info\"].get(\"reason\", \"idle-daily\") != \"idle-daily\") and\n# (p[\"type\"] == \"main\") and\n# (p[\"environment/build/buildId\"]>\"20150507000000\")) )\n\n# subsess.cache()\n# numPings = subsess.count()\n\n# numPings", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 8, "cell_type": "code", "source": "clients = info.map(lambda p: (p.get(\"clientId\",\"noId\"),\n [p[\"payload/info\"]]) ) \\\n .reduceByKey(lambda l1,l2: l1+l2)", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"source": "### Remove duplicate pings for each client", "cell_type": "markdown", "metadata": {}}, {"execution_count": 9, "cell_type": "code", "source": "def dropDupeSubsessions(subsessList):\n subsessIdsAdded = []\n subsessListOut = []\n for s in subsessList:\n if s['subsessionId'] not in subsessIdsAdded:\n subsessIdsAdded.append(s['subsessionId'])\n subsessListOut.append(s)\n return subsessListOut\n \n# def dropDupePings(pingListIn):\n# pingsAdded = []\n# pingListOut = []\n# for s in pingListIn:\n# if s['id'] not in pingsAdded:\n# pingsAdded.append(s['id'])\n# 
pingListOut.append(s)\n# return pingListOut", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 10, "cell_type": "code", "source": "cleanedClients = clients \\\n .map(lambda id_sesslist: (id_sesslist[0],dropDupeSubsessions(id_sesslist[1])) )\n\n# cleanedClients.cache()\n# numClients = cleanedClients.count()\n# numClients", "outputs": [], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 11, "cell_type": "code", "source": "# cc = cleanedClients.first()", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 13, "cell_type": "code", "source": "# str(cc[0]),json.dumps(cc[1])", "outputs": [], "metadata": {"scrolled": true, "collapsed": false, "trusted": true}}, {"execution_count": 15, "cell_type": "code", "source": "cleanedClients.getNumPartitions()", "outputs": [{"execution_count": 15, "output_type": "execute_result", "data": {"text/plain": "56154"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 20, "cell_type": "code", "source": "clientInfosForOutput = cleanedClients.repartition(500).map(lambda cc: (str(cc[0]),json.dumps(cc[1])) ).cache()", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 21, "cell_type": "code", "source": "clientInfosForOutput.getNumPartitions()", "outputs": [{"execution_count": 21, "output_type": "execute_result", "data": {"text/plain": "500"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": 22, "cell_type": "code", "source": "clientInfosForOutput.first()", "outputs": [{"execution_count": 22, "output_type": "execute_result", "data": {"text/plain": "('45071d55-8db7-4a67-ad44-da38a16cbe7f',\n 
'[{\"subsessionStartDate\":\"2015-06-02T00:00:00.0+02:00\",\"profileSubsessionCounter\":2,\"subsessionId\":\"956b3242-7aa7-4126-9dbd-76a080079260\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"previousSessionId\":\"41faca2a-f529-4b07-a378-4bf1ed10b570\",\"sessionId\":\"d3f77091-699c-4dc5-8677-99d5d2ebcd6b\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"previousBuildId\":\"20150528030206\",\"subsessionLength\":27469,\"previousSubsessionId\":\"df3b2fb8-ace1-46b7-be5c-22a888879b17\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-06-02T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/f8d21278244b\"},{\"previousBuildId\":\"20150511122605\",\"profileSubsessionCounter\":7,\"subsessionId\":\"bd87ef08-25e4-4fd5-98cc-9f13090d2edf\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"420ea5ea-607a-4094-a87f-be53500ba533\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-14T00:00:00.0+02:00\",\"subsessionLength\":28490,\"previousSubsessionId\":\"a3582a12-7bdd-40e5-b590-382965697fbc\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-14T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/8b64c75b0b86\"},{\"previousBuildId\":\"20150513030209\",\"profileSubsessionCounter\":9,\"subsessionId\":\"cc1d6ad1-c937-4717-9b21-8040637cb0c1\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"d25eeba2-8b0e-47fb-a1a4-939d25bbb42c\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-18T00:00:00.0+02:00\",\"subsessionLength\":27320,\"previousSubsessionId\":\"5892c581-71fb-446e-b31c-7fdbf54584f8\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-18T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revis
ion\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/0f1d42a6745a\"},{\"previousBuildId\":\"20150519030202\",\"profileSubsessionCounter\":13,\"subsessionId\":\"2ce202a1-7f1b-48d0-b69d-ab54231cde20\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"ccc79523-57ff-48f3-98c8-ad061a236c5a\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-25T00:00:00.0+02:00\",\"subsessionLength\":29976,\"previousSubsessionId\":\"e1a58aac-fa0b-428c-8b40-eaba65a80a1e\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-25T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/b9424d63fe35\"},{\"profileSubsessionCounter\":19,\"subsessionId\":\"35c170ba-b846-4998-8549-6c471db422e1\",\"sessionId\":\"687cb4e3-b253-479a-9382-1ef4832ee15b\",\"flashVersion\":\"17.0.0.169\",\"reason\":\"shutdown\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-28T00:00:00.0+02:00\",\"subsessionLength\":27889,\"previousSubsessionId\":\"f735ebb1-b6e9-49d4-b313-841da7921932\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-28T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/c6bbf8f1b02b\"},{\"previousBuildId\":\"20150527030204\",\"profileSubsessionCounter\":1,\"subsessionId\":\"df3b2fb8-ace1-46b7-be5c-22a888879b17\",\"sessionId\":\"41faca2a-f529-4b07-a378-4bf1ed10b570\",\"flashVersion\":\"17.0.0.169\",\"reason\":\"shutdown\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-06-01T00:00:00.0+02:00\",\"subsessionLength\":27074,\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-06-01T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/baa9c64fea
6f\"},{\"previousBuildId\":\"20150526030202\",\"profileSubsessionCounter\":20,\"subsessionId\":\"8480e22d-3f5d-40f6-9df9-c19b9f59e53a\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"f1f06658-68de-4971-bf9e-bb703c79ba56\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-29T00:00:00.0+02:00\",\"subsessionLength\":27416,\"previousSubsessionId\":\"35c170ba-b846-4998-8549-6c471db422e1\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-29T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/ff2e07228041\"},{\"previousBuildId\":\"20150512030215\",\"profileSubsessionCounter\":8,\"subsessionId\":\"5892c581-71fb-446e-b31c-7fdbf54584f8\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"d39470a7-ca33-4f3a-bdd9-a5675c4bdea5\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-15T00:00:00.0+02:00\",\"subsessionLength\":24083,\"previousSubsessionId\":\"bd87ef08-25e4-4fd5-98cc-9f13090d2edf\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-15T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/62d9b117c688\"},{\"previousBuildId\":\"20150524030234\",\"profileSubsessionCounter\":15,\"subsessionId\":\"1d09d4b9-d3ef-4de2-9d45-2fc3f8417a04\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"27179bfa-d794-443f-9094-6d05d4fb5456\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-26T00:00:00.0+02:00\",\"subsessionLength\":19830,\"previousSubsessionId\":\"f7f6acd9-7d7c-4ace-9df7-1382c5c0a6eb\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-26T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-centra
l\\\\/rev\\\\/b6623a27fa64\"},{\"subsessionStartDate\":\"2015-06-03T00:00:00.0+02:00\",\"profileSubsessionCounter\":3,\"subsessionId\":\"0581e1cc-5e3c-4073-a8c0-5ba1183cc047\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"previousSessionId\":\"d3f77091-699c-4dc5-8677-99d5d2ebcd6b\",\"sessionId\":\"9172be13-03ce-49dd-9b2e-87adc26adafe\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"previousBuildId\":\"20150531030228\",\"subsessionLength\":3059,\"previousSubsessionId\":\"956b3242-7aa7-4126-9dbd-76a080079260\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-06-03T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/666b584fb521\"},{\"previousBuildId\":\"20150514111810\",\"profileSubsessionCounter\":10,\"subsessionId\":\"2169e5e9-83e2-4015-a335-c9f0b1933fa8\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"de2a2182-5e7b-4d04-87c9-3d01d2576e19\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-19T00:00:00.0+02:00\",\"subsessionLength\":26113,\"previousSubsessionId\":\"cc1d6ad1-c937-4717-9b21-8040637cb0c1\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-19T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/2f6ea66057fe\"},{\"previousBuildId\":\"20150517030204\",\"profileSubsessionCounter\":11,\"subsessionId\":\"2f76fa15-fb8d-4262-9462-5d1f50f92dff\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"8246fa2a-8535-4ee2-970d-3649545f6ab0\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-20T00:00:00.0+02:00\",\"subsessionLength\":27417,\"previousSubsessionId\":\"2169e5e9-83e2-4015-a335-c9f0b1933fa8\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-20T00:00:00.0+02:00\",\
"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/35918b0441b4\"},{\"previousBuildId\":\"20150518030202\",\"profileSubsessionCounter\":12,\"subsessionId\":\"e1a58aac-fa0b-428c-8b40-eaba65a80a1e\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"4e358059-1a7c-40fb-bb7a-1f972d373bb4\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-22T00:00:00.0+02:00\",\"subsessionLength\":27428,\"previousSubsessionId\":\"2f76fa15-fb8d-4262-9462-5d1f50f92dff\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-22T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/4fb7ff694bf5\"},{\"previousBuildId\":\"20150521030204\",\"profileSubsessionCounter\":14,\"subsessionId\":\"f7f6acd9-7d7c-4ace-9df7-1382c5c0a6eb\",\"reason\":\"aborted-session\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"0fdadf03-a2a7-4558-adb5-1a8259b279c2\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-26T00:00:00.0+02:00\",\"subsessionLength\":7054,\"previousSubsessionId\":\"2ce202a1-7f1b-48d0-b69d-ab54231cde20\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-26T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/d44425c6730c\"},{\"previousBuildId\":\"20150525030205\",\"profileSubsessionCounter\":16,\"subsessionId\":\"a8efb713-08b5-4cb2-adc1-083c61e444d5\",\"reason\":\"shutdown\",\"flashVersion\":\"17.0.0.169\",\"sessionId\":\"c6210cd7-e4d1-4ff2-a2ee-d4ec02034474\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-27T00:00:00.0+02:00\",\"subsessionLength\":27036,\"previousSubsessionId\":\"1d09d4b9-d3ef-4de2-9d45-2fc3f8417a04\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"
2015-05-27T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/c6bbf8f1b02b\"},{\"profileSubsessionCounter\":17,\"subsessionId\":\"0ce9c3d3-26e8-4208-98af-98d8c29bcf49\",\"sessionId\":\"b3726943-89e6-4a34-ab67-820024d94cf4\",\"flashVersion\":\"17.0.0.169\",\"reason\":\"shutdown\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-27T00:00:00.0+02:00\",\"subsessionLength\":895,\"previousSubsessionId\":\"a8efb713-08b5-4cb2-adc1-083c61e444d5\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-27T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/c6bbf8f1b02b\"},{\"profileSubsessionCounter\":18,\"subsessionId\":\"f735ebb1-b6e9-49d4-b313-841da7921932\",\"sessionId\":\"7f42466a-d020-42ab-b8bf-a73c9b082cd7\",\"flashVersion\":\"17.0.0.169\",\"reason\":\"shutdown\",\"subsessionCounter\":1,\"asyncPluginInit\":false,\"subsessionStartDate\":\"2015-05-27T00:00:00.0+02:00\",\"subsessionLength\":277,\"previousSubsessionId\":\"0ce9c3d3-26e8-4208-98af-98d8c29bcf49\",\"addons\":\"%7B972ce4c6-7e08-4474-a285-3208198ce6fd%7D:41.0a1\",\"sessionStartDate\":\"2015-05-27T00:00:00.0+02:00\",\"timezoneOffset\":120,\"revision\":\"https:\\\\/\\\\/hg.mozilla.org\\\\/mozilla-central\\\\/rev\\\\/c6bbf8f1b02b\"}]')"}, "metadata": {}}], "metadata": {"scrolled": true, "collapsed": false, "trusted": true}}, {"execution_count": 24, "cell_type": "code", "source": "# pathToOutput = \"s3n://net-mozaws-prod-us-west-2-pipeline-analysis/bcolloran/2015-16-03/payloadInfoSectionPerClient-cleaned\"\noutBucketName = \"net-mozaws-prod-us-west-2-pipeline-analysis\"\npathToOutput = \"/bcolloran/infoSectionPerClient/nightly/2015-06-10/\"\n\nclientInfosForOutput.saveAsSequenceFile( \"s3n://\"+outBucketName+pathToOutput )", "outputs": [], "metadata": {"scrolled": true, "collapsed": false, "trusted": 
true}}, {"execution_count": 25, "cell_type": "code", "source": "import boto\nconn = boto.connect_s3()", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 27, "cell_type": "code", "source": "mozBucket = conn.get_bucket('net-mozaws-prod-us-west-2-pipeline-analysis') # Substitute in your bucket name\nbl = mozBucket.list(prefix=\"bcolloran/infoSectionPerClient/nightly/2015-06-10/\")\nprint \"data size:\", sum(key.size for key in bl)\nlist(bl)[-5:]", "outputs": [{"output_type": "stream", "name": "stdout", "text": "data size: 2843548569 2\n"}, {"execution_count": 27, "output_type": "execute_result", "data": {"text/plain": "[<Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/infoSectionPerClient/nightly/2015-06-10/part-00495>,\n <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/infoSectionPerClient/nightly/2015-06-10/part-00496>,\n <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/infoSectionPerClient/nightly/2015-06-10/part-00497>,\n <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/infoSectionPerClient/nightly/2015-06-10/part-00498>,\n <Key: net-mozaws-prod-us-west-2-pipeline-analysis,bcolloran/infoSectionPerClient/nightly/2015-06-10/part-00499>]"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}, {"execution_count": null, "cell_type": "code", "source": "", "outputs": [], "metadata": {"collapsed": true, "trusted": true}}, {"execution_count": 28, "cell_type": "code", "source": "clientInfosForOutput.count()", "outputs": [{"execution_count": 28, "output_type": "execute_result", "data": {"text/plain": "123930"}, "metadata": {}}], "metadata": {"collapsed": false, "trusted": true}}], "nbformat": 4, "metadata": {"kernelspec": {"display_name": "Python 2", "name": "python2", "language": "python"}, "language_info": {"mimetype": "text/x-python", "nbconvert_exporter": "python", "version": "2.7.9", "name": "python", "file_extension": ".py", "pygments_lexer": "ipython2", "codemirror_mode": 
{"version": 2, "name": "ipython"}}}}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment