Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dexterp37/e5176ac397b5ea37ad17 to your computer and use it in GitHub Desktop.
Save Dexterp37/e5176ac397b5ea37ad17 to your computer and use it in GitHub Desktop.
Verify the core pings received from Fennec
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bug 1247605 - Validation of the Fennec beta \"core\" ping submissions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Validate \"core\" pings sent by Firefox for Android to make sure the data they contain makes sense."
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"import datetime as dt\n",
"from uuid import UUID\n",
"\n",
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\n",
"\n",
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"submission_dates = (\"20160202\", \"20160217\")\n",
"core_pings = get_pings(sc,\n",
" app=\"Fennec\",\n",
" channel=\"beta\",\n",
" doc_type=\"core\",\n",
" source_version=\"1\",\n",
" submission_date=submission_dates,\n",
" fraction=1.0)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"4154305"
]
},
"execution_count": 102,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pings_count = core_pings.count()\n",
"pings_count"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How many different clients are we seeing?"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"194318"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"one_per_client = get_one_ping_per_client(core_pings)\n",
"num_clients = one_per_client.count()\n",
"num_clients"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Are we seeing docId dupes?"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"488"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc_ids = core_pings.map(lambda p: p[\"meta\"][\"documentId\"])\n",
"dupes = pings_count - doc_ids.distinct().count()\n",
"dupes"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"dupes_docids = doc_ids.map(lambda d: (d, 1)).reduceByKey(lambda x, y: x + y).filter(lambda x: x[1] > 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get the list of duplicated document Ids."
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"duplicated_docIds = dupes_docids.map(lambda x: x[0]).collect()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get the duplicated core pings and group them by documentId."
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"duplicated_core_pings = core_pings.filter(lambda p: p[\"meta\"][\"documentId\"] in duplicated_docIds)\n",
"grouped_dupes = duplicated_core_pings.map(lambda p: (p[\"meta\"][\"documentId\"], p)).groupByKey()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Do the dupes have the same docId+clientId+sequence number?"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def check_same_ids(duped_pings):\n",
" doc_id = duped_pings[0]\n",
" ping_list = list(duped_pings[1])\n",
" num_pings = len(ping_list)\n",
" \n",
" if num_pings < 2:\n",
" # That shouldn't really be happening here. We need at least 2 pings for \"duplicates\" to\n",
" # be meaningful.\n",
" return (doc_id, False, num_pings)\n",
"\n",
" for i in range(1, len(ping_list)):\n",
" prev_ping = ping_list[i - 1]\n",
" curr_ping = ping_list[i]\n",
" \n",
" if prev_ping[\"meta\"][\"documentId\"] != curr_ping[\"meta\"][\"documentId\"] or \\\n",
" prev_ping[\"clientId\"] != curr_ping[\"clientId\"] or \\\n",
" prev_ping[\"seq\"] != curr_ping[\"seq\"]:\n",
" return (doc_id, False, num_pings)\n",
" \n",
" return (doc_id, True, num_pings)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Map each group of pings (grouped by document Id) to True (they have the same clientId, documentId and sequence number) or False (any of the previous is different)."
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"same_ids = grouped_dupes.map(check_same_ids)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sanity check: do we have *any* \"duplicated\" group with less than 2 pings? That would be odd."
]
},
{
"cell_type": "code",
"execution_count": 110,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"same_ids.filter(lambda x: x[2] < 2).count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many pings have the same documentId but different clientId/sequence number?"
]
},
{
"cell_type": "code",
"execution_count": 134,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"22"
]
},
"execution_count": 134,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dupes_not_matching_ids = same_ids.filter(lambda x: not x[1])\n",
"dupes_not_matching_ids.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Are the pings respecting our desired schema?"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 112,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def is_valid_core_ping(p):\n",
" # That's a sort-of schema to validate the required fields and their types.\n",
" req_fields = {\n",
" \"v\": int,\n",
" \"clientId\": unicode,\n",
" \"seq\": int,\n",
" \"locale\": unicode,\n",
" \"os\": unicode,\n",
" \"osversion\": unicode,\n",
" \"device\": unicode,\n",
" \"arch\": unicode,\n",
" }\n",
" \n",
" opt_fields = {\n",
" \"experiments\": list,\n",
" }\n",
" \n",
" # Does the ping contain all the required top-level fields?\n",
" if not all([f in p for f in req_fields.keys()]):\n",
" return False\n",
" \n",
" # Do they have the expected type?\n",
" if not all([type(p[f]) == req_fields[f] for f in req_fields.keys()]):\n",
" return False\n",
" \n",
" # Does it contain any optional field? If so, make sure it has the correct type. \n",
" for f in opt_fields.keys():\n",
" if f in p:\n",
" if type(p[f]) != opt_fields[f]:\n",
" return False\n",
" \n",
" # Perform some additional sanity checks.\n",
" if p[\"v\"] < 1 or p[\"seq\"] < 0:\n",
" return False\n",
" \n",
" # Validate the clientId\n",
" try:\n",
" UUID(p[\"clientId\"], version=4)\n",
" except ValueError:\n",
" return False\n",
" \n",
" return True\n",
"\n",
"invalid_core_pings = core_pings.filter(lambda p: not is_valid_core_ping(p))\n",
"invalid_core_pings.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's get serious (tm). Group pings per client to verify sequencing, etc."
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def get_ping_info(p):\n",
" return {\n",
" \"clientId\": p[\"clientId\"],\n",
" \"seq\": p[\"seq\"],\n",
" \"v\": p[\"v\"],\n",
" \"Timestamp\": p[\"meta\"][\"Timestamp\"],\n",
" \"submissionDate\": p[\"meta\"][\"submissionDate\"]\n",
" }\n",
"\n",
"def dedupe_and_sort(group):\n",
" key, history = group\n",
" \n",
" seen = set()\n",
" result = []\n",
" \n",
" for fragment in history:\n",
" id = fragment[\"meta\"][\"documentId\"]\n",
" if id in seen:\n",
" continue\n",
" \n",
" seen.add(id)\n",
" result.append(get_ping_info(fragment))\n",
" \n",
" result.sort(key=lambda p: p[\"seq\"])\n",
" return result\n",
"\n",
"grouped = core_pings.groupBy(lambda x: x[\"clientId\"]).map(dedupe_and_sort)"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"test_groups = grouped.collect()"
]
},
{
"cell_type": "code",
"execution_count": 115,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def verify_ordering(groups):\n",
" errors_per_client = {}\n",
"\n",
" for history in groups:\n",
" errors = 0\n",
" for i in range(1, len(history)):\n",
" prev_ping = history[i - 1]\n",
" curr_ping = history[i]\n",
"\n",
" # We expect the pings to be sorted by sequence id. Just make sure\n",
" # the timestamps increase\n",
" if prev_ping[\"Timestamp\"] > curr_ping[\"Timestamp\"]:\n",
" errors = errors + 1\n",
" \n",
" prev_submission_date = dt.datetime.strptime(prev_ping[\"submissionDate\"], \"%Y%m%d\")\n",
" curr_submission_date = dt.datetime.strptime(curr_ping[\"submissionDate\"], \"%Y%m%d\")\n",
" \n",
" if prev_submission_date > curr_submission_date:\n",
" errors = errors + 1\n",
" \n",
" if errors > 0:\n",
" errors_per_client[curr_ping[\"clientId\"]] = errors\n",
" \n",
" return errors_per_client\n",
" \n",
"\n",
"ordering_errors = verify_ordering(test_groups)"
]
},
{
"cell_type": "code",
"execution_count": 116,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"210"
]
},
"execution_count": 116,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(ordering_errors)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### What's the average size of a core ping (bytes)?"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"core_sizes = core_pings.map(lambda p: len(json.dumps(p)))"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sizes_series = pd.Series(core_sizes.collect())"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 4154305.000000\n",
"mean 679.080131\n",
"std 8.519859\n",
"min 641.000000\n",
"25% 673.000000\n",
"50% 678.000000\n",
"75% 682.000000\n",
"max 812.000000\n",
"dtype: float64"
]
},
"execution_count": 119,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sizes_series.describe()"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fa12a5fc7d0>"
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaQAAAEACAYAAAD8wQLNAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGdRJREFUeJzt3X+wXGWd5/H3ByIIyhKDLvIjSGYnWGaKHQEFd3TWa6EI\nU7PAuI7GHSWOqR1LXFFrdkdgaiUprdlBCx2tKXBqRQzMyg/BnzUICeod3aqFID8URQaYISMJEN1g\n8MdYbiLf/aOfG9qbm9CE2/ccuO9XVdc9/Zxznueb7pP7uef0092pKiRJ6tpeXRcgSRIYSJKknjCQ\nJEm9YCBJknrBQJIk9YKBJEnqhbEFUpLFSb6e5HtJvpvkrNa+KsnGJLe12ylD+5yT5J4kdyU5aaj9\nuCR3tHUfG2rfN8mVrf3GJC8YWrciyd3tdsZQ+5IkN7V9rkjyjHE9BpKk0Y3zDGkb8N6q+i3gZcA7\nk7wIKOAjVXVMu30FIMky4I3AMuBk4MIkaX1dBKysqqXA0iQnt/aVwJbW/lHg/NbXIuD9wPHtdl6S\nA9s+5wMXtH1+3PqQJHVsbIFUVQ9V1e1t+WfA94HD2urMsMtpwOVVta2qNgD3AickOQQ4oKrWt+0u\nBU5vy6cCa9ryNcCJbfm1wNqq2lpVW4F1wCkt4F4FXN22WzPUlySpQ3PyGlKSI4FjgBtb07uSfDvJ\nxUkWtrZDgY1Du21kEGDT2zfxWLAdBtwPUFXbgUeSHLSbvhYBW6vq0Rn6kiR1aOyBlOTZDM5I3t3O\nlC4ClgAvBh4ELhh3DY2fkSRJPbZgnJ23CQPXAH9bVV8AqKofDq3/JPDldncTsHho98MZnNlsasvT\n26f2OQJ4IMkC4MCq2pJkEzAxtM9i4GvAw8DCJHu1s6TDWx/T6za8JGkPVNVML8mMZJyz7AJcDNxZ\nVX811H7I0GZ/ANzRlr8ELE+yT5IlwFJgfVU9BPwkyQmtz7cAXxzaZ0Vbfj3w1ba8FjgpycIkzwFe\nA1xfg0+S/Trwh227FcAXZqq/qnp3O++88zqvwZqsaT7WZU2j3Z6scZ4hvRx4M/CdJLe1tnOBNyV5\nMYNLaPcBbweoqjuTXAXcCWwHzqzH/oVnAp8G9gOurarrWvvFwGVJ7gG2AMtbXw8n+QBwc9tudQ0m\nNwC8D7giyQeBW1sfkqSOjS2Qqup/M/MZ2Fd2s89fAH8xQ/stwNEztP8SeMMu+roEuGSG9vuAE3ZZ\nuCSpE35Sw1PIxMRE1yXsxJpGY02j62Nd1jQ3MhvX/Z5ukpSPiyQ9MUmoPk5qkCTpiTCQJEm9YCBJ\nknrBQJIk9YKBJEnqBQNJktQLBpIkqRcMJElSLxhIkqReMJAkSb1gIEmSesFAkiT1goEkSeoFA0mS\n1Avj/MZYPcUNvjG+W34NiDR/GEh6HF0GQveBKGnueMlOktQLBpIkqRcMJElSLxhIkqReMJAkSb1g\nIEmSesFAkiT1goEkSeoFA0mS1AsGkiSpFwwkSVIvGEiSpF4wkCRJvWAgSZJ6wUCSJPWCgSRJ6gUD\nSZLUCwaSJKkXxhZISRYn+XqS7yX5bpKzWvuiJOuS3J1kbZKFQ/uck+SeJHclOWmo/bgkd7R1Hxtq\n3zfJla39xiQvGFq3oo1xd5IzhtqXJLmp7XNFkmeM6zGQJI1unGdI24D3VtVvAS8D3pnkRcDZwLqq\nOgr4artPkmXAG4FlwMnAhUnS+roIWFlVS4GlSU5u7SuBLa39o8D5ra9FwPuB49vtvCQHtn3OBy5o\n+/y49SFJ6tjYAqmqHqqq29vyz4DvA4cBpwJr2mZrgNPb8mnA5VW1rao2APcCJyQ5BDigqta37S4d\n2me4r2uAE9vya4G1VbW1qrYC64BTWsC9Crh6hvElSR2ak9eQkhwJHAPcBBxcVZvbqs3AwW35UGDj\n0G4bGQTY9PZNrZ32836AqtoOPJLkoN30tQjYWlWPztCXJKlDC8Y9QJJnMzh7eXdV/fSxq3BQVZWk\nxl3D1HBPZONVq1btWJ6YmGBiYmKWy5Gkp7bJyUkmJydnrb+xBlKbMHANcFlVfaE1b07y/Kp6qF2O\n+2Fr3wQsHtr9cAZnNpva8vT2qX2OAB5IsgA4sKq2JNkETAztsxj4GvAwsDDJXu0s6fDWx06GA0mS\ntLPpf6yvXr36SfU3zll2AS4G7qyqvxpa9SVgRVteAXxhqH15kn2SLAGWAuur6iHgJ0lOaH2+Bfji\nDH29nsEkCYC1wElJFiZ5DvAa4PqqKuDrwB/OML4kqUMZ/I4eQ8fJK4BvAN/hsctl5wDrgasYnNls\nAN7QJh6Q5FzgbcB2Bpf4rm/txwGfBvYDrq2qqSnk+wKXMXh9aguwvE2IIMkfA+e2cT9YVWta+xLg\nCgavJ90KvLmqtk2rvcb1uDyVDPK/y8ch+DxITx1JqKo8/pa72N//8DszkAYMJElPxJMNJD+pQZLU\nCwaSJKkXDCRJUi8YSJKkXjCQJEm9YCBJknrBQJIk9YKBJEnqBQNJktQLBpIkqRcMJElSLxhIkqRe\nMJAkSb1gIEmSesFAkiT1goEkSeoFA0mS1AsGkiSpFwwkSVIvGEiSpF4wkCRJvWAgSZJ6wUCSJPWC\ngSRJ6gUDSZLUCwaSJKkXDCRJUi8YSJKkXjCQJEm9YCBJknrBQJIk9YKBJEnqBQNJktQLBpIkqRcM\nJElSL4w1kJJ8KsnmJHcMta1KsjHJbe12ytC6c5Lck+SuJCcNtR+X5I627mND7fsmubK135jkBUPr\nViS5u93OGGpfkuSmts8VSZ4xzsdAkjSacZ8hXQKcPK2tgI9U1THt9hWAJMuANwLL2j4XJknb5yJg\nZVUtBZYmmepzJbCltX8UOL/1tQh4P3B8u52X5MC2z/nABW2fH7c+JEkdG2sgVdU3GfzSny4ztJ0G\nXF5V26pqA3AvcEKSQ4ADqmp92+5S4PS2fCqwpi1fA5zYll8LrK2qrVW1FVgHnNIC7lXA1W27NUN9\nSZI69LiBlOToMYz7riTfTnJxkoWt7VBg49A2G4HDZmjf1NppP+8HqKrtwCNJDtpNX4uArVX16Ax9\nSZI6NMoZ0kVJbk5y5tBlryfjImAJ8GLgQeCCWehzFDVH48yaJJ3eJGkuLXi8DarqFUmOAt4G3Jpk\nPXBJVa3dkwGr6odTy0k+CXy53d0ELB7a9HAGZzab2vL09ql9jgAeSLIAOLCqtiTZBEwM7bMY+Brw\nMLAwyV7tLOnw1sdOVq1atWN5YmKCiYmJmTabA13mqKEkadcmJyeZnJyctf5SNdovvPYL/3Tg48Aj\nDM6uzq2qax5nvyOBL1fV0e3+IVX1YFt+L/DSqvpPbVLDZxhMQjgMuAH4zaqqJDcBZwHrgb8DPl5V\n1yU5Ezi6qt6RZDlwelUtb5MavgUcy+C36i3AsVW1NclVwDVVdWWSTwC3V9UnptVcoz4u4zQ4S+k6\nkLodvw/Pg6TRJKGq9vgv2ccNpCS/DbwV+H0GkwM+WVW3JjkUuLGqjtjNvpcDrwSeC2wGzmNw5vJi\nBr/p7gPeXlWb2/bnMjgT2w68u6qub+3HAZ8G9gOuraqzWvu+wGXAMcAWYHmbEEGSPwbObaV8sKrW\ntPYlwBUMXk+6FXhzVW2bVreBNKig8/H78DxIGs1cBNLfAxcDV1fVv0xbd0ZVXbqng/eVgbSjgs7H\n78PzIGk0cxFIzwZ+UVW/avf3Bp5ZVT/f00H7zkDaUUHn4/fheZA0micbSKPMsruBwaWyKfszuHQn\nSdKsGSWQnllVP5u6U1U/ZRBKkiTNmlEC6edtUgEASV4C/GJ8JUmS5qPHfR8S8B7gqiQPtvuHMPjM\nOUmSZs1I70NKsg/wQgavcP/D9GnSTzdOathRQefj9+F5kDSasc+ya4P8DoOP+1lA+w31dJzuPcVA\n2lFB5+P34XmQNJonG0iPe8kuyd8CvwHcDvxqaNXTNpAkSXNvlNeQjgOW9eKUQZL0tDXKLLvvMpjI\nIEnS2IxyhvQ84M72Kd+/bG1VVaeOryxJ0nwzSiCtaj+Lx76PwMt3kqRZNeosuyMZfBXEDUn2BxZU\n1U/GXFtnnGW3o4LOx+/D8yBpNGP/LLskfwJ8Fvib1nQ48Pk9HVCSpJmMMqnhncArgJ8AVNXdwL8e\nZ1GSpPlnlED6ZVVNTWaY+uZYr6NIkmbVKIH090n+HNg/yWsYXL778njLkiTNN6N8Qd/ewErgpNZ0\nPYOvMX/aniU5qWFHBZ2P34fnQdJo5uSz7OYbA2lHBZ2P34fnQdJo5uKz7O6bobmq6jf2dFBJkqYb\n5Y2xLx1afibweuCg8ZQjSZqv9uiSXZJbq+rYMdTTC16y21FB5+P34XmQNJq5uGR3HI/9VtoLeAmw\n954OKEnSTEa5ZHcBjwXSdmAD8IZxFSRJmp+cZTcDL9ntqKDz8fvwPEgazVxcsvtTdv6ttONTv6vq\nI3s6uCRJU0b9xtiXAl9iEES/D9wM3D3GuiRJ88won9TwTeD3quqn7f4BwLVV9btzUF8nvGS3o4LO\nx+/D8yBpNGP/+gkGn+y9bej+Nvy0b0nSLBvlkt2lwPokn2PwJ/PpwJqxViVJmndG/cbY4xh8JxLA\nN6rqtrFW1TEv2e2ooPPx+/A8SBrNXFyyA9gf+GlVfQzYmGTJng4oSdJMRpnUsIrBTLsXVtVRSQ4D\nrqqql89BfZ3wDGlHBZ2P34fnQdJo5uIM6Q+A04CfA1TVJuCAPR1QkqSZjPoV5o9O3UnyrDHWI0ma\np0YJpM8m+RtgYZI/Ab4KfHK8ZUmS5pvdBlIGL2JcCVzTbkcB/72qPj5K50k+lWRzkjuG2hYlWZfk\n7iRrkywcWndOknuS3JXkpKH245Lc0dZ9bKh93yRXtvYbk7xgaN2KNsbdSc4Yal+S5Ka2zxVJnjHK\nv0WSNF6jnCFdW1Vrq+q/ttu6J9D/JcDJ09rOBtZV1VEMzrbOBkiyDHgjsKztc2ELRICLgJVVtRRY\nmmSqz5XAltb+UeD81tci4P3A8e12XpID2z7nAxe0fX7c+pAkdWy3gdSmmt2S5Pg96byqvsngl/6w\nU3nsjbVrGLzRFgYTJy6vqm1VtQG4FzghySHAAVW1vm136dA+w31dA5zYll8LrK2qrVW1FVgHnNIC\n7lXA1TOML0nq0Cif1PAy4M1J/pk2045BVv3bPRzz4Kra3JY3Awe35UOBG4e22wgcxuCjijYOtW9q\n7bSf97eCtid5JMlBra+NM/S1CNg6NEljuC9JUod2GUhJjqiqHzA42yge+8qJWVNVlWSu3mjiG1ok\nqcd2d4b0ReCYqtqQ5Jqq+o+zNObmJM+vqofa5bgftvZNwOKh7Q5ncGazqS1Pb5/a5wjggSQLgAOr\nakuSTcDE0D6Lga8BDzOYLbhXO0s6vPWxk1WrVu1YnpiYYGJiYqbNJGnempycZHJyctb62+UnNSS5\nraqOmb78hAdIjgS+XFVHt/sfYjAR4fwkZwMLq+rsNqnhMwwmIRwG3AD8ZjuLugk4C1gP/B3w8aq6\nLsmZwNFV9Y4ky4HTq2p5m9TwLeBYBmd2twDHVtXWJFcB11TVlUk+AdxeVZ+YVrOf1DCooPPx+/A8\nSBrN2L8x9slIcjnwSuC5Se5nMPPtL4GrkqwENgBvAKiqO1tY3AlsB84cSoUzgU8D+zGY9Xdda78Y\nuCzJPcAWYHnr6+EkH2DwRYIAq9vkBoD3AVck+SBwa+tDktSx3Z0h/Qr4l3Z3P+AXQ6urqv7VmGvr\njGdIOyrofPw+PA+SRjO2M6Sq2ntPO5Uk6Yka9esnJEkaKwNJktQLBpIkqRcMJElSLxhIkqReMJAk\nSb1gIEmSesFAkiT1goEkSeoFA0mS1AsGkiSpFwwkSVIvGEiSpF4wkCRJvWAgSZJ6wUCSJPWCgSRJ\n6gUDSZLUCwaSJKkXDCRJUi8YSJKkXjCQJEm9YCBJknrBQJIk9YKBJEnqBQNJktQLBpIkqRcMJElS\nLxhIkqReMJAkSb1gIEmSesFAkiT1goEkSeoFA0mS1AsGkiSpFzoLpCQbknwnyW1J1re2RUnWJbk7\nydokC4e2PyfJPUnuSnLSUPtxSe5o6z421L5vkitb+41JXjC0bkUb4+4kZ8zVv1mStGtdniEVMFFV\nx1TV8a3tbGBdVR0FfLXdJ8ky4I3AMuBk4MIkaftcBKysqqXA0iQnt/aVwJbW/lHg/NbXIuD9wPHt\ndt5w8EmSutH1JbtMu38qsKYtrwFOb8unAZdX1baq2gDcC5yQ5BDggKpa37a7dGif4b6uAU5sy68F\n1lbV1qraCqxjEHKSpA51fYZ0Q5JvJfnPre3gqtrcljcDB7flQ4GNQ/tuBA6boX1Ta6f9vB+gqrYD\njyQ5aDd9SZI6tKDDsV9eVQ8meR6wLsldwyurqpJUR7WxatWqHcsTExNMTEx0VYok9dLk5CSTk5Oz\n1l9ngVRVD7afP0ryeQav52xO8vyqeqhdjvth23wTsHho98MZnNlsasvT26f2OQJ4IMkC4MCq2pJk\nEzAxtM9i4GvT6xsOJEnSzqb/sb569eon1V8nl+yS7J/kgLb8LOAk4A7gS8CKttkK4Att+UvA8iT7\nJFkCLAXWV9VDwE+SnNAmObwF+OLQPlN9vZ7BJAmAtcBJSRYmeQ7wGuD6Mf1TJUkj6uoM6WDg822i\n3ALgf1XV2iTfAq5KshLYALwBoKruTHIVcCewHTizqqYu550JfBrYD7i2qq5r7RcDlyW5B9gCLG99\nPZzkA8DNbbvVbXKDJKlDeez3uqYkqT48LoPA7rKO7sfvw/MgaTRJqKrps6dH1vW0b0mSAANJktQT\nBpIkqRcMJElSLxhIkqReMJAkSb1gIEmSeqHLz7KTHtdj3zIy93wPlDS3DCT1XFeh0F0QSvOVl+wk\nSb1gIEmSesFAkiT1goEkSeoFA0mS1AsGkiSpFwwkSVIvGEiSpF4wkCRJvWAgSZJ6wUCSJPWCgSRJ\n6gUDSZLUCwaSJKkXDCRJUi8YSJKkXjCQJEm9YCBJknrBQJIk9YKBJEnqBQNJktQLBpIkqRcMJElS\nLxhIkqReMJAkSb1gIEmSemFeBlKSk5PcleSeJO/ruh5J0jwMpCR7A38NnAwsA96U5EXdVjWqya4L\nmMFk1wXMYLLrAnYyOTnZdQk76WNN0M+6rGluzLtAAo4H7q2qDVW1DbgCOK3jmkY02XUBM5jsuoAZ\nTHZdwE76+MujjzVBP+uyprmxoOsCOnAYcP/Q/Y3ACdM3euCBB1i/fv2cFSVJ8918DKQaZaObbrqJ\n173udeOuRT2WZFb7W7169RPavmqkQ1V62sh8O+iTvAxYVVUnt/vnAI9W1flD28yvB0WSZklV7fFf\ncvMxkBYA/wCcCDwArAfeVFXf77QwSZrn5t0lu6ranuS/ANcDewMXG0aS1L15d4YkSeqn+TjtG4Ak\nC5NcneT7Se5sry1NrfvTJI8mWTTUdk57I+1dSU6ao5pOaO3vam3fTTL8WlcXNb0syYuT3JjktiQ3\nJ3npXNWU5IVt3KnbI0nOSrIoybokdydZm2RhxzW9O8mH2+P27SSfS3LgXNW0m7rOGlo/58f57mrq\n6jjfzTHV2XHexnhveyzuSPKZJPt2eZzvpqbZO86ral7egDXA29ryAuDAtrwYuA64D1jU2pYBtwPP\nAI4E7gX2mouagFcB64BntPbn9aCmtcBrW9spwNfnsqah2vYCHmzP2YeAP2vt7wP+sgc1vWZqLOAv\nu6ppel1dH+e7eKw6Pc5nqOmILo9zBm9P+Sdg33b/SmBFl8f5bmp69Wwd5/PyDKkl+O9W1adg8LpS\nVT3SVn8E+LNpu5wGXF5V26pqA4MH9vg5qukdwP+owZt4qaof9aCmRxkEE8BCYNNc1TTNqxm8yfl+\n4FQG4Un7eXqHNf1jVd1fVeuq6tHWfhNweEc1/Vpd7X4nx/kMNU09f50d57uo6Qd0f5wvAPbPYCLW\n/gwmYXV9nE+vaVNV3TBbx/m8DCRgCfCjJJckuTXJ/0yyf5LTgI1V9Z1p2x/K4A20UzYy+Gth3DU9\nC1gK/Pt26WAyyUs6rml/4D3Ah5P8APgwcM4c1jRsOXB5Wz64qja35c3AwR3W9JkZ2t8GXNtRTb9W\nV8fH+fSapp6/Lo/zXdXU2XFeVZuAC4AfMAiirVW1jg6P813UdMO0zZ7UcT5fA2kBcCxwYVUdC/wc\nWM3ggDtvaLvdzaef7dkgM9V0dmt/TlW9DPhvwFU9qOlM4D1VdQTwXuBTc1gTAEn2Af4D8NmdBhxc\nL9jduHNaU5I/B/5fVc0UVGOtaXpd7Q+Kc+nuON+pptbU5XG+q5o6O86TPIfB2dCRDH6xPzvJm39t\nwDk+zndR0x8NrX/Sx/l8DaSNDP5CvLndvxo4hsED/e0k9zE47bwlycEMTtUXD+1/OI+dvo+7pvuB\nzwG0dY8meW6HNR0HnFFVnx9qmzoNn4uappwC3DJ0aWdzkucDJDkE+GEPaiLJW4HfA/5oaLu5rGl6\nXf+Gbo/zmWqCwbHW1XG+q5q6PM5fDdxXVVuqajuDx+bfAQ91eJzPVNPvtFreymwc57P5otdT6QZ8\nAziqLa8Czp+2fqYXe/dhcBnrH2lT5sddE/B2YHVrOwr4Qcc1fQj4HvDK1nYicPNc1tTGugJYMXT/\nQ8D72vLZ7PzCahc1ndweq+dO227Oapqprmnr5vw438Vj1elxvoua7uzqOGcQft8F9mNwBrsGeGeX\nx/luapq143ws/wGeCjfgt4GbgW8zSPoDp63/p6n/qO3+uQxelLuLNvNmLmpiMEPlMuAO4BZgogc1\nvRz4VjvY/g9wzBzX9Czg/wIHDLUtAm4A7mYwO2phD2q6B/hn4LZ2u3Aua9pVXT04zmd6rLo+zmeq\nqevjfBXw/faYrGmPUdfH+fSa9pnN49w3xkqSemG+voYkSeoZA0mS1AsGkiSpFwwkSVIvGEiSpF4w\nkCRJvWAgSZJ6wUCSJPXC/wdU0OL9Ez951wAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fa12a8693d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sizes_series.plot(kind='hist', bins=10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How is each client behaving? What's the ping distribution?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's count how many core pings each client is sending per-day and get some descriptive stats (per-client) "
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def counts(ping_array):\n",
" counts = {}\n",
" \n",
" # Get the counts per day.\n",
" for p in ping_array:\n",
" date = p[\"submissionDate\"]\n",
"\n",
" if not date in counts:\n",
" counts[date] = 1\n",
" continue\n",
" \n",
" counts[date] = counts[date] + 1\n",
" \n",
" return counts.values()\n",
" \n",
"some_counts = grouped.map(counts)"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def get_stats(array):\n",
" return {\n",
" \"min\": np.min(array),\n",
" \"max\": np.max(array),\n",
" \"avg\": np.mean(array),\n",
" \"std\": np.std(array),\n",
" \"var\": np.var(array),\n",
" \"95p\": np.percentile(array, 95)\n",
" }\n",
"\n",
"stats = some_counts.map(get_stats)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's see how's the distribution of pings count per client"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 194318.000000\n",
"mean 9.047098\n",
"std 10.907071\n",
"min 1.000000\n",
"25% 2.000000\n",
"50% 6.000000\n",
"75% 12.000000\n",
"max 304.000000\n",
"dtype: float64"
]
},
"execution_count": 123,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ping_counts_series = pd.Series(stats.map(lambda x: x[\"max\"]).collect())\n",
"ping_counts_series.describe()"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEDCAYAAADdpATdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAGUtJREFUeJzt3X+MHGd9x/H3F4c4BUqiBAmaYOki4YDTRjL/JKkoykYI\nciECt0CUuqTih5pC1aSo/aMmFYrPUiWgotQqVglpEpsS6lBSATFtCFDdUEsUUrscmGLjWMKVnVAn\nEoQfsmJC8+0fO5vbbG7vZveZ3We+O5+XdMrMs7Mzn5u9PJ79zswz5u6IiEi7PC93ABERmT51/iIi\nLaTOX0SkhdT5i4i0kDp/EZEWUucvItJC6vxFRFpInb+ISAtNtPM3s46Z7Tezj5vZVZPcloiIVDfp\nI/+ngZ8B64GTE96WiIhUNHLnb2Z3m9kpMzs00D5vZkfM7GEz21Y273f3NwLvB3bUkFdERGowzpH/\nbmC+v8HM1gG7yvZLga1mtsmXBw56gu7Rv4iINMBZo77B3feb2dxA8+XAMXc/DmBm9wJbzOxVwDXA\necDHkpKKiEhtRu78h7gIONE3fxK4wt0/BHyupm2IiEhN6ur8xx4X2sw0prSIyBjc3cZ9b11X+zwC\nbOib38AIV/dcf/31LCwscN9993Hw4EHcPczP9u3bs2dQ/vw52pg/cvbI+RcXF9m+fXtyp11X538A\n2Ghmc2Z2NnADcH/VN+/b9+989KPf4cYb/5KdO2+vKdJ0HD9+PHeEJMqfV+T8kbND3PydToeFhYXk\n9Yxc9jGzvcBVwAVmdgK4zd13m9nNwIPAOuAudz9cdZ3uL+anP70FOMrTTx8YNZKISGsURUFRFMnr\nGedqn61D2h8AHhgnxDnnXMKZMx3g6Dhvz+qd73xn7ghJlD+vyPkjZ4e4+TudDp1Ohx070m6dMve8\n51vNzNev38iZM3cAR3n72w9wzz13ZM0kItJUvSP/HTt24A044ZvknHMuATq5Y4yljq9fOSl/XpHz\nR84OcfPXVfNvROcvIiLTpbKPiEggKvuIiLSQyj4NEbVu2KP8eUXOHzk7xM+fqq7hHZI8+eRRoMgd\nQ0Sk8eq6zr8RNf9zz72On/zki8AdqvmLiFRgZvFr/iIiMl2N6Pwjl32i1w2VP6/I+SNnh7j5i6KY\nnRO+utpHRKSauq72Uc1fRCQg1fxFRGRk6vwTRa0b9ih/XpHzR84O8fOnakTn33/C99Of/nvM7Jkf\nERFZVtcJ38bV/OE9LD8S2MidT0SkiVTzFxGRkTW+8296CSh63VD584qcP3J2iJ8/VeM7/24JSKUf\nEZE6Nb7mr/q/iMhzqeYvIiIja0Tnr7F98lH+vCLnj5wd4ubX2D4iIi00s2P7qOYvIrI21fxFRGRk\n6vwTRa0b9ih/XpHzR84O8fOnmnjnb2YvNLP/NLPralhXo2/4EhGJYuI1fzPbAfwMOOzu/7LC65Vr\n/qr/i4h0Tb3mb2Z3m9kpMzs00D5vZkfM7GEz21a2vR74HvD4uAFFRKR+45R9dgPz/Q1mtg7YVbZf\nCmw1s03AVcCVwO8BN9kM1mui1w2VP6/I+SNnh/j5U5016hvcfb+ZzQ00Xw4cc/fjAGZ2L7DF3T9Q\nzr8DeNxVqxERaYSxav5l57/P3S8r598GXOPuN5XzNwJXuPstFdalmr+IyIhSa/4jH/kPkdQTnz69\nBCwAB8uWguU7fouBpbvz/RWkxcVFOp3u8r2vcprXvOY1P0vzRVGwZ88eAObm5kjm7iP/AHPAob75\nK4Ev9c3fCmyruC4/99zrHNzhE+X4zV7+VJvOaXFxMev2Uyl/XpHzR87uHj9/2feN1Ye7e21H/geA\njWU56FHgBmBr1TdHHthNRGSaiqJ45htBinEu9dwLfB24xMxOmNm73P2XwM3Ag3Qv7fyMux9OThdA\n7+tZVMqfV+T8kbND/PypQg3sppO/IiJdGtiNvMM+1PH1Kyflzyty/sjZIX7+VHXV/JOk1/z7vxGI\niMyuumr+M1H2UQlIRNqmKdf5J9HVPiIi1WS72mcS6nyM47Rr/9HrhsqfV+T8kbND3Pydmh7j2IjO\nv14q+4iIrKURNf/16zdy5swdwFFSa/7dadX+RWQ29co+O3bsSKr5N6Lzr/OErzp/EWkDXeefWdS6\nYY/y5xU5f+TsED9/KnX+IiIt1IiyzyRq/v1y/44iInVRzX/Nzl83fonI7FLNP7PodUPlzyty/sjZ\nIX7+VOr8RURaqBFln8nU/FX2EZHZo5r/CJ1/v9y/r4hIHVTzr6R87O8ERK8bKn9ekfNHzg7x86dq\nSecvIiL9WlL2Uf1fRGbLTIznP039wz3rHwIRaasWln3qrf9Hrxsqf16R80fODvHzp2rEkb+e5CUi\nUo2e4TtmzV/1fxGZBar5J1D9X0TaqoU1/37p9f/odUPlzyty/sjZIX7+VC3v/EVE2mmiNX8zexXw\nPuAlwL+5++0rLJOt5q/6v4hE1ejhHdz9iLv/EXAD8JpJbktERKobufM3s7vN7JSZHRponzezI2b2\nsJlt62t/E/BF4F/T4zZP9Lqh8ucVOX/k7BA/f6pxjvx3A/P9DWa2DthVtl8KbDWzTQDuvs/d3wi8\nPTHrRJnZMz8iIrNurJq/mc0B+9z9snL+N4Ht7j5fzr+/XPQ/gLcA64Fvu/vHV1hXI2r+qv+LSCRN\nuc7/IuBE3/xJ4Ap3/xrwtZq2ISIiNamr8086VD59eglYAA6WLQXQ6ZvuNzi/1vKjr6+/9LO4uAhA\np9N9f69O2JvfuXMnmzdvHvp60+eVX/nHne+vmTchz6znL4qCPXv2ADA3N0cydx/5B5gDDvXNXwl8\nqW/+VmBbxXX5uede5+AOnyjvuvLyZ9zpOtbRnV7L4uLimss0mfLnFTl/5Ozu8fOX/dNYfbi711bz\nPwv4PvA64FHgIWCrux+usK6JP8NX9X8RmRVFrmf4mtle4CrgAuAx4DZ3321m1wI7gXXAXe7+wYrr\na9wJX3X+ItJ0U7/Jy923uvuF7r7e3Te4++6y/QF3f6W7v6Jqx98TeUjn/rphRMqfV+T8kbND3PxF\nUbCwsJC8nkaM7XPOOZewfEK2WXT9v4g0SafTqaXzb8R4/k2u+asEJCJNkq3mX7em1/zV+YtIEzV6\nYLeqotT8VyoBRa0b9ih/XpHzR84OcfOr5p9FeRuAiEgmM1Xzj1L2UQlIRJqiKWP7JIlS9hERya13\nwjeVyj6JotYNe5Q/r8j5I2eHuPnrKvs0ovMXEZHpUs1/7OllufehiLSPav7ZLP9D0Lv0U/8IiMik\nqebfKHE7/ah1zx7lzydydoibXzV/EREZm2r+tU3r2n8RmR7V/EVEWkQ1f6lF1Lpnj/LnEzk7xM1f\nV82/EUf+s6J/wDeVgESkyVTzr21aY/+IyPTMRM1/FulbgIg0WSNq/rMpxvDPUeuePcqfT+TsED9/\nKh35T4G+BYhI0zSi5h/lGb51bDP3/haR2PQMX3X+ItJiM/EMX8knet1T+fOJnB3i50+lzl9EpIVU\n9qltWs8BEJHpUdknnBiXgIrIbJto529mW8zsDjO718xeP8ltRWRmz/zkEr3uqfz5RM4O8fOnmuh1\n/u7+BeALZnYe8BHgK5PcXjwrl4NERCZt5Jq/md0NXAc85u6X9bXPAzuBdcCd7v7hvtc+Atzj7ksr\nrK9lNX9dAioi6XLU/HcD8wMh1gG7yvZLga1mtsm6Pgw8sFLHLyIieYzc+bv7fuDHA82XA8fc/bi7\nPwXcC2wBbgZeB7zNzN6TGlbqF73uqfz5RM4O8fOnqqvmfxFwom/+JHCFu98CfGytN58+vQQsAAfL\nloLlh7sUA0sPzq+1fN3rG7Z8Pevr/UF2Op2pzC8tLU11e8o/W/k1P735oijYs2cPAHNzc6Qa6zp/\nM5sD9vVq/mb2VmDe3W8q529kufNfa12q+avmLyIjasp4/o8AG/rmN9A9+q9Ez/Bl6OWe+kdBRPoV\nDXuG7wFgo5nNmdnZwA3A/VXfrGf4wrNv/prejWB1/BHlpPz5RM4OcfN3cj3D18z2AlcBF5jZCeA2\nd99tZjcDD9K91PMudz9cdZ068l/b4DcDfSMQaae6jvw1tk9t05Nadz+dIxCRrqbU/JPoyH81ugtY\nRJY1reafRDX/fKLWPXuUP5/I2SFu/mw1/0nQkb+ISDWq+bem5q/7AkTkuWai5i/j0RVAIjKuRtT8\nVfZJkXZPQNS6Z4/y5xM5O8TNXxRFLTX/RnT+OuErIlJNXSd8VfOvbXr6Nf9u2UfnAUTaSM/wFRGR\nkTWi81fNf3R1Pfc3at2zR/nziZwd4uZXzb/1hpd4+h8Mn/Ph8CJSP9X8G1R/b8o2e5+lzgWIzD7V\n/EVEZGTq/Fsuat2zR/nziZwd4udP1Yg7fHXCtx7D6vv97SoBicSmsX0aXn9v6jZzf94iUg/V/EVE\nZGTq/Ftm8BLQ6HVP5c8ncnaInz9VI2r+Mk3PfTKYRgcVaR/V/GubjrdN3RcgEtdMjOevq31ERKrR\nM3ylFtHrnsqfT+TsEDf/TD3DV/LQuD8i7aWaf23Ts7PNlf4mdKOYSLPoOn+ZInX6IrNCnX/rFbkD\nJIlat+2JnD9ydoifP9VEO38zu9jM7jSzz05yOzIZw54JoGcFiMQ3lZq/mX3W3a8f8ppq/g3b5rDr\n/1fapur/InlMveZvZneb2SkzOzTQPm9mR8zsYTPbNm4gyU9H9SKzb5yyz25gvr/BzNYBu8r2S4Gt\nZrYpPZ5MXrFCmxPl5G70um3k/JGzQ/z8qUbu/N19P/DjgebLgWPuftzdnwLuBbaY2flmdjuwWd8G\nRESao66bvC4CTvTNnwSucPcfAe9d682nTy8BC8DBsqVg+Y7fYmDpwfm1lq97fcOWr3t9VZdPXd9K\n1lp++fXe0VOnk2e+15Zr+23O3+l0GpVn1vMXRcGePXsAmJubI9VYJ3zNbA7Y5+6XlfNvBebd/aZy\n/ka6nf8tFdalE76Bt6kTviJ5NOUmr0eADX3zG+ge/Veigd1yKpLePcpln/3L1nVCOXrdNnL+yNkh\nbv6iKGoZ26euzv8AsNHM5szsbOAG4P6qb9bAbpGNenI4zslkkSaqa2C3kcs+ZrYXuAq4AHgMuM3d\nd5vZtcBOYB1wl7t/sOL6fP36jZw5cwdwFJV94m5zrTGBuvrf29eq8pFIJUU5pPOOHTuSyj4a2K22\naW1zeOe/9rpz/x2KRNOUmn8S1fxzKnIHSBK1btsTOX/k7BA3f9Nq/klU8xcRqSZbzb9uKvvMzjaf\nPSZQP5V9ROqmZ/hKQ618YldE0vRO+KZS2af1itwBkkSt2/ZEzh85O8TNr2f4SuPUcePW4DqqloPG\nfZ9IWzWi5q/r/LXNYc8QGK3z1zkEmX26zr8lnWJbtqnOX2Q0M3Gdv+RU5A6QJGrdtidy/sjZIX7+\nVI2o+etqHxnnfMGo79F5AZkFdV3to7JPbdPa5iTLQStZbeiIKkNN5P7bF0mhso+IiIxMnX/rFbkD\nJCpyB0gSue4cOTvEz5+qETV/kWF6dfrhQ0es/r7+94rIskbU/HWdv7a51jaHXQo6jctIRZpE1/k3\nsIPSNtX5i0yLTvhKoiJ3gERF7gBJItedI2eH+PlTqfMXEWkhlX1qm9Y2VfYRmR6VfUREZGSN6Pw1\nvENORe4AlZjZkMs8i9rWPXwbk1t35LrzJPdbiqqZou57PcNXWsZZLtlEW/+ks+fU1N+tiZnqUdfD\nXBrR+UtOndwBEnVyB0jS6XRyR2ittu97df4iIi2kzr/1itwBEhW5AySJWneeBW3f9+r8RURaaKID\nu5nZC4G/A84Ahbv/4yS3J+Po5A6QqJM7QJK2151zavu+n/SR/1uAf3L3PwTePOFtiYhIRSN3/mZ2\nt5mdMrNDA+3zZnbEzB42s21l80XAiXL6/xKzykQUuQMkKnIHSNL2unNObd/34xz57wbm+xvMbB2w\nq2y/FNhqZpuAk8CGhG2JiMgEjFzzd/f9ZjY30Hw5cMzdjwOY2b3AFuBvgV1mdh1wf1JSmZBO7gCJ\nOmsuMexOz1Hb6zD4kJmrr776Wa8Pf/bw6sustvxKNK5Rt+Y/6r6dJXWd8O0v70D3iP8Kdz8NvLum\nbYiMqX/At6rtdQ5Ot9Y2hy1bJWuV5VcanE+WjbpvZ0NdnX/SP5enTy8BC8DBsqVg+YiuGFh6cH6t\n5ete37Dl615f1eVT17dzxOXXen3Syw++vhPYPIHt172+Ycs/+/VeHbp3JcpKdemiKJ7z+mrLr7b9\nYe+vOl/3+uqar5JnedmCtfZ/E+aLomDPnj0AzM3NkczdR/4B5oBDffNXAl/qm78V2FZxXb5+/UaH\nRYdPlAOFePkz7nQd62jLNheD/56LDd63q0+7r9w2qMoyqy2/0u9Wh1FzTUvV33Nx8bl/+xEsLi76\n9u3be3kZ92es8fzLmv8+d7+snD8L+D7wOuBR4CFgq7sfrrAujeevbbZym+5e6RkDoz6HYO1nHtTz\nLIOmPh9hOdfo+6opv0MVqeP5j1z2MbO9wFXABWZ2ArjN3Xeb2c3Ag8A64K4qHX+PhnQWEamm9wD3\nVHqSV23TUbdZAFcH/j0X6dZrm7hvV5/Wkf9kVD3yL4qivNqqeb9DFVM/8p8EHfmLiFSjI/9QR6fa\nprb53Gkd+U+Gav7V6K5bEZEWUtmn9YrcARIVxL9LWXKIOrZPXWWfRhz56xm+IiLV6Bm+UpNO7gCJ\nOrkDSFBtH89fZR8RkUBU9pGaFLkDJCpyB5Cgotb8VfYREZGxqezTep3cARJ1cgeQoKLW/HWTVwtv\nCtI2Z2ubuslrMnSTVzUq+7RekTtAoiJ3AAkqas2/Lur8RURaSJ1/63VyB0jUyR1Agopa86+LTviK\niASi6/ylJkXuAImK3AEkqKg1f13nLyIiY1Pn33qd3AESdXIHkKDaXvNX5y8i0kLq/FuvyB0gUZE7\ngAQVteZfF3X+IiItpEs9W6+TO0CiTu4AElTUmr/G9mnhWDDa5mxtU2P7TIbG9qlGZZ/WK3IHSFTk\nDiBBqeYvIiKto86/9Tq5AyTq5A4gQUWt+ddlop2/mV1sZnea2WcnuR0RERnNRDt/d/+Bu//BJLch\nqYrcARIVuQNIUKr5V2Bmd5vZKTM7NNA+b2ZHzOxhM9s2mYgyWUu5AySKnl9yWVpq999O1SP/3cB8\nf4OZrQN2le2XAlvNbJOZ/b6Z/Y2ZXVhvVJmMJ3IHSBQ9v+TyxBPt/tup1Pm7+37gxwPNlwPH3P24\nuz8F3AtscfdPufufuvujZna+md0ObNY3AxGR5ki5w/ci4ETf/Engiv4F3P1HwHvXWtGTTz7Ei1/8\nJn7xi//hyScTEskYjucOkOh47gAS1PHjx3NHyCql86/tVrgzZx7nzJkv9rVYDdN1rEPbbP42P1n+\nTHOb9Ux37y4d1jaoyjIrL7/SdLV1VDFqrmkZ5fds6u8wWSmd/yPAhr75DXSP/keScnuyiIiMJ+VS\nzwPARjObM7OzgRuA++uJJSIik1T1Us+9wNeBS8zshJm9y91/CdwMPAh8D/iMux+eXFQREalL1at9\ntrr7he6+3t03uPvusv0Bd3+lu7/C3T846saj3SdgZsfN7Dtm9i0ze6hsO9/MvmJmR83sy2Z2Xu6c\nPSvdn7FaXjO7tfwsjpjZG/KkXjYk/4KZnSw/g2+Z2bV9rzUt/wYzWzSz/zaz75rZn5TtIT6DVfI3\n/jMws3PM7JtmtlRmXyjbo+z7Yfnr2/funuUHWAccA+aA59O9W2dTrjwVM/8AOH+g7a+APy+ntwEf\nyp2zL9trgVcDh9bKS/dejaXys5grP5vnNTD/duDPVli2iflfBmwup18EfB/YFOUzWCV/iM8AeEH5\n37OAb9C9GjHEvl8lf237PufAbiveJ5AxT1WDJ6jfzPLlJp8Efnu6cYbzle/PGJZ3C7DX3Z9y9+N0\n/3gun0bOYYbkh+d+BtDM/P/r7kvl9M+Bw3QvkQ7xGaySHwJ8Bu5+upw8m26n6ATZ9zA0P9S073N2\n/ivdJ3DRkGWbwoGvmtkBM7upbHupu58qp08BL80TrbJheS/k2VdrNfnzuMXMvm1md/V9bW90fjOb\no/st5psE/Az68n+jbGr8Z2BmzzOzJbr7+Mvu/hCB9v2Q/FDTvs/Z+cd5ZM6y17j7q4FrgT82s9f2\nv+jd719hfq8KeZv4u3wcuBjYDPwQ+OtVlm1EfjN7EfDPwPvc/Wf9r0X4DMr899HN/3OCfAbu/rS7\nbwZeDlxhZr8x8Hqj9/0K+X+dGvd9zs6/lvsEpsndf1j+93Hgc3S/Vp0ys5cBmNmvAY/lS1jJsLyD\nn8fLy7ZGcffHvATcyfJX20bmN7Pn0+34P+Xuny+bw3wGffnv6eWP9hm4+0+AReAaAu37nr7883Xu\n+5ydf6j7BMzsBWb2q+X0C4E3AIfoZn5Hudg7gM+vvIbGGJb3fuB3zexsM7sY2Ag8tML7syr/h+35\nHbqfATQwv5kZcBfwPXff2fdSiM9gWP4In4GZvaRXEjGzXwFeT/ecRZR9v2L+3j9cpbR9n/ls9rV0\nryA4BtyaM0uFrBfTPZu+BHy3lxc4H/gqcBT4MnBe7qx9mfcCjwK/oHt+5V2r5QX+ovwsjgDXNDD/\nu4F/AL4DfJvu/7gvbXD+3wKeLv9mvlX+zEf5DIbkvzbCZwBcBvxXmfEQ8IGyPcq+H5a/tn1v5ZtE\nRKRF9AxfEZEWUucvItJC6vxFRFpInb+ISAup8xcRaSF1/iIiLaTOX0SkhdT5i4i00P8DHZ7BpeoS\nDxEAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fa1c1c117d0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig_ppc, ax_ppc = plt.subplots()\n",
"ping_counts_series.hist(ax=ax_ppc, bins=100, bottom=0.1)\n",
"ax_ppc.set_yscale('log')"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### Are we seeing any gap (missing sequences) for any client?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Go through all the core pings (per client) and check if there's any gap in the sequence numbers."
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def verify_sequence_gaps(client_history):\n",
" gaps = []\n",
" gap_length = 0\n",
"\n",
" for i in range(1, len(client_history)):\n",
" prev_ping = client_history[i - 1]\n",
" curr_ping = client_history[i]\n",
" \n",
" expected_sequence_num = prev_ping[\"seq\"] + 1\n",
" if curr_ping[\"seq\"] != expected_sequence_num:\n",
" # We found a gap: how big is this gap?\n",
" gap_length = curr_ping[\"seq\"] - prev_ping[\"seq\"]\n",
" \n",
" if gap_length > 0:\n",
" # We had a gap before, but the sequence is ok now.\n",
" gaps.append(gap_length)\n",
" gap_length = 0\n",
"\n",
" return gaps\n",
" \n",
"\n",
"sequence_errors = grouped.map(verify_sequence_gaps)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's discard those clients that don't have any gap in the sequence numbers."
]
},
{
"cell_type": "code",
"execution_count": 126,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"clients_with_gaps = sequence_errors.filter(lambda h: len(h) > 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that we have a list of gaps in the sequence numbers, get some stats about them."
]
},
{
"cell_type": "code",
"execution_count": 127,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def gap_stats(gaps):\n",
" return {\n",
" \"len\": len(gaps),\n",
" \"min\": np.min(gaps),\n",
" \"max\": np.max(gaps),\n",
" \"avg\": np.mean(gaps),\n",
" \"std\": np.std(gaps),\n",
" \"var\": np.var(gaps),\n",
" \"95p\": np.percentile(gaps, 95)\n",
" }\n",
"\n",
"gaps_stats = clients_with_gaps.map(gap_stats) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get some descriptive stats about the number of sequence number gaps for a single client history."
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 57042.000000\n",
"mean 2.660075\n",
"std 3.078267\n",
"min 1.000000\n",
"25% 1.000000\n",
"50% 2.000000\n",
"75% 3.000000\n",
"max 79.000000\n",
"dtype: float64"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gaps_num_series = pd.Series(gaps_stats.map(lambda x: x[\"len\"]).collect())\n",
"gaps_num_series.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get some descriptive stats about the size of the gaps in the sequence numbers. Also, what's the distribution of the gap sizes?"
]
},
{
"cell_type": "code",
"execution_count": 129,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"gaps_size_series = pd.Series(gaps_stats.map(lambda x: x[\"max\"]).collect())"
]
},
{
"cell_type": "code",
"execution_count": 130,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 57042.000000\n",
"mean 10.548736\n",
"std 42.616523\n",
"min 2.000000\n",
"25% 2.000000\n",
"50% 4.000000\n",
"75% 9.000000\n",
"max 5183.000000\n",
"dtype: float64"
]
},
"execution_count": 130,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gaps_size_series.describe()"
]
},
{
"cell_type": "code",
"execution_count": 131,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEDCAYAAAA4FgP0AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF01JREFUeJzt3X+MHHd5x/H3g0MohLZRgsSPYGkjNS42tXJQKQ6lyEv5\nkQspiQRUwW0QCWoKVZOiILVOEMqt/4JUSFwhgqgl9lW0dSi0QNwCAdobZIm2IQ2XmsamthpLdtI4\nqSAFUVogefrHzvrW6z3f7t13PPN95vOSrMzM7s1+P1nbj+f7zA9zd0REpL2eVfcARESkXioEIiIt\np0IgItJyKgQiIi2nQiAi0nIqBCIiLadCICLScioEIiItV2khMLOume03s0+Y2fYqP0tERNam6iOC\nZ4AfAM8Bjlf8WSIisgZTFwIz221mJ8zswMj2WTM7ZGaHzWxnuXm/u78JuBXYlWC8IiKS2FqOCPYA\ns8MbzGwDcGe5fQuww8w2+/KNjJ6if1QgIiINc860P+Du+82sM7L5MuCIux8FMLN7gGvM7GXAFcD5\nwMfWNVIREanE1IVgBRcBx4bWjwPb3P1DwOcSfYaIiFQgVSFY872szUz3wRYRWQN3txT7SXXW0KPA\nxqH1jUxxltDc3ByLi4u4e7hfc3NztY9B+ZSvbdki51tcXGRubi7RX919qQrBA8AlZtYxs3OBa4F7\nJ/3hXq9Ht9tNNJRmOXr0aN1DqJTy5StyNoibr9vt0uv1ku5zLaeP7gW+AWwys2NmdoO7/xS4CbgP\neBj4tLsfnHSfvV6PoiimHYqISOsURZG8EJh7vVP0ZuZ1j6FKRVGEPdoB5ctZ5GwQP5+Z4Yl6BI0o\nBHNzc3S73dBfmohICkVRUBQFu3btilUI6h5DlaL/q0T58hU5G8TPl/KIQHcfFRFpuUYcEVx11VVs\n2rSJbrfL1VdfXet4RESaLOzU0HOf+26efvoxXv7y7/Pgg0Wt4xERyUG4qaEf/egufvzj9/HMM3WP\nJL3op8UqX74iZ4P4+VJKdYuJderRvy+diIicyWBqKKVGTA31b1VUcOmlPZaWilrHIyKSg3BTQyIi\nUp+GFIIesFT3ICoRfZ5S+fIVORvEzVfFLSYaVAhm6h6EiEjjVXHTOfUIREQypB6BiIgko0JQsajz\nlAPKl6/I2SB+vpR0HYGISEZ0HYGIiADqEYiISEIqBBWLPk+pfPmKnA3i50tJhUBEpOXUIxARyZB6\nBCIikkxDCkEP3WsoT8qXr8jZIG4+3WtIRKTldK8hEREB1CMQEZGEVAgqFnWeckD58hU5G8TPl1Ll\nhcDMzjOzb5rZVVV/loiITK/yHoGZ7QJ+ABx0978b87p6BCIiU6q1R2Bmu83shJkdGNk+a2aHzOyw\nme0st70BeBh4MsVgRUQkvbVMDe0BZoc3mNkG4M5y+xZgh5ltBrYDlwO/CdxoZkmqV06iz1MqX74i\nZ4P4+VKa+nkE7r7fzDojmy8Djrj7UQAzuwe4xt0/UK6/E3jS6z5XVURETrOmHkFZCPa5+9Zy/W3A\nFe5+Y7l+HbDN3W+eYF/qEYiITClljyDVE8rW+S/96wF4/PGjzM/PMzMzQ7fbBZYP77Suda1rvc3r\nRVGwsLAAQKfTIaVURwSXAz13ny3XbwOecfc7JthX6COCoihOfqkRKV++ImeD+PmaeETwAHBJWSAe\nA64Fdkz+4z30zGIRkdUVFTyzeC2nj+4FvgFsMrNjZnaDu/8UuAm4j/7pop9294NJR5qpyP8iAeXL\nWeRsED9fSrrpnIhIhnTTuYykPoRrGuXLV+RsED9fSql6BOvUQz0CEZHVVdEj0NSQiEiGmnjW0Dr1\n0BGBiMjqGnHWUDV6RH1UZfR5SuXLV+RsEDdft4JHVTakEIiISF0a0iOYA87n0ks/rx6BiMgZDKaG\ndu3alaxH0JBCoGaxiMg0dB1BRqLOUw4oX74iZ4P4+VJSIRARabmGTA2pRyAiMgn1CEREBFCPICvR\n5ymVL1+Rs0H8fCmpEIiItFxDpobUIxARmYR6BCIiAqhHkJXo85TKl6/I2SB+vpRUCEREWk5TQyIi\nGdLUkIiIJKNCULHo85TKl6/I2SB+vpT0hDIRkYzomcUiIgKoRyAiIgmpEFQs+jyl8uUrcjaIny8l\nFQIRkZartEdgZi8D3gu8APh7d79rzHvUIxARmVI2PQJ3P+TuvwtcC7y6ys8SEZG1mboQmNluMzth\nZgdGts+a2SEzO2xmO4e2vxn4W+CL6x9ufqLPUypfviJng/j5UlrLEcEeYHZ4g5ltAO4st28BdpjZ\nZgB33+fubwJ+a51jFRGRCqypR2BmHWCfu28t118FzLn7bLl+a/nWfwTeAjwHeMjdPzFmX+oRiIhM\nKWWPINWVxRcBx4bWjwPb3P3rwNcTfYaIiFQgVSFY56lH1wPw0ENfx2y5wLn7yXm+brcLkN36/Pw8\nMzMzjRmP8infYH14Dr0J41G+1fMsLCwA0Ol0SCnV1NDlQG9oaug24Bl3v2OCfZ2cGoLXslxTjLpv\nf5FCURQnv9SIlC9fkbNB/Hwpp4ZSFYJzgO8ArwMeA+4Hdrj7wQn2dfKZxXAL0QqBiEhKRROeWWxm\ne4HtwIXAE8Dt7r7HzK4E5oENwN3u/sEJ9xf6iEBEpAq1XlDm7jvc/SXu/hx33+jue8rtX3L3X3T3\nX5i0CCzrAUvTDiULw/OUESlfviJng7j5iqKg1+sl3WdD7jXUA2bqHoSISON1u93khaAhzyNQj0BE\nZBKN6BGkph6BiMj0srnp3OR6qEeQJ+XLV+RsEDefegQiIi0XuEegqSERkWk08V5D69Sj3ywWEZEz\nGTSLU9LUUMWizlMOKF++ImeDuPmqmBpqSCEQEZG6qEcgIpIh9QhERFpKPYIMRZ2nHFC+fEXOBnHz\nqUcgIiLJqUcgIpIh9QhERFpKPYIMRZ2nHFC+fEXOBnHzqUcgIiLJqUcgIpKhgD2C8cyWM6ooiIhU\no+FTQ87yEUKeos5TDihfviJng/j5Ump4IRARkao1pEcw/pnF6heIiJyqdc8sViEQERkv4DOL44o+\nT6l8+YqcDeLnS0mFQESk5TQ1JCKSIU0NiYhIMpUWAjO7xsz+xMzuMbM3VPlZTRV9nlL58hU5G8TP\nl1KlVxa7+xeAL5jZ+cCHga9W+XkiIjK9qXsEZrYbuAp4wt23Dm2fBeaBDcAn3f2Oodc+DPy5uy+N\n2Z96BCIiU6q7R7AHmB0Z0AbgznL7FmCHmW22vjuAL40rAiIiUr+pC4G77we+N7L5MuCIux91958A\n9wDXADcBrwPeZmbvXu9gcxR9nlL58hU5G8TPl1KqHsFFwLGh9ePANne/GfjY6j9+/dDyPKc+pKYA\nTr0T6eLiIt1ut/9q+WU3dX1paalR41E+5dN6nutFUbCwsABAp9MhpTVdR2BmHWDfoEdgZm8FZt39\nxnL9OpYLwWr7mqhHoH6BiMiyJj6P4FFg49D6RvpHBRPqoWcWi4isrmjwM4sfAC4xs46ZnQtcC9w7\n+Y/30DOL86R8+YqcDeLm61bwzOKpjwjMbC+wHbjQzI4Bt7v7HjO7CbiP/umjd7v7wcn32kNHBCIi\nq6viiCCbew2pRyAisqyJPYJ16qEjAhGR1TW5R7BOPdQjyJPy5StyNoibrxE9gmr00BGBiMjq1CMo\nl+ses4hI3eq+15CIiATSkELQA2Leky7qPOWA8uUrcjaIm68oiuQ9ggYVgpjNYhGRlKpoFqtHICKS\nIfUIREQkmYYUgh7qEeRJ+fIVORvEzacegYhIy6lHUC7XPWYRkboFvNfQdIafVqaiICKyPg2ZGpqW\ns3yE0GxR5ykHlC9fkbNB/HwpNeSIoMda7zWkowMRaRPda2iV5bqziIicLbqOQEREklEhqFj0eUrl\ny1fkbBA/X0oqBCIiLacegYhIhgJeR9BDTygTEVmdnlmcoejzlMqXr8jZIG6+Km4x0ZBCICIidVGP\nQEQkQ7qOQEREklEhqFjUecoB5ctX5GwQP19KlRYCM7vYzD5pZp+p8nNERGTtzkqPwMw+4+6/scJr\n6hGIiEyp1h6Bme02sxNmdmBk+6yZHTKzw2a2M8XgRESkemuZGtoDzA5vMLMNwJ3l9i3ADjPbvP7h\n5S/6PKXy5StyNoifL6WpC4G77we+N7L5MuCIux91958A9wDXmNkFZnYXMHM2jhLM7OQvERGZTKpb\nTFwEHBtaPw5sc/fvAu9Z/cevH1qe59SrjIsx7y+A7pjXh3sN5avlvwq63W4t64NtdX2+8infSuvd\nbrdR41G+M68XRcHCwgIAnU6HlNbULDazDrDP3beW628FZt39xnL9OvqF4OYJ9pWsWazGsYi0RRMv\nKHsU2Di0vpH+UcGEesBSoqE0y6CiR6V8+YqcDeLmK4qisfcaegC4xMw6ZnYucC1w7+Q/3iPqTedE\nRFLqVnDTuamnhsxsL7AduBB4Arjd3feY2ZX0J/g3AHe7+wcn3J/DHP3bUN+CpoZERFZWlLeh3rVr\nV7KpoVA3nVMhEJG2aGKPYJ16pO4RDJ9KOnpK6UrbqxB1nnJA+fIVORvEzdfkHsE69UjfI/Ch/447\nOlhpu4hIczWiR5BaVVND/eXx00X9owBNI4lIvvTMYhGRlho0i1MKPDXUDFHnKQeUL1+Rs0HcfFVM\nDTWkEIiISF0a0iNIfx2BegQiEpGuI5h6WYVARGIKeB1BXFHnKQeUL1+Rs0H8fCm16qyhlS4eG94+\n7ujgTBed6WhCRM6mKs4aatXU0FpvTTE6laRpJRGpm6aGREQkGRWCikWfp1S+fEXOBvHzpaRCMOJM\nN6tby8+LiDRdQ3oEZ+c6giqWR///6dRUEamSriOYelmFQERiUrM4I9HnKZUvX5GzQfx8KakQiIi0\nnKaGNDUkIhnS1JCIiCTTkELQI/Uzi5titXnK1Kebnu3TV6PPw0bOFzkbxM2nZxaHlfr5yan3JyJN\noWcWT73c/B5B6p6CehQi7aAegYiIJKNCULGo85QDypevyNkgfr6UVAhERFqu0h6BmZ0HfBz4P6Bw\n978c8x71CNQjEJEp5dQjeAvwV+7+O8DVFX+WiIiswdSFwMx2m9kJMzswsn3WzA6Z2WEz21luvgg4\nVi4/vc6xZin6PKXy5StyNoifL6W1HBHsAWaHN5jZBuDOcvsWYIeZbQaOAxvX8VkiIlKxNfUIzKwD\n7HP3reX6q4A5d58t128t3/pR+gXif4H97r53zL5C9AhOvYr3zHP0p1/xu9r+Rt7tftrrp/7ctD2K\n0/czyXtX/vzJ9hdFGzNHMPy95fidpewRnJNiJ5w6BQT9I4Ft7v4/wLsSfUYGBsVn0veyyvtXKkTT\n7CPVWCYdU8px5aSNmSOY5s9sXKkKwTrL6fVDy/OceruJYsz7C6C7wuvTrk+7v1PfP34ecvn1+fl5\nZmZm6HYne//pr5/+/lPfM9n+Rj9/sD79+0c/b35dn9/09ZW+vzN9n0VRNGb8Z1ofHnsTxlNHvtHf\nz00a/7g8CwsLAHQ6HZJy96l/AR3gwND65cCXh9ZvA3ZOuC+HOYePlDfI8fJXiuXU+zt9eWClz1xc\nXPRR0+1v/Hsm/bnVTPP+8WNaXNfnN924729UrpknyZaz1fIN/5nNyeLios/NzQ3Gvaa/w0d/peoR\nnAN8B3gd8BhwP7DD3Q9OsC8nTI/g9M8c9/93dB7/zPsb/56VegFVXsewlnGv5fdXbtqYOYLhP7M5\nfme19gjMbC+wHbjQzI4Bt7v7HjO7CbgP2ADcPUkRWNaj//B6ERE5k6J8eH1KuvtoxUcEi4uLI/Px\n0Y4ICuC1YY8Ihuf7V5Jr5kmy5Wy1fDoiWJaqWbxOPXREICKyOh0RTL1c/xGBegT5/UtrWm3MHIGO\nCJbpal8RkZZrSCHo0dZnFuevqHsAlYr8/UXOBnHzFXpmsYhIu+mZxVMvq0egHkH12pg5AvUIlums\nIRGRjFRx1pCmhioWdZ5yWVH3ACoV+fuLnA3i5qtiaqghhUBEROrSkB7BHP2poVtQj0A9gty0MXME\nufYIBlNDu3btStYjaEghULN45f2Nf48KQXO0MXMEuRaCAV1QlpGo85TLiroHUKnI31/kbBA/X0oq\nBCIiLaepIU0NTfx+TQ2N18bMEWhqaJmuIxARyYiuI8hQ/HnKou4BVCry9xc5G8TNp+sIREQkOfUI\n1COY+P3qEYzXxswRqEewTEcEIiItp0JQsajzlMuKugdQqcjfX+RsED9fSioEIiIt15AewRy619BK\n+xv/HvUImqONmSPItUegew1NvaxCoEJQvTZmjiDXQjCgZnFG4s9TFnUPoFKRv7/I2SB+vpRUCERE\nWk5TQ5oamvj9mhoar42ZI9DU0LJKjwjM7GIz+6SZfabKzxERkbWrtBC4+yPu/ttVfkbTxZ+nLOoe\nQKUif3+Rs0H8fClNVAjMbLeZnTCzAyPbZ83skJkdNrOd1Qwxb0tLS3UPoWKx80X+/iJng/j5Upr0\niGAPMDu8wcw2AHeW27cAO8xss5m9w8w+YmYvSTvUPD311FN1D6FisfNF/v4iZ4P4+VKaqBC4+37g\neyObLwOOuPtRd/8JcA9wjbt/yt1vcffHzOwCM7sLmNERg4hIM63nwTQXAceG1o8D24bf4O7fBd6z\n2o5+7ufezNNP/xc//OE6RtNQR48erXsIFTta9wAqFfn7i5wN4udLaeLTR82sA+xz963l+luBWXe/\nsVy/Dtjm7jdPNQCz/M7bEhFpgCY8qvJRYOPQ+kb6RwVTSRVERETWZj2njz4AXGJmHTM7F7gWuDfN\nsERE5GyZ9PTRvcA3gE1mdszMbnD3nwI3AfcBDwOfdveD1Q1VREQq4e61/aJ/6ukh4DCws86xTDHm\n3cAJ4MDQtguArwL/DnwFOH/otdvKfIeANw5t/2XgQPnaH9eda2hcG4FF4N+AbwO/Hykj8DPAP9O/\nAOLbQC9SvnJcG4Bv0e/pRct2FPjXMt/9AfOdD3wWOEj/H9jbzka+un+zHgE6wLPLP5ib6/4iJhj3\na4BXcGoh+CPgD8vlncCHyuUtZa5nlzmPsNygvx+4rFz+Iv3GexPyvQiYKZefD3wH2Bws4/PK/54D\n/FP5hy1SvvcBfwHcG/D35yPABSPbIuX7M+BdQ78/f/5s5Ksz8KuALw+t3wrcWvcXMeHYO5xaCA4B\nLyyXXwQcKpdvY+hIB/gycDnwYuDg0Pa3A3fVnWuFrJ8HXh8xI/A84F/oXxMTIh/wUuBr9O/gODgi\nCJGtHMsjwIUj20Lko/+X/n+M2V55vjpvQz3uOoSLahrLer3Q3U+UyyeAF5bLL+HUM6kGGUe3P0oD\ns5enDL+C/lRKmIxm9iwzW6Kf4yvufj9x8n0E+APgmaFtUbJB/3ahXzOzB8zsxnJblHwXA0+a2R4z\ne9DM/tTMzuMs5KuzEIS8fsD7JTj7bGb2fOCvgfe6+w+GX8s9o7s/4+4z9P/1vM3Mfmnk9Szzmdmv\nA0+4+7fo3yf8NLlmG/Jqd38FcCXwe2b2muEXM893DvBK4OPu/krgh/RnSk6qKl+dhSDJdQgNccLM\nXgRgZi8Gnii3j2Z8Kf2Mj5bLw9sfPQvjnIiZPZt+EfiUu3++3BwqI4C7/zf9xvgVxMj3K8DVZvYI\nsBf4NTP7FDGyAeDu/1n+90ngc/Sn9aLkOw4cd/dvluufpV8YHq86X52FINJ1CPcC7yyX30l/Xn2w\n/e1mdq6ZXQxcQv9Mh8eB75vZNus/HeMdQz9Tq3I8dwMPu/v80EshMprZC8zs/HL5ucAb6J+hkX0+\nd3+/u29094vpzwv/g7u/gwDZAMzseWb2s+XyecAb6Z8ZEyJfOa5jZrap3PR6+mfv7aPqfDU3R66k\nf1bKEeC2ups1E455L/AY8GP6PY4b6J/e9TXGn971/jLfIeCKoe2D07uOAB+tO9fQuH6V/vzyEv1T\n9L5F/zTfEBmBrcCDwEPl2D5Qbg+Rb2hs21k+ayhENvpz6Essn/p7W6R85bguBb5Z/v78G/oN5Mrz\n1f6oShERqZceXi8i0nIqBCIiLadCICLScioEIiItp0IgItJyKgQiIi2nQiAi0nIqBCIiLff/gjx+\njmJT52UAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fa1d62de750>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots()\n",
"gaps_size_series.hist(ax=ax, bins=100, bottom=0.1)\n",
"ax.set_yscale('log')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment