Skip to content

Instantly share code, notes, and snippets.

@georgf
Last active March 24, 2016 13:06
Show Gist options
  • Save georgf/c9f4824e5d3bbf112db7 to your computer and use it in GitHub Desktop.
Save georgf/c9f4824e5d3bbf112db7 to your computer and use it in GitHub Desktop.
Validation of the Fennec "core" pings on release for bug 1255458
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Bug 1255458 - Validation of the Fennec release \"core\" ping submissions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Validate \"core\" pings sent by Firefox for Android to make sure the data they contain makes sense."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/bucket-whitelist.json). Assuming all histograms are acceptable.\n",
"Populating the interactive namespace from numpy and matplotlib\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/hadoop/anaconda2/lib/python2.7/site-packages/matplotlib/font_manager.py:273: UserWarning: Matplotlib is building the font cache using fc-list. This may take a moment.\n",
" warnings.warn('Matplotlib is building the font cache using fc-list. This may take a moment.')\n"
]
}
],
"source": [
"import ujson as json\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import plotly.plotly as py\n",
"import datetime as dt\n",
"from uuid import UUID\n",
"\n",
"from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client, get_clients_history\n",
"\n",
"%pylab inline"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"submission_dates = (\"20160307\", \"20160325\")\n",
"core_pings = get_pings(sc,\n",
" app=\"Fennec\",\n",
" channel=\"release\",\n",
" doc_type=\"core\",\n",
" source_version=\"1\",\n",
" submission_date=submission_dates,\n",
" fraction=1.0)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"11159099"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pings_count = core_pings.count()\n",
"pings_count"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How many different clients are we seeing?"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"257961"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"one_per_client = get_one_ping_per_client(core_pings)\n",
"num_clients = one_per_client.count()\n",
"num_clients"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Are we seeing docId dupes?"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1202"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"doc_ids = core_pings.map(lambda p: p[\"meta\"][\"documentId\"])\n",
"dupes = pings_count - doc_ids.distinct().count()\n",
"dupes"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"dupes_docids = doc_ids.map(lambda d: (d, 1)).reduceByKey(lambda x, y: x + y).filter(lambda x: x[1] > 1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get the list of duplicated document Ids."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"duplicated_docIds = dupes_docids.map(lambda x: x[0]).collect()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get the duplicated core pings and group them by documentId."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# The membership test below runs once for every ping. duplicated_docIds is a\n",
"# list, which would be scanned from the start each time, so look the ids up\n",
"# in a set instead.\n",
"duplicated_docId_set = frozenset(duplicated_docIds)\n",
"duplicated_core_pings = core_pings.filter(lambda p: p[\"meta\"][\"documentId\"] in duplicated_docId_set)\n",
"# Key every duplicated ping by its documentId and gather the pings sharing one.\n",
"grouped_dupes = duplicated_core_pings.map(lambda p: (p[\"meta\"][\"documentId\"], p)).groupByKey()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Do the dupes have the same docId+clientId+sequence number?"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def check_same_ids(duped_pings):\n",
" doc_id = duped_pings[0]\n",
" ping_list = list(duped_pings[1])\n",
" num_pings = len(ping_list)\n",
" \n",
" if num_pings < 2:\n",
" # That shouldn't really be happening here. We need at least 2 pings for \"duplicates\" to\n",
" # be meaningful.\n",
" return (doc_id, False, num_pings)\n",
"\n",
" for i in range(1, len(ping_list)):\n",
" prev_ping = ping_list[i - 1]\n",
" curr_ping = ping_list[i]\n",
" \n",
" if prev_ping[\"meta\"][\"documentId\"] != curr_ping[\"meta\"][\"documentId\"] or \\\n",
" prev_ping[\"clientId\"] != curr_ping[\"clientId\"] or \\\n",
" prev_ping[\"seq\"] != curr_ping[\"seq\"]:\n",
" return (doc_id, False, num_pings)\n",
" \n",
" return (doc_id, True, num_pings)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Map each group of pings (grouped by documentId) to True (all of its pings have the same clientId, documentId and sequence number) or False (at least one of these differs)."
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"same_ids = grouped_dupes.map(check_same_ids)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Sanity check: do we have *any* \"duplicated\" group with fewer than 2 pings? That would be odd."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"same_ids.filter(lambda x: x[2] < 2).count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How many duplicated documentIds have pings that differ in clientId or sequence number?"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"23"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dupes_not_matching_ids = same_ids.filter(lambda x: not x[1])\n",
"dupes_not_matching_ids.count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Are the pings respecting our desired schema?"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"defaultdict(int,\n",
" {'': 11156514,\n",
" 'check failed: ping.profileDate < 10957': 2015,\n",
" 'check failed: ping.profileDate > 17167': 338,\n",
" 'missing key: profileDate': 232})"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def core_ping_check(p):\n",
" # That's a sort-of schema to validate the required fields and their types.\n",
" req_fields = {\n",
" \"v\": [int],\n",
" \"clientId\": [unicode],\n",
" \"seq\": [int],\n",
" \"locale\": [unicode],\n",
" \"os\": [unicode],\n",
" \"osversion\": [unicode],\n",
" \"device\": [unicode],\n",
" \"arch\": [unicode],\n",
" \"profileDate\": [int, long]\n",
" }\n",
" \n",
" opt_fields = {\n",
" \"experiments\": list,\n",
" }\n",
" \n",
" # Does the ping contain all the required top-level fields?\n",
" for k, types in req_fields.iteritems():\n",
" if not k in p:\n",
" return (\"missing key: \" + k, p)\n",
" if type(p[k]) not in types:\n",
" return (\"wrong type: \" + k, p)\n",
" \n",
" # Does it contain any optional field? If so, make sure it has the correct type. \n",
" for k, v in opt_fields.iteritems():\n",
" if k in p:\n",
" if type(p[k]) != v:\n",
" return (\"wrong type: \" + k, p)\n",
" \n",
" # Perform some additional sanity checks.\n",
" if p[\"v\"] < 1:\n",
" return (\"check failed: ping.v < 1\", p)\n",
" if p[\"seq\"] < 0:\n",
" return (\"check failed: ping.seq < 0\", p)\n",
" if p[\"profileDate\"] < 0:\n",
" return (\"check failed: ping.profileDate < 0\", p)\n",
" if p[\"profileDate\"] < 10957: # profileDates before the year 2000?\n",
" return (\"check failed: ping.profileDate < 10957\", p)\n",
" if p[\"profileDate\"] > 17167: # profileDates after the year 2016?\n",
" return (\"check failed: ping.profileDate > 17167\", p)\n",
" \n",
" # Validate the clientId.\n",
" try:\n",
" UUID(p[\"clientId\"], version=4)\n",
" except ValueError:\n",
" return (\"check failed: clientId is UUID\", p)\n",
" \n",
" return (\"\", p)\n",
"\n",
"checked_pings = core_pings.map(core_ping_check)\n",
"result_counts = checked_pings.countByKey()\n",
"result_counts"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"So we have broken pings. Let's check examples for the types of failures:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"grouped_checked_pings = checked_pings.filter(lambda t: t[0] != '')\\\n",
" .groupByKey()\\\n",
" .collectAsMap()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[('check failed: ping.profileDate > 17167',\n",
" {u'arch': u'armeabi-v7a',\n",
" u'clientId': '...',\n",
" u'device': u'archos-Archos 50 Neon',\n",
" u'experiments': [],\n",
" u'locale': u'en-US',\n",
" 'meta': {u'Host': u'incoming.telemetry.mozilla.org',\n",
" 'Hostname': u'ip-172-31-8-134',\n",
" u'Size': 207.0,\n",
" 'Timestamp': 1458746824044016896L,\n",
" 'Type': u'telemetry',\n",
" u'appBuildId': u'20160303014336',\n",
" u'appName': u'Fennec',\n",
" u'appUpdateChannel': u'release',\n",
" u'appVersion': u'45.0',\n",
" u'clientId': '...',\n",
" u'docType': u'core',\n",
" u'documentId': '...',\n",
" u'geoCity': u'??',\n",
" u'geoCountry': u'IR',\n",
" u'sampleId': 95.0,\n",
" u'sourceName': u'telemetry',\n",
" u'sourceVersion': u'1',\n",
" u'submissionDate': u'20160323'},\n",
" u'os': u'Android',\n",
" u'osversion': u'17',\n",
" u'profileDate': 17817,\n",
" u'seq': 253,\n",
" u'v': 1}),\n",
" ('missing key: profileDate',\n",
" {u'arch': u'armeabi-v7a',\n",
" u'clientId': '...',\n",
" u'device': u'HUAWEI-HUAWEI C8816',\n",
" u'experiments': [u'onboarding-a'],\n",
" u'locale': u'en-US',\n",
" 'meta': {u'Host': u'incoming.telemetry.mozilla.org',\n",
" 'Hostname': u'ip-172-31-39-247',\n",
" u'Size': 197.0,\n",
" 'Timestamp': 1458738984925863168L,\n",
" 'Type': u'telemetry',\n",
" u'appBuildId': u'20160315170748',\n",
" u'appName': u'Fennec',\n",
" u'appUpdateChannel': u'release',\n",
" u'appVersion': u'45.0.1',\n",
" u'clientId': '...',\n",
" u'docType': u'core',\n",
" u'documentId': '...',\n",
" u'geoCity': u'Yangon',\n",
" u'geoCountry': u'MM',\n",
" u'sampleId': 75.0,\n",
" u'sourceName': u'telemetry',\n",
" u'sourceVersion': u'1',\n",
" u'submissionDate': u'20160323'},\n",
" u'os': u'Android',\n",
" u'osversion': u'18',\n",
" u'seq': 1,\n",
" u'v': 1}),\n",
" ('check failed: ping.profileDate < 10957',\n",
" {u'arch': u'armeabi-v7a',\n",
" u'clientId': '...',\n",
" u'device': u'Sony-C6603',\n",
" u'experiments': [],\n",
" u'locale': u'es-BO',\n",
" 'meta': {u'Host': u'incoming.telemetry.mozilla.org',\n",
" 'Hostname': u'ip-172-31-47-142',\n",
" u'Size': 192.0,\n",
" 'Timestamp': 1458820362159601664L,\n",
" 'Type': u'telemetry',\n",
" u'appBuildId': u'20160315170748',\n",
" u'appName': u'Fennec',\n",
" u'appUpdateChannel': u'release',\n",
" u'appVersion': u'45.0.1',\n",
" u'clientId': '...',\n",
" u'docType': u'core',\n",
" u'documentId': '...',\n",
" u'geoCity': u'La Paz',\n",
" u'geoCountry': u'BO',\n",
" u'sampleId': 31.0,\n",
" u'sourceName': u'telemetry',\n",
" u'sourceVersion': u'1',\n",
" u'submissionDate': u'20160324'},\n",
" u'os': u'Android',\n",
" u'osversion': u'18',\n",
" u'profileDate': 51,\n",
" u'seq': 14,\n",
" u'v': 1})]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def sanitized_first(t):\n",
" p = list(t[1])[0]\n",
" p['clientId'] = '...'\n",
" p['meta']['clientId'] = '...'\n",
" p['meta']['documentId'] = '...'\n",
" return (t[0], p)\n",
"map(sanitized_first, grouped_checked_pings.iteritems())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Let's get serious (tm). Group pings per client to verify sequencing, etc."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def get_ping_info(p):\n",
" return {\n",
" \"clientId\": p[\"clientId\"],\n",
" \"seq\": p[\"seq\"],\n",
" \"v\": p[\"v\"],\n",
" \"Timestamp\": p[\"meta\"][\"Timestamp\"],\n",
" \"submissionDate\": p[\"meta\"][\"submissionDate\"]\n",
" }\n",
"\n",
"def dedupe_and_sort(group):\n",
" key, history = group\n",
" \n",
" seen = set()\n",
" result = []\n",
" \n",
" for fragment in history:\n",
" id = fragment[\"meta\"][\"documentId\"]\n",
" if id in seen:\n",
" continue\n",
" \n",
" seen.add(id)\n",
" result.append(get_ping_info(fragment))\n",
" \n",
" result.sort(key=lambda p: p[\"seq\"])\n",
" return result\n",
"\n",
"grouped = core_pings.groupBy(lambda x: x[\"clientId\"]).map(dedupe_and_sort)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"test_groups = grouped.collect()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def verify_ordering(groups):\n",
" errors_per_client = {}\n",
"\n",
" for history in groups:\n",
" errors = 0\n",
" for i in range(1, len(history)):\n",
" prev_ping = history[i - 1]\n",
" curr_ping = history[i]\n",
"\n",
" # We expect the pings to be sorted by sequence id. Just make sure\n",
" # the timestamps increase\n",
" if prev_ping[\"Timestamp\"] > curr_ping[\"Timestamp\"]:\n",
" errors = errors + 1\n",
" \n",
" prev_submission_date = dt.datetime.strptime(prev_ping[\"submissionDate\"], \"%Y%m%d\")\n",
" curr_submission_date = dt.datetime.strptime(curr_ping[\"submissionDate\"], \"%Y%m%d\")\n",
" \n",
" if prev_submission_date > curr_submission_date:\n",
" errors = errors + 1\n",
" \n",
" if errors > 0:\n",
" errors_per_client[curr_ping[\"clientId\"]] = errors\n",
" \n",
" return errors_per_client\n",
" \n",
"\n",
"ordering_errors = verify_ordering(test_groups)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"514"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(ordering_errors)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### What's the average size of a core ping (bytes)?"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"core_sizes = core_pings.map(lambda p: len(json.dumps(p)))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"sizes_series = pd.Series(core_sizes.collect())"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 11159099.000000\n",
"mean 687.295150\n",
"std 5.842787\n",
"min 654.000000\n",
"25% 683.000000\n",
"50% 687.000000\n",
"75% 691.000000\n",
"max 739.000000\n",
"dtype: float64"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sizes_series.describe()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.axes._subplots.AxesSubplot at 0x7fab6c1ea650>"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaYAAAEACAYAAAD4NNLwAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHdpJREFUeJzt3X+QXWWd5/H3JyAgCjGgJEsAgwNhwHGUIGF2WWfaaAg4\nNfzYXTHuOoSRnd0SZ2DXLUeiFklm3FWoYY3WDjgzOhBYJUBQwJIlDQU91tTCJkgEhBiyozBJMI0m\nJC5qYYDP/nGewKHTndx0+t5zmv68qrpy7vc85znfe/vkfvs857n3yDYRERFtManpBCIiIupSmCIi\nolVSmCIiolVSmCIiolVSmCIiolVSmCIiolW6XpgkTZZ0i6S1kh6TdJqkKZL6Ja2TtFLS5Fr7hZLW\nl/Zn1OKzJD0i6QlJS2vxAyQtL9vcL+mY2roFpf06SRfU4jMkPVDW3Shp/26/DhER0ZlenDF9CbjT\n9onAO4EfApcB99g+AbgXWAgg6STgfOBE4Czgakkq/VwDXGR7JjBT0rwSvwjYavt4YClwZelrCnA5\ncCpwGrCoVgCvAK4qfW0rfURERAt0tTBJOhR4j+1rAWy/YHs7cA6wrDRbBpxbls8Glpd2TwLrgdmS\npgGH2F5d2l1f26be1wpgTlmeB/Tb3m57G9APnFnWzQFure3/vDF6yhERsY+6fcZ0LPAzSddKekjS\n30g6GJhqexDA9mbgiNJ+OrChtv2mEpsObKzFN5bYq7ax/SKwXdJhI/Ul6XDgWdsv1fo6ckyebURE\n7LNuF6b9gVnAX9meBfyCahhv6PcgjeX3ImnPTTpqExERDej2Rf+NwAbbD5bHt1IVpkFJU20PlmG6\nZ8r6TcDRte2PKrGR4vVtnpa0H3Co7a2SNgF9Q7a5z/aWMiFjUjlrqvf1KpLyRYIREaNge9QnAF09\nYyrDdRskzSyh9wGPAXcAF5bYAuD2snwHML/MtDsWOA5YVYb7tkuaXSZDXDBkmwVl+YNUkykAVgJz\nSxGaAswtMYD7Stuh+x/uObTqZ9GiRY3nkJxeW3klp+Q01j/7qhfTpC8Bvi7pdcCPgD8C9gNulvRR\n4CmqmXjYflzSzcDjwA7gYr/yLD8OXAccRDXL764S/xpwg6T1wBZgfunrWUl/ATxINVS4xNUkCKjO\n2paX9WtKHxER0QJdL0y2H6aasj3U+0do/3ng88PEvwe8Y5j485TCNsy666iK2dD4j6mmkEdERMvk\nmx/Gmb6+vqZT2EVy6lwb80pOnUlOvaOxGA98rZLkvD4REXtHEm7r5IeIiIi9lcIUERGtksIUERGt\nksIUERGtksIUERGtksIUERGtksIUERGtksIUERGtksIUERGtksIUERGtksIUERGtksIUERGtksIU\nERGtksIUERGtksIUERGtksIUERGtksIUERGtksIUERGtksIUERGtksIUERGtksIUERGtksIUERGt\nksIUERGtksIUERGtksIUERGtksIUERGt0vXCJOlJSQ9LWiNpVYlNkdQvaZ2klZIm19ovlLRe0lpJ\nZ9TisyQ9IukJSUtr8QMkLS/b3C/pmNq6BaX9OkkX1OIzJD1Q1t0oaf9uvw4REdGZXpwxvQT02T7Z\n9uwSuwy4x/YJwL3AQgBJJwHnAycCZwFXS1LZ5hrgItszgZmS5pX4RcBW28cDS4ErS19TgMuBU4HT\ngEW1AngFcFXpa1vpIyIiWqAXhUnD7OccYFlZXgacW5bPBpbbfsH2k8B6YLakacAhtleXdtfXtqn3\ntQKYU5bnAf22t9veBvQDZ5Z1c4Bba/s/b5+eYbTKtGkzkNTIz7RpM5p++hHjXi8Kk4G7Ja2W9O9L\nbKrtQQDbm4EjSnw6sKG27aYSmw5srMU3ltirtrH9IrBd0mEj9SXpcOBZ2y/V+jpyn59ltMbg4FNU\nh13vf6p9R8S+6MW1ldNt/0TSW4B+Seuo/hfX
DX28L7TnJh21iYiIBnS9MNn+Sfn3p5JuA2YDg5Km\n2h4sw3TPlOabgKNrmx9VYiPF69s8LWk/4FDbWyVtAvqGbHOf7S2SJkuaVM6a6n3tYvHixS8v9/X1\n0dfXN1LTiIgJaWBggIGBgTHrT/ZYnqwM6Vw6GJhk+zlJb6C6zrMEeB/VhIUrJH0KmGL7sjL54etU\nkxWmA3cDx9u2pAeAS4DVwHeAL9u+S9LFwG/ZvljSfOBc2/PL5IcHgVlUQ5YPAqfY3ibpJuCbtm+S\ndA3wsO2vDJO/u/n6RHdU82Wa+r2JHDMx0UnC9qhHprpdmI4FvkX1LrE/8HXbXyjXgG6mOtN5Cji/\nTFBA0kKqWXI7gEtt95f4KcB1wEHAnbYvLfEDgRuAk4EtwPwycQJJFwKfKfv/nO3ra3ktB6YAa4CP\n2N4xTP4pTONQClNEs1pdmMa7FKbxKYUpoln7WpjyzQ8REdEqKUwREdEqKUwREdEqKUwREdEqKUwR\nEdEqKUwREdEqKUwREdEqKUwREdEqKUwREdEqKUwREdEqKUwREdEqKUwREdEqKUwREdEqKUwREdEq\nvbi1esQEcmC57UYzpk59K5s3P9nY/iPGQu7HtBu5H9P41PT9mJrbd7X/HLPRtNyPKSIiXlNSmCIi\nolVSmCIiolVSmCIiolVSmCIiolVSmCIiolVSmCIiolVSmCIiolVSmCIiolVSmCIiolVSmCIiolVS\nmCIiolVSmCIiolV6UpgkTZL0kKQ7yuMpkvolrZO0UtLkWtuFktZLWivpjFp8lqRHJD0haWktfoCk\n5WWb+yUdU1u3oLRfJ+mCWnyGpAfKuhsl5fYfEREt0aszpkuBx2uPLwPusX0CcC+wEEDSScD5wInA\nWcDVeuXmNtcAF9meCcyUNK/ELwK22j4eWApcWfqaAlwOnAqcBiyqFcArgKtKX9tKHxER0QJdL0yS\njgI+AHy1Fj4HWFaWlwHnluWzgeW2X7D9JLAemC1pGnCI7dWl3fW1bep9rQDmlOV5QL/t7ba3Af3A\nmWXdHODW2v7P29fnGRERY6MXZ0xfBD7Jq++eNtX2IIDtzcARJT4d2FBrt6nEpgMba/GNJfaqbWy/\nCGyXdNhIfUk6HHjW9ku1vo7clycYERFjp6vXViT9PjBo+/uS+nbTdCxvudnJXRM7vrPi4sWLX17u\n6+ujr69v7zOKiHgNGxgYYGBgYMz66/ZF/9OBsyV9AHg9cIikG4DNkqbaHizDdM+U9puAo2vbH1Vi\nI8Xr2zwtaT/gUNtbJW0C+oZsc5/tLZImS5pUzprqfe2iXpgiImJXQ/9oX7JkyT7119WhPNuftn2M\n7bcB84F7bf8h8G3gwtJsAXB7Wb4DmF9m2h0LHAesKsN92yXNLpMhLhiyzYKy/EGqyRQAK4G5pQhN\nAeaWGMB9pe3Q/UdERMOamib9BeBmSR8FnqKaiYftxyXdTDWDbwdwse2dw3wfB64DDgLutH1XiX8N\nuEHSemALVQHE9rOS/gJ4kGqocEmZBAHVrMDlZf2a0kdERLSAXnnfj6EkOa/P+FOdVDf1e2ty39X+\nc8xG0yRhu+Nr+UPlmx8iIqJVUpgiIqJVUpgiIqJVUpgiIqJVUpgiIqJVUpgiIqJVUpgiIqJVUpgi\nIqJVOipMkt7R7UQiIiKg8zOmqyWtknRx/W6zERERY62jwmT7PcC/o/oW7+9J+oakuV3NLCIiJqS9\n+q68cluJc4EvAz+n+mKwT9v+ZnfSa1a+K298ynfl5ZiNZvXku/Ik/bakLwJrqW5L/ge2TyzLXxzt\nziMiIobq6IxJ0t8DXwVW2P7VkHV/aPuGLuXXqJwxjU85Y8oxG83a1zOmTgvTG4Ff2X6xPJ4EHGT7\nl6Pd8XiQwjQ+pTDlmI1m9eq2F/dQ3Rp9p4NLLCIiYkx1WpgOsv3czgdl+eDupBQRERNZp4XpF5Jm\n7Xwg6RTg
V7tpHxERMSr7d9juPwG3SHqaahB9GvChrmUVERETVsefY5L0OuCE8nCd7R1dy6olMvlh\nfMrkhxyz0ayezMorO/oXwAxqZ1m2rx/tjseDFKbxKYUpx2w0a18LU0dDeZJuAH4D+D7wYgkbeE0X\npoiI6L1OrzG9Gzgppw8REdFtnc7K+wHVhIeIiIiu6vSM6c3A45JWAc/vDNo+uytZRUTEhNVpYVrc\nzSQiIiJ22ptZeW8Fjrd9j6SDgf1s/7+uZtewzMobnzIrL8dsNKtXt734Y2AF8NclNB24bbQ7jYiI\nGEmnkx8+DpxOdXNAbK8HjtjTRpIOlPR/JK2R9KikRSU+RVK/pHWSVtZv1y5poaT1ktZKOqMWnyXp\nEUlPSFpaix8gaXnZ5n5Jx9TWLSjt10m6oBafIemBsu5GSZ0OaUZERJd1Wpiet/3rnQ/KG/kexwts\nPw+81/bJwLuAsyTNBi4D7rF9AnAvsLD0exJwPnAicBZwtapxGYBrgItszwRmSppX4hcBW20fDywF\nrix9TQEuB04FTgMW1QrgFcBVpa9tpY+IiGiBTgvT30v6NPB6SXOBW4Bvd7Jh7Z5NB1JNtjBwDrCs\nxJdR3a4d4Gxgue0XbD8JrAdmS5oGHGJ7dWl3fW2bel8rqO6qCzAP6Le93fY2oB84s6ybA9xa2/95\nnTyXiIjovk4L02XAT4FHgf8I3Al8tpMNJU2StAbYDNxdistU24MAtjfzyrDgdGBDbfNNJTYd2FiL\nbyyxV21TbmS4XdJhI/Ul6XDgWdsv1fo6spPnEhER3dfRtZXyJv635WevlG1PlnQo8C1Jb2fXYcCx\nnEbUyUyQjmeLLF68+OXlvr4++vr69j6jiIjXsIGBAQYGBsasv06/K+/HDFM8bL+t0x3Z/rmkAarh\ntEFJU20PlmG6Z0qzTcDRtc2OKrGR4vVtnpa0H3Co7a2SNgF9Q7a5z/YWSZMlTSpFs97XLuqFKSIi\ndjX0j/YlS5bsU3+dDuW9m2oSwanAe4AvA/9zTxtJevPOCQeSXg/MBdYCdwAXlmYLgNvL8h3A/DLT\n7ljgOGBVGe7bLml2mQxxwZBtFpTlD1JNpgBYCcwtRWhK2ffKsu6+0nbo/iMiomEdf8B2lw2l79k+\nZQ9t3kE1uWBS+bnJ9n8t14BupjrTeQo4v0xQQNJCqllyO4BLbfeX+CnAdcBBwJ22Ly3xA4EbgJOB\nLcD8MnECSRcCn6E62/vcztt0lKK3HJgCrAE+Mtz9pfIB2/EpH7DNMRvN6sn9mOq3VacqMO8GPmb7\nnaPd8XiQwjQ+pTDlmI1m9eR+TMBVteUXgCepPm8UERExpkY9lDcR5IxpfMoZU47ZaFav7mD7id2t\nt/3fR5tARERE3d7cwfZUqhlwAH8ArKL6ZoaIiIgx0+nkh+8Cv7/zNheSDgG+Y/t3u5xfozKUNz5l\nKC/HbDSrJ7e9AKYCv649/nWJRUREjKlOh/KuB1ZJ+lZ5fC6vfHFqRETEmNmbO9jOovrWB4Dv2l7T\ntaxaIkN541OG8nLMRrN6NZQHcDDwc9tfAjaWb0+IiIgYU51OflhENTPvBNszJR0J3GL79G4n2KSc\nMY1POWPKMRvN6tUZ03lUN/H7BYDtp4FDRrvTiIiIkXRamH5dTh0MIOkN3UspIiImsk4L082S/hp4\nk6Q/Bu5hFDcNjIiI2JO9mZU3FziDahB9pe27u5lYG+Qa0/iUa0w5ZqNZXb/tRbkr7D223zvanYxX\nKUzjUwpTjtloVtcnP9h+EXhp551oIyIiuqnTb354DnhU0t2UmXkAti/pSlYRETFhdVqYvll+IiIi\numq315gkHWP7n3qYT6vkGtP4lGtMOWajWd2+xnRbbUe3jnYnERERndpTYapXvLd1M5GIiAjYc2Hy\nCMsRERFdsadrTC9SzcIT8HrglztXAbZ9aNczbFCuMY1PucaUYzaata/XmH
Y7K8/2fqPtOCIiYjT2\n5n5MERERXZfCFBERrZLCFBERrZLCFBERrdLVwiTpKEn3SnpM0qOSLinxKZL6Ja2TtLL+BbGSFkpa\nL2mtpDNq8VmSHpH0hKSltfgBkpaXbe6XdExt3YLSfp2kC2rxGZIeKOtulNTpVzNFRESXdfuM6QXg\nE7bfDvxz4OOSfhO4jOpWGicA9wILASSdBJwPnAicBVytau4vwDXARbZnAjMlzSvxi4Ctto8HlgJX\nlr6mAJcDpwKnAYtqBfAK4KrS17bSR0REtEBXC5Ptzba/X5afA9YCRwHnAMtKs2XAuWX5bGC57Rds\nPwmsB2ZLmgYcYnt1aXd9bZt6XyuAOWV5HtBve7vtbUA/cGZZNwfY+RVLy4DzxuYZR0TEvurZNSZJ\nM4B3AQ8AU20PQlW8gCNKs+nAhtpmm0psOrCxFt9YYq/aptw7arukw0bqS9LhwLO2X6r1deS+P8OI\niBgLPbm2IumNVGczl9p+TtLQj6aP5UfVO/m0ccefSF68ePHLy319ffT19e19RhERr2EDAwMMDAyM\nWX9dL0xlYsEK4Abbt5fwoKSptgfLMN0zJb4JOLq2+VElNlK8vs3T5Tbwh9reKmkT0Ddkm/tsb5E0\nWdKkctZU72sX9cIUERG7GvpH+5IlS/apv14M5f0d8LjtL9VidwAXluUFwO21+Pwy0+5Y4DhgVRnu\n2y5pdpkMccGQbRaU5Q9STaYAWAnMLUVoCjC3xADuK22H7j8iIhq22y9x3efOpdOB7wKPUg3XGfg0\nsAq4mepM5yng/DJBAUkLqWbJ7aAa+usv8VOA64CDgDttX1riBwI3ACcDW4D5ZeIEki4EPlP2+znb\n15f4scByYAqwBviI7R3D5J8vcR2H8iWuOWajWfv6Ja5dLUzjXQrT+JTClGM2mtXtO9hGRET0VApT\nRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0\nSgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpT\nRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SgpTRES0SlcLk6SvSRqU9Egt\nNkVSv6R1klZKmlxbt1DSeklrJZ1Ri8+S9IikJyQtrcUPkLS8bHO/pGNq6xaU9uskXVCLz5D0QFl3\no6T9u/kaRETE3un2GdO1wLwhscuAe2yfANwLLASQdBJwPnAicBZwtSSVba4BLrI9E5gpaWefFwFb\nbR8PLAWuLH1NAS4HTgVOAxbVCuAVwFWlr22lj4iIaImuFibb/wA8OyR8DrCsLC8Dzi3LZwPLbb9g\n+0lgPTBb0jTgENurS7vra9vU+1oBzCnL84B+29ttbwP6gTPLujnArbX9n7dPTzIiIsZUE9eYjrA9\nCGB7M3BEiU8HNtTabSqx6cDGWnxjib1qG9svAtslHTZSX5IOB561/VKtryPH6HlFRMQYaMPkB49h\nX9pzk47aREREQ5q48D8oaartwTJM90yJbwKOrrU7qsRGite3eVrSfsChtrdK2gT0DdnmPttbJE2W\nNKmcNdX7GtbixYtfXu7r66Ovr2/EthERE9HAwAADAwNj1p/ssTxhGWYH0gzg27bfUR5fQTVh4QpJ\nnwKm2L6sTH74OtVkhenA3cDxti3pAeASYDXwHeDLtu+SdDHwW7YvljQfONf2/DL54UFgFtVZ4YPA\nKba3SboJ+KbtmyRdAzxs+ysj5O5uvz4x9qo5M0393prcd7X/HLPRNEnYHvXoVFcLk6RvUJ25HA4M\nAouA24BbqM50ngLOLxMUkLSQapbcDuBS2/0lfgpwHXAQcKftS0v8QOAG4GRgCzC/TJxA0oXAZ6je\nJT5n+/oSPxZYDkwB1gAfsb1jhPxTmM
ahFKYcs9GsVhem8S6FaXxKYcoxG83a18LUhskPERERL8u3\nHsSYmzZtBoODTzWdRkSMUxnK240M5Y1Os0Np0OxwWvPPPcdsNC1DeRER8ZqSwhQREa2SwhQREa2S\nwhQREa2SwhQREa2SwhQREa2SzzFFvKYcyCv31+ytqVPfyubNTzay73htyeeYdiOfYxqdfI5p4j73\n/H8JyOeYIiLiNSaFKSIiWiWFKSIiWiWFKSIiWiWFKSIiWiWFKSIiWiWFKSIiWiWFKSIiWiWFKSIi\nWiWFKSIiWiWFKSIiWiWFKSIiWiWFKSIiWiWFKSIiWiWFKSIiWiWFKSIiWiWFKSIiWmXCFiZJZ0r6\noaQnJH2q6XwiIqIyIQuTpEnA/wDmAW8HPizpN5vNqjMDAwNNp7CLNuYEA00nMIKBphMYxkDTCeyi\njcdUcuqdCVmYgNnAettP2d4BLAfOaTinjrTxQGxjTm18s60MNJ3AMAbGqJ8DkTQmP+9973v3eptp\n02aM0fMYXhuP8zbmNBYmamGaDmyoPd5YYhExas8DHqOfRXu9zeDgUz14jtELE7UwddWVV145Zn85\nDv1ZsmTJHtsccshburb/TnKKiNgXst10Dj0n6XeAxbbPLI8vA2z7iiHtJt6LExExBmyP+q/UiVqY\n9gPWAe8DfgKsAj5se22jiUVEBPs3nUATbL8o6U+AfqrhzK+lKEVEtMOEPGOKiIj2mtCTHyRNlnSL\npLWSHpN0mqRFkjZKeqj8nFlrv1DS+tL+jF7lVOJ/WmKPSvpC0zlJWi5pTXmNfizpoV7mtJu83inp\n/pLbKknv7mVeI+T025L+t6SHJd0u6Y29yknSzNrvaY2k7ZIukTRFUr+kdZJWSprcgpz+jaQfSHpR\n0qwh2zSV05Vln9+XdKukQ3uV0x7y+vNyPK2RdJekab3Ka6Scauv/i6SXJB026pxsT9gf4Drgj8ry\n/sBkqnmqnxim7YnAmtJuBvB/KWecXc7pUKCPathx/xJ/c9M5DVn/l8Bne5nTbn5/K4EzSuws4L6y\nfFJDr9VkqmuY/7LELgT+vJc51XKbBDwNHA1cAfxZiX8K+EILcjoBOB64F5hVa9OzY2qYnN4PTCrx\nLwCfb+J1GiavN9bifwpc0/Tvrzw+CrgL+DFw2Gh/fxP2jKn85fMe29cC2H7B9vadq4fZ5BxgeWn3\nJLCe6oO63c7p58DHqN44Xijxn7Ugp7rzgW/0Kqfd5LUdeImqGAC8CdhUls/udl67yel42/9Qmt0D\n/Ote5TTE+4F/tL2B6ve0rMSXAec2nZPtdbbXs+v/v54cUyPkdI/tl0r8Aao3Xuj96zQ0r+dq8TdQ\nHfdN5FU/pgC+CHxySJu9/v1N2MIEHAv8TNK15ZT0byQdXNb9STl1/2ptiGPoh3I3MfYfyh0pp5nA\n70p6QNJ9kk5pOKfX71wp6T3AZts/6mFOu8vrPwN/KemfgCuBhT3Ma6Tf32OSzi5tzueVN7devVY7\nfYhX/oCYansQwPZm4IgGc7pxD23aktNHgTsbymmXvCR9rhzn/xa4vKG8Xs6pHOMbbD86pM1e5zSR\nC9P+wCzgr2zPAn4JXAZcDbzN9ruAzcBVDeb0i5LT/sAU278D/BlwS4M5/ZJX3uwBPsye31h6kdcv\nSl4fAy61fQxVkfq7BnP6JdUw2UeBj0taTfXX7a97mBMAkl5H9df0zmNn6Kynns+CGianxo2Uk6TP\nADtsN3GsD5uX7c+W4/zrVMN5TeV0c/mj8NNUl0L22UQuTBupqvuD5fEK4GTbP3UZGAX+lldOOTdR\nje3udBSvDBN1K6dbqd7oNgDfBLC9GnhR0uFl/8f0OKcVwMnw8ufB/hVwU619L16n4fLa+VpdYPs2\nANsrgFN7mNdwr9Us20/Ynmf7VKrvZfzHHua001nA92rDwIOSpgKUC+fPNJjTT/fQrtGcJF0IfIDq\nzK
SJnIbNq+YbVP8Pe51X/Zj6DarrRw9L+nHZ70OSjmAU71MTtjCVYYwNkmaW0PuAx+uzW6h+2T8o\ny3cA8yUdIOlY4Diqi9rdzukx4DZgDlQzYoADbG8pOX2ogZweL8tzgbW2n65t0vXXaTd5PQY8Len3\nACS9j2o8uyd57eaYekvJZxLwWeArvcqpZuiZ7R1UEzEAFgC3tyCnuvp1psZyUjUr95PA2bafbyin\n4fI6rrbuXOCHDeT1ck62f2B7mu232T6W6o+0k20/w2jep7o1W2M8/ADvBFYD36c6I5kMXA88UmK3\nUY3F72y/kGpGyVrKzK8e5fQ64AbgUeBB4PeazqnErwX+wzDtu57Tbl6r08trtAa4v/znaPr3dwnV\nN438EPhvvX6tgIOBnwKH1GKHUU3EWEc14/NNLcjpXKrRgV9RfSPL/2pBTuuBp4CHys/VDRznw+W1\novY+dTvwz5p+rYas/xFlVt5ocsoHbCMiolUm7FBeRES0UwpTRES0SgpTRES0SgpTRES0SgpTRES0\nSgpTRES0SgpTRES0SgpTRES0yv8HGuuEb/krTA0AAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fab6c36bad0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sizes_series.plot(kind='hist', bins=10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### How is each client behaving? What's the ping distribution?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's count how many core pings each client sends per day and compute some descriptive statistics for each client."
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def counts(ping_array):\n",
" counts = {}\n",
" \n",
" # Get the counts per day.\n",
" for p in ping_array:\n",
" date = p[\"submissionDate\"]\n",
"\n",
" if not date in counts:\n",
" counts[date] = 1\n",
" continue\n",
" \n",
" counts[date] = counts[date] + 1\n",
" \n",
" return counts.values()\n",
" \n",
"some_counts = grouped.map(counts)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def get_stats(array):\n",
" return {\n",
" \"min\": np.min(array),\n",
" \"max\": np.max(array),\n",
" \"avg\": np.mean(array),\n",
" \"std\": np.std(array),\n",
" \"var\": np.var(array),\n",
" \"95p\": np.percentile(array, 95)\n",
" }\n",
"\n",
"stats = some_counts.map(get_stats)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's look at the distribution of the maximum number of pings per day sent by each client."
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 257961.000000\n",
"mean 10.843333\n",
"std 12.578439\n",
"min 1.000000\n",
"25% 3.000000\n",
"50% 7.000000\n",
"75% 14.000000\n",
"max 1817.000000\n",
"dtype: float64"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ping_counts_series = pd.Series(stats.map(lambda x: x[\"max\"]).collect())\n",
"ping_counts_series.describe()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAECCAYAAAAciLtvAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFrlJREFUeJzt3X+MXNd12PHvYVzKcFInjoGILRnuqmAUJgoEIkVZoEmb\nCeqazE+2UYBQhpJKgaE2aaUCRhE5gIEdxkBhoz9MI2qcNma4thuGdYM4JhMbYX5oZKhCEgKxwkih\nIhYVKZKtNgkiFy1SuAp1+se8IYerXe682ft23r75foCB9r6ZnblzZpZH99x374vMRJI033bMugOS\npNkzGUiSTAaSJJOBJAmTgSQJk4EkCZOBJAmTgSQJeEvTLxARAXwQeDtwPjM/1fRrSpLq2YqRwRFg\nD/D/gGtb8HqSpJpqJ4OIOBERKxFxYdXxwxHxYkS8FBFPjN31jcB/zcx/Cfz4JvsrSWrANCODk8Ch\n8QMRsQN4sjp+H/BgROyv7r4GvFb9fGPKfkqSGlQ7GWTmM9z6x33kIHApM69k5uvAaYblIYBfBg5H\nxEeBpzfTWUlSM0pNIO8Gro61rzFMEGTm/wXee6dfjgi3TpWkKWRmlHie1pxaurS0xFNPPUVmetvE\nbWlpaeZ96NLNeBrPNt6eeuoplpaWiv4bXGpkcB3YO9beUx2bWL/fL9SV+Xb58uVZd6FTjGdZxrOM\nXq9Hr9fj2LFjxZ5z2pFBVLeR88C+iFiIiJ3AUeBMnSfs9/sMBoMpuyNJ82MwGBT/H+jIrFeuj4hT\nQA94J7ACLGXmyYj4LuA4wwRzIjM/VOM5s24/tLbBYECv15t1NzrDeJZlPMuKCLLQnEHtZNCEiMil\npaWbQx9J0voGgwGDwYBjx451Lxm0oR9d4P95lWU8yzKeZZUcGbTmbCLnDCRpMq2YM2iCIwNJqs+R\ngSTNMUcG2pA12bKMZ1nGs6xOjgwkSbPTmpGBp5ZK0mQ8tVSSdJNlIq3LSfiyjGdZxrO9TAaSpPaU\niZwzkKTJOGcgSbrJOQOty5psWcazLOPZXiYDSVJ7ykTf8i3fDsBHPrLEu971rhn3SJLar5Nlouef\n/1c8//y9PP30F2bdFUmaO61JBvBbwBuz7sS2Z022LONZlvEso4mN6lqUDPrAPbPuhCS1Xq/X63Iy\nUAmu0yjLeJZlPNvLZCBJaj4ZRMR3RMQXIuJjEfH3mn69eWdNtizjWZbxbK+tGBkk8L+Bu4BrW/B6\nkqSaaieDiDgRESsRcWHV8cMR8WJEvBQRT4yOZ+YXMvN7gPcDP7X5LutOrMmWZTzLMp7tNc3I4CRw\naPxAROwAnqyO3wc8GBH7V/3el4Cd03RSktSs2skgM58BXlt1+CBwKTOvZObrwGngCEBE/KOI+Fng\nEwwThhpkTbYs41mW8WyvtxR6nt3A1bH2NYYJgsz8DPCZjZ/iYeAKTz/9BsePH+fAgQM3h5SjL5Bt\n27Ztz3N7MBiwvLwMwOLiIiVNtTdRRCwAZzPz/qr9AHAoMx+t2g8BBzPz8QmfL4fzzD/FBz7wl3zw\ng04tSNJGSu5NVGpkcB3YO9beUx2roQ9cAb6+UJckqZsG1cVtSpr21NKobiPngX0RsRARO4GjwJl6\nT9nH7Sg2r/QXZN4Zz7KMZxm9BrajqD0yiIhTQA94Z0S8Aixl5smIeAw4xzDBnMjMi/WeuY8jA0na\nWBMjg9rJIDPfs87xzwOf33SPtCmjSSeVYTzLMp7t1aK9ifpYJpKkjTVRJmpRMlAJ1mTLMp5lGc/2\nKnU2UQF9nDOQpI01MWfQmmsgu85Akupp4zqDAvo4MpCkjbVpnUED+jiBvHnWZMsynmUZzzKcQJYk\nNcIyUcd4HndZxrMs41mGZSJJkmUibcyabFnGsyzj2V6tSwYf+cjPEBFEBLt2Lc66O5I0F1q0zmCJ\n4ZzBMsM1BwBBG/onSW0ymjM4duxYsXUGLUoG
w0Vnw6RgMpCkjZRcdNa6MpE2x5psWcazLOPZXiYD\nSZJlIknariwTSZKKalEy6AMvz7oT25412bKMZ1nGs4zBYNDlRWd9XIEsSRtrYgWycwaStE05ZyBJ\nKmpLkkFEvC0izkfEd2/F680za7JlGc+yjGd7bdXI4AngP2/Ra0mSaqo9ZxARJ4DvBVYy8/6x44eB\n4wwTzInM/HB1/F3AO4G3An+Wmb+2xnM6ZyBJNc16zuAkcGhVh3YAT1bH7wMejIj91d094G8D7wHe\nO3VPJUmNqZ0MMvMZ4LVVhw8ClzLzSma+DpwGjlSP/0Bmvg/4BeDnNtlfbcCabFnGsyzj2V6lLnu5\nG7g61r7GMEHclJmfvPNTPMxwC2sYVpsO3Lxn9AUaXTLPtm3btuexPRgMWF5eBmBxcZGSplpnEBEL\nwNnRnEFEPAAcysxHq/ZDwMHMfHzC53POQJJqKjlnUGpkcB3YO9beUx2roc+tkYEkaT2D6uI2JU17\namlUt5HzwL6IWIiIncBR4MxmO6f6Sn9B5p3xLMt4tlftZBARp4BngXsj4pWIeCQzbwCPAeeAF4DT\nmXmx3jP3cW8iSdpYr9t7E3kNZEmaxKhM5DWQJUkzX3TWkD5vvp7BXUQEEcGuXYtb36VtyJpsWcaz\nLONZxmD+rmfwZYajhGRlxTONJAk6P2ewdpnIkpEkra2N6wwK6OM6A0naWJvWGTSgj6eWbp412bKM\nZ1nGs4wmykQtSgaSpFlp0ZzB2usMnDOQpNvN5ToDk4Ekra2j6wxUgjXZsoxnWcazvUwGkqQ2lYmc\nM5CkSThn0IK+SlJbOGegdVmTLct4lmU828tkIEmyTCRJ25VlIklSUS1KBn3efD0D1WVNtizjWZbx\nLGMOr2cgSVptzq9n8FaGF7uBu+9e4NVXL29xLyWpXTp6PYONjK56BisrRd67JKnSeJkoIvZHxMci\n4tMR8U+bfr15Z022LONZlvFsr8aTQWa+mJk/BvwQ8Heafj1JUn21k0FEnIiIlYi4sOr44Yh4MSJe\niognVt33fcCvAp/bXHe1kV6vN+sudIrxLMt4ttc0I4OTwKHxAxGxA3iyOn4f8GBE7B/dn5lnM/N7\ngIc20VdJUkNqJ4PMfAZ4bdXhg8ClzLySma8Dp4EjABHxHRHx0Yj4WeDXNtth3Zk12bKMZ1nGs71K\nnU20G7g61r7GMEGQmU8DT2/8FA8z3MIa4DhwYOy+wZsePRgMbg45R18w27Zt2+5yezAYsLy8DMDi\n4iIlTbXOICIWgLOZeX/VfgA4lJmPVu2HgIOZ+fiEzzfR3kTuUyRJt7RxncF1YO9Ye091rIY+t0YG\nkqT1DKqL25Q07amlUd1GzgP7ImIhInYCR4Ezm+2c6iv9BZl3xrMs49le05xaegp4Frg3Il6JiEcy\n8wbwGHAOeAE4nZkX6z1zH/cmkqSN9bq9N9HG10B2zkCSvAYyJgNJuqWjF7fp4/UMNs+abFnGsyzj\nWcbA6xlIkjo+Z1CnTOS1DSSpjesMCugz+ToDr20gaX61aZ1BA/pYJto8a7JlGc+yjGcZTZSJWpQM\nJEmz0qI5g3rrDDzNVNK8cp2ByUCSburoOgOVYE22LONZlvFsL5OBJKlNZSLnDCRpEs4ZmAwk6Sbn\nDLQua7JlGc+yjGd7mQwkSZaJJGm7skwkSSqqRcmgj9cz2DxrsmUZz7KMZxlez2BNdxERRAS7di2W\n7ZIktZDXM3D+QJJucs5AklRU48kgIo5ExH+MiF+MiH/Q9OvNO2uyZRnPsoxnezV+pbPM/Czw2Yj4\nGuBfA7/R9GtKkuqpPWcQESeA7wVWMvP+seOHgeMMRxsnMvPDq37v3wD/KTOfW+M5nTOQpJpmPWdw\nEji0qkM7gCer4/cBD0bE/rH7PwR8bq1EIEmavdrJIDOfAV5bdfggcCkzr2Tm68Bp4AhARDwG/H3g\nByPi0U32
VxuwJluW8SzLeLZXqTmD3cDVsfY1hgmCzPxp4Kc3foqHGW5hDcNq04Gx+wZrPH4A9Na8\nf/SF6/V6tm3btt2Z9mAwYHl5GYDFxUVKmmqdQUQsAGdHcwYR8QBwKDMfrdoPAQcz8/EJn885A0mq\nqeScQamRwXVg71h7T3Wshj63RgaSpPUMqovblDTtOoOobiPngX0RsRARO4GjwJnNdk71lf6CzDvj\nWZbxbK/aySAiTgHPAvdGxCsR8Uhm3gAeA84BLwCnM/NivWfuM93eROPcp0hS9/W6vTfR9NdAdv5A\n0jwZlYm8BrLJQJJmvuisIX28nsHmWZMty3iWZTzLGHg9A0lSx+cMLBNJUh1tXGdQQB/XGUjSxtq0\nzqABfSwTbZ412bKMZ1nGs4wmykQtSgalueZAkibVojmD8usMnD+Q1EWuMzAZSNJNHV1noBKsyZZl\nPMsynu1lMpAktalM5JyBJE3COQOTgSTd5JyB1mVNtizjWZbxbC+TgSTJMpEkbVfuTVTbcDUywN13\nL/Dqq5cbeh1Jap57E03tywxHCcnKSrc3w7MmW5bxLMt4luHeRJKkRszNnIHzB5K6xlNLJUlFNZ4M\nIuKeiPh4RHy66deSNdnSjGdZxrO9Gk8GmflyZr636deRJE2vdjKIiBMRsRIRF1YdPxwRL0bESxHx\nRLkuqo5erzfrLnSK8SzLeLbXNCODk8Ch8QMRsQN4sjp+H/BgROxf9XtFJjkkSeXVTgaZ+Qzw2qrD\nB4FLmXklM18HTgNHACLiayPiY8ABRwzNsyZblvEsy3i2V6kVyLuBq2PtawwTBJn558CPbfwUD3Nr\nBfJx4MDYfYM1Hj8Aeuvcf+fHj76QoyGrbdu2bW+H9mAwYHl5GYDFxUVKmmqdQUQsAGcz8/6q/QBw\nKDMfrdoPAQcz8/EJn6/x6xnc+vmtDFckuzWFpO1p0MD1DEqdTXQd2DvW3lMdq6FPc9tRjJufrSkk\ndVOvRdtRBLdPCJ8H9kXEQkTsBI4CZ+o9ZR94ecruaGQ0pFQZxrMs41nGYDCYfTKIiFPAs8C9EfFK\nRDySmTeAx4BzwAvA6cy8WO+Z+2zNyECStrcmRgYt2ptoq+YM3KdI0vbWxJxBi5LB1m1UZzKQ1AUd\n3aiuz9bPGQwvehMR7Nq1uMWv3QxrsmUZz7KMZxmtmDNoTp+tnzPwzCJJ20/H5wxmUyayZCRpu/Ia\nyJI0x0YTyCXNeZmoe6zJlmU8yzKeZbRp0ZkkqUNaNGcwm3UGzhlI2m5cZ2AykKSbOrrOQCVYky3L\neJZlPNvLZFDTrl2LnVuoJkktKhNtjzmDCEtLkmbLOQOTgSTd5JyB1mVNtizjWZbxbC+TgSTJMpFl\nIknblXsTNeKu6h962LHjbbzxxl8AcPfdC7z66uUZ9kuSbufeRI26tZ31MBFsz62trcmWZTzLMp5l\nuDeRJKkRzhnUnEtwzkBSW3hqqSSpqMaTQUS8LSKWI+I/RMR7mn69eWdNtizjWZbxbK+tGBn8APBf\nMvOfAN+/Ba8nSaqpdjKIiBMRsRIRF1YdPxwRL0bESxHxxNhde4Cr1c83NtFXTaDX6826C51iPMsy\nnu01zcjgJHBo/EBE7ACerI7fBzwYEfuru68yTAgwnJmVJLVM7WSQmc8Ar606fBC4lJlXMvN14DRw\npLrvM8APRsS/B85uprPtc9fN7ay/4iu+cs2ft3qb67bVZLf7lt/rxXO7v69Zadv3c1pd/PxLrUDe\nza1SEMA1hgmCzPwL4Ec3foqHubUC+ThwYOy+wRqPHwC9de4v+/jRF/jWEHf0+NFCtQFvvPGdjE45\nfeONAJ4CeqysxJt+f57aw0V7TwGwsvKdM+9PqfbwfWX1voafcZv6Z7ubn/9gMGB5eRmAxcVFSppq\nnUFELABnM/P+qv0AcCgzH63aDwEHM/PxCZ+vFdczmGadgZfTvLOursvo6v
vSZGb9+Q8auJ5BqbOJ\nrgN7x9p7qmM19JntdhSStD30WrQdRXD7ZPB5YF9ELETETuAocKbeU/aBl6fsjkZGQ0qVYTzLMp5l\nDAaD2SeDiDgFPAvcGxGvRMQjmXkDeAw4B7wAnM7Mi/WeuY8jA0naWBMjgxbtTeScQRfNurbalK6+\nL01m1p9/E3MGLUoG23OjOpPBnc36j6YpXX1fmkxbPv+OblTXxzmDzbMmW5bxLMt4ltGKOYPm9HHO\nQJI21vE5A8tEXdSW4XRpXX1fmkxbPn+vgSxJc2w0gVySZaKOsSZblvEsy3iW0aZFZ5KkDmnRnIHr\nDLqoLbXV0rr6vjSZWX/+rjMwGWw7s/6jaUpX35cm05bPv6PrDFSCNdmyjGdZxrO9TAaSpDaViZwz\n6KK2DKdL6+r70mRm/fk7Z2Ay2HZm/UfTlK6+L02mLZ+/cwZalzXZsoxnWcazvUwGkiTLRJaJmtWW\n4XRpXX1fmkxbPn/3JpKkOebeRNqQNdmyjGdZxrMM9yaSJDXCOQPnDBrVltpqaV19X5pMWz5/Ty2V\nJBXVaDKIiHsi4uMR8ekmX0e3WJMty3iWZTzbq9FkkJkvZ+Z7m3wN3e65556bdRc6xXiWZTzba6Jk\nEBEnImIlIi6sOn44Il6MiJci4olmuqg6vvSlL826C51iPMsynu016cjgJHBo/EBE7ACerI7fBzwY\nEfur+344Iv5dRPy10cML9VeS1ICJkkFmPgO8turwQeBSZl7JzNeB08CR6vGfysz3AV+OiI8BBzYa\nObz97d/HXXf9Qu03oNtdvnx51l3oFONZlvFsr4lPLY2IBeBsZt5ftR8ADmXmo1X7IeBgZj5euxMR\nnpcnSVPo1HYUpd6MJGk6mzmb6Dqwd6y9pzomSdpm6iSD4PaJ4PPAvohYiIidwFHgTMnOSZK2xqSn\nlp4CngXujYhXIuKRzLwBPAacA14ATmfmxea6KklqyqRnE70nM/96Zt6VmXsz82R1/POZ+Y2Z+Q2Z\n+aG6L+46helExOWI+IOI+GJE/F517B0RcS4i/jgifj0ivnrs8T8ZEZci4mJEvHt2PW+HtdbNTBO/\niPjWiLhQfX+Pb/X7aIN1YrkUEdci4ver2+Gx+4zlHUTEnoj47Yh4ISL+MCIer443//3MzJncGCai\n/wYsAH8FeA7YP6v+bKcb8N+Bd6w69mHgJ6qfnwA+VP38zcAXGZ4ssFjFPGb9HmYcv28HDgAXNhM/\n4HeBv1X9/DmGZ9fN/P21IJZLwPvWeOw3GcsN47kLOFD9/FXAHwP7t+L7OcuN6tZdp6ANBW8e1R0B\nPlH9/AngH1Y/fz/DEt5fZuZl4BLD2M+tXHvdTK34RcQu4K9m5vnqcZ8c+525sU4sYe2FpkcwlneU\nma9m5nPVz/8HuMjw5JzGv5+zTAa7gatj7WvVMW0sgd+IiPMRMdr76e7MXIHhFwr4uur46jhfxziv\n5etqxm83w+/siN/f2/3ziHiu2qhyVNIwljVExCLDUdfvUP/vu3ZM3cJ6e/q2zPxW4LuBfxYRf5db\nm6uPuJBvc4zf9H4G+BuZeQB4Ffi3M+7PthMRXwX8EvAvqhFC43/fs0wGrlOYUmb+z+q/fwr8CsOy\nz0pE3A1QDRH/pHr4deDrx37dOK+tbvyM6zoy80+zKlQDP8etsqSxnEBEvIVhIvhUZn62Otz493OW\nycB1ClOIiLdV/9dARHwl8G7gDxnG7uHqYf8YGH2JzgBHI2JnRNwD7AN+b0s73U6r183Uil81VP9f\nEXEwhpe9+pGx35k3t8Wy+sdq5AeA56ufjeVkfh74o8z86Nix5r+fM545P8xwtvwS8P5Zz+Rvhxtw\nD8Mzr77IMAm8vzr+tcBvVvE8B3zN2O/8JMOzDC4C7571e5j1DTgF/A/gy8ArwCPAO+rGD/ib1Wdw\nCfjorN9Xi2L5SeBC9T39FYb1bmM5WT
y/Dbgx9jf++9W/k7X/vuvGtBXXQJYkzZYTyJIkk4EkyWQg\nScJkIEnCZCBJwmQgScJkIEkC/j/mt58ppjzWVAAAAABJRU5ErkJggg==\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fa5c6641e10>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig_ppc, ax_ppc = plt.subplots()\n",
"ping_counts_series.hist(ax=ax_ppc, bins=100, bottom=0.1)\n",
"ax_ppc.set_yscale('log')"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"### Are we seeing any gap (missing sequences) for any client?"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Go through all the core pings (per client) and check if there's any gap in the sequence numbers."
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def verify_sequence_gaps(client_history):\n",
" gaps = []\n",
" gap_length = 0\n",
"\n",
" for i in range(1, len(client_history)):\n",
" prev_ping = client_history[i - 1]\n",
" curr_ping = client_history[i]\n",
" \n",
" expected_sequence_num = prev_ping[\"seq\"] + 1\n",
" if curr_ping[\"seq\"] != expected_sequence_num:\n",
" # We found a gap: how big is this gap?\n",
" gap_length = curr_ping[\"seq\"] - prev_ping[\"seq\"]\n",
" \n",
" if gap_length > 0:\n",
" # We had a gap before, but the sequence is ok now.\n",
" gaps.append(gap_length)\n",
" gap_length = 0\n",
"\n",
" return gaps\n",
" \n",
"\n",
"sequence_errors = grouped.map(verify_sequence_gaps)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's discard those clients that don't have any gap in the sequence numbers."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"clients_with_gaps = sequence_errors.filter(lambda h: len(h) > 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now that we have a list of gaps in the sequence numbers, get some stats about them."
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def gap_stats(gaps):\n",
" return {\n",
" \"len\": len(gaps),\n",
" \"min\": np.min(gaps),\n",
" \"max\": np.max(gaps),\n",
" \"avg\": np.mean(gaps),\n",
" \"std\": np.std(gaps),\n",
" \"var\": np.var(gaps),\n",
" \"95p\": np.percentile(gaps, 95)\n",
" }\n",
"\n",
"gaps_stats = clients_with_gaps.map(gap_stats) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get some descriptive stats about the number of sequence number gaps for a single client history."
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 109632.000000\n",
"mean 3.784205\n",
"std 4.718778\n",
"min 1.000000\n",
"25% 1.000000\n",
"50% 2.000000\n",
"75% 4.000000\n",
"max 134.000000\n",
"dtype: float64"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gaps_num_series = pd.Series(gaps_stats.map(lambda x: x[\"len\"]).collect())\n",
"gaps_num_series.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get some descriptive stats about the size of the gaps in the sequence numbers. Also, what's the distribution of the gap sizes?"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"gaps_size_series = pd.Series(gaps_stats.map(lambda x: x[\"max\"]).collect())"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 109632.000000\n",
"mean 13.004296\n",
"std 199.656595\n",
"min 2.000000\n",
"25% 2.000000\n",
"50% 4.000000\n",
"75% 11.000000\n",
"95% 41.000000\n",
"99% 109.690000\n",
"99.5% 162.845000\n",
"max 63476.000000\n",
"dtype: float64"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gaps_size_series.describe(percentiles=[.25, .5, .75, .95, .99, .995])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAECCAYAAAD6oXArAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAF4xJREFUeJzt3X+wXPd51/H3o6hKYxqnTZhEHQnphtEYDZqaS5gqDAGy\n0B9SWycqdcFSqlKLyYgExgI6gFNgRtLwTz0wQZm6dQlVdKlbVXXdJpGmCXEy1SYYk6KSXhwbuxE0\n1q+iSzK1TYCMkJ2HP/astXt7He1e7dlzvnvfr5k72u937z37ubrSPvd8n/MjMhNJkvrWNR1AktQu\nFgZJ0hALgyRpiIVBkjTEwiBJGmJhkCQNsTBIkoZYGCRJQ9bX/QIREcA/B24HzmXmw3W/piRp9aax\nx7AH2Az8P+DyFF5PknQLxi4MEXE8IpYi4sll87sj4tmI+FJE3D/w1J8C/kNm/kPg79xiXklSzVaz\nx3AC2DU4ERHrgAer+R3AvojYXj19GXi+evzyKnNKkqZk7MKQmY9z442+bydwPjMvZOZ14BS9JSSA\n3wB2R8SHgM/eSlhJUv0m1XzeBFwaGF+mVyzIzK8D7/1mXxwRXuJVklYhM2PS22zN4aqHDx/m7Nmz\nZGZxH4cPH248w1rMbv7mP8zfzMfZs2c5fPhwbe/Hk9pjuAJsGRhvruZGduTIkQlFmb7nnnuu6Qir\nVnJ2MH/TzN+MTqdDp9Ph6NGjtWx/tXsMUX30nQO2RcTWiNgA7AVOj7PBI0eO0O12VxlHktaObrdb\n7y/T4+7CACeBPwCuAReBA9X8DwC/B5wHPjDmNrNkZ8+ebTrCqpWcPdP8TTN/s6r3zokvVUVv282K\niDx8+PAru0eSpFfX7XbpdrscPXqUnOXm85EjR4otCiUvgZWcHczfNPM3o9Pp1LqU1KrCUOoPSZKm\nqe4eQ2uWktqQQ5JKEhGzv5TkHoMk3VzdewytKgz2GKav5Oxg/qaZvxlrpscgSWqH1vQYPFxVkkZT\n9+GqrSkMbcghSSWZ+eZzyUpdp4Sys4P5m2b+2WRhkCQNac1Skj0GSRqNPQZJ0orsMbRYyeuUJWcH\n8zfN/LPJwiBJGuJSkiQVyqUkSdJUtKYwlHwRvVJzQ9nZwfxNM38z6r6I3vratjymWu9fKkkzpH9o\n/9GjR2vZvj0GSSqUPQZJ0lTUXhgi4p0R8bmIeCgi/nLdr9eEUtcpoezsYP6mmX82TWOPIYGvAa8F\nLk/h9SRJt2DsHkNEHAfuApYy886B+d3AMXrF5nhmPrDs694MfDAz96+wTXsMkjSmNvUYTgC7Bici\nYh3wYDW/A9gXEduXfd0LwIZX2+inPvUpPvOZz/CNb3xjFZEkSZMydmHIzMeB55dN7wTOZ+aFzLwO\nnAL2AETEX4uInwf+Lb3isaJ77vkg73rXfj72sY+NG6lxJa9TlpwdzN8088+mSZ3HsAm4NDC+TK9Y\nkJkfBT56sw28+OJ3smHDFU6ePMnFixeZn59/5RLc/R9eW8eLi4utyuPYsePZHHe7XRYWFgCYm5uj\nLqs6jyEitgJn+j2GiLgb2JWZB6vxfmBnZh4acXsJyetfv5cPf/iH2bt379iZJGmtqavHMKk9hivA\nloHx5mpuDEd46aX/OaE4kjS7utWNeuqy2sNVo/roOwdsi4itEbEB2AucHm+TR1i//s2rjNOsOn9A\ndSs5O5i/aeZvRqfTade1kiLiJNAB3hQRF4HDmXkiIu4DHuPG4arPjLdl9xgkaRR17zGMXRgy8z2v\nMv9J4JO3nKhA/SZRiUrODuZvmvlnU2suomfzWZLG06YT3LRMqeuUUHZ2MH/TzD+bWnM/BnsMkjSa\nunsMLiVJUqHafh7DBLjHIEmjaOt5DDXwPIYmlJwdzN808zej7vMYWlQYJElt0KIew2Fe97rP8ZGP\nHLTHIEnfRH8p6ejRo7X0GFpUGGw+S9I4PI+h
xUpdp4Sys4P5m2b+2WRhkCQNadFSkj0GSRqFPQZJ\n0orsMbRYyeuUJWcH8zfN/LPJwiBJGuJSkiQVyqUkSdJUtKgwlHsRvZLXKUvODuZvmvmb0e1218q1\nksq9iJ4kTVPdF9GzxyBJhbLHIEmaiqkUhoi4LSLORcQPTuP1pq3UdUooOzuYv2nmn03T2mO4H/jV\nKb2WJOkWjN1jiIjjwF3AUmbeOTC/GzhGr9gcz8wHqvnvBd4EfCvw1cz8zRW2aY9BksbUph7DCWDX\n4ERErAMerOZ3APsiYnv1dAd4O/Ae4L2rTipJmoqxC0NmPg48v2x6J3A+My9k5nXgFLCn+vx/lpk/\nCfwy8G9uMW8rlbxOWXJ2MH/TzD+b1k9oO5uASwPjy/SKxSsy8xe/+Sbu5dq1p3j00Ze4evUq8/Pz\ndDod4MYPr63jxcXFVuVx7NjxbI673S4LCwsAzM3NUZdVnccQEVuBM/0eQ0TcDezKzIPVeD+wMzMP\njbg9ewySNKa6egyT2mO4AmwZGG+u5sZQ7iUxJGmautWNeuqy2sNVo/roOwdsi4itEbEB2AucvtVw\npajzB1S3krOD+Ztm/tk0dmGIiJPAE8AdEXExIg5k5svAfcBjwNPAqcx8Zrwte60kSRpFZ+1cK8l7\nPkvSKPpLSd7zWZI0pE0nuNWk3OZzyeuUJWcH8zfN/M3oej8GSdKgNdRjcClJksbR9vMYJqDcpSRJ\nmqa2nsdQg3KXkkpdp4Sys4P5m2b+ZtS9lNSiwiBJaoMW9Rg8j0GSRuF5DJKkFa2B8xjKVeo6JZSd\nHczfNPPPJguDJGlIi5aS7DFI0ijsMUiSVmSPocVKXqcsOTuYv2nmn00WBknSEJeSJKlQLiVJkqai\nRYWh3IvolbxOWXJ2MH/TzN8M78cgSRri/RgkSSuyxyBJmoraC0NEbI+IhyLikYh4X92v14RS1ymh\n7Oxg/qaZfzbVXhgy89nMfD9wD/AX6n49SdKtGbvHEBHHgbuApcy8c2B+N3CMXrE5npkPDDz3LuB9\nwMOZeWqFbdpjkKQxtanHcALYNTgREeuAB6v5HcC+iNjefz4zz2TmDwH7byGrJGkKxi4Mmfk48Pyy\n6Z3A+cy8kJnXgVPAHoCIeGdEfCgifh74zVsN3EYlr1OWnB3M3zTzz6b1E9rOJuDSwPgyvWJBZn4W\n+OzNN3Ev1649xaOPvsTVq1eZn5+n0+kAN354bR0vLi62Ko9jx45nc9ztdllYWABgbm6OuqzqPIaI\n2Aqc6fcYIuJuYFdmHqzG+4GdmXloxO3ZY5CkMdXVY5jUHsMVYMvAeHM1N4ZyL4khSdPUrW7UU5fV\nHq4a1UffOWBbRGyNiA3AXuD0rYYrRZ0/oLqVnB3M3zTzz6axC0NEnASeAO6IiIsRcSAzXwbuAx4D\nngZOZeYz423ZayVJ0ig6a+daSd7zWZJG0V9K8p7PkqQhbTrBrSblNp9LXqcsOTuYv2nmb0bX+zFI\nkgatoR6DS0mSNI62n8cwAeUuJUnSNLX1PIYalLuUVOo6JZSdHczfNPM3o+6lpBYVBklSG7Sox+B5\nDJI0Cs9jkCStaA2cx1CuUtcpoezsYP6mmX82WRgkSUNatJRkj0GSRmGPQZK0InsMLVbyOmXJ2cH8\nTTP/bLIwSJKGuJQkSYVyKUmSNBUtKgzlXkSv5HXKkrOD+Ztm/mZ4PwZJ0hDvxyBJWpE9BknSVNRe\nGCJiT0R8OCJ+JSK+r+7Xa0Kp65RQdnYwf9PMP5tqv4NbZn4c+HhEfDvwL4BP1/2akqTVG7vHEBHH\ngbuApcy8c2B+N3CM3l7I8cx8YNnX/UvglzJzcYVt2mOQpDG1qcdwAtg1OBER64AHq/kdwL6I2D7w\n/E8Dn1ipKEiS2mXswpCZjwPPL5veCZzPzAuZeR04BewBiIj7gO8BfjQiDt5i3lYqeZ2y5Oxg/qaZ\nfzZNqsew
Cbg0ML5Mr1iQmT8D/MzNN3Ev1649xaOPvsTVq1eZn5+n0+kAN354bR0vLi62Ko9jx45n\nc9ztdllYWABgbm6OuqzqPIaI2Aqc6fcYIuJuYFdmHqzG+4GdmXloxO3ZY5CkMdXVY5jUHsMVYMvA\neHM1N4ZyL4khSdPUrW7UU5fVnscQ1UffOWBbRGyNiA3AXuD0rYYrRZ0/oLqVnB3M3zTzz6axC0NE\nnASeAO6IiIsRcSAzXwbuAx4DngZOZeYz423ZayVJ0ig6a+daSd7zWZJG0V9K8p7PkqQhbTrBrSa9\n5vP73vf3iQgigo0b55oONZKS1ylLzg7mb5r5m9Fda/djePHFJSCBZGnpQtOhJKl11lCPobeU9LWv\n/Sq9wgAQtCGfJLVR289jmADPY5CkUbT1PIYalHu4aqnrlFB2djB/08zfjLqXklpUGCRJbdCiHkPv\nPIavf/0s9hgk6dWtufMYbD5L0mjWwHkM5Sp1nRLKzg7mb5r5Z5OFQZI0pEVLSfYYJGkU9hhakE+S\n2sgeQ4uVvE5ZcnYwf9PMP5ssDJKkIS4lSVKhvFaSJAnwWklFKHmdsuTsYP6mmb8ZXitJkjRV9hgk\nqVAeripJmoraC0NEvDUifiEiHqn7tZpS6jollJ0dzN8088+m2gtDZn45M99b9+tIkiZj7B5DRBwH\n7gKWMvPOgfndwDF6xeZ4Zj6w7Oseycy/8SrbtMcgSWNqU4/hBLBrcCIi1gEPVvM7gH0RsX3Z1008\nvCRp8sYuDJn5OPD8sumdwPnMvJCZ14FTwB6AiHhjRDwEzEfE/bcauI1KXqcsOTuYv2nmn02TOvN5\nE3BpYHyZXrEgM/8QeP/NN3Ev1649VT0+Bsy/8kz/h9fpdFo5XlxcbFUex44dz+a42+2ysLAAwNzc\nHHVZ1XkMEbEVONPvMUTE3cCuzDxYjfcDOzPz0Ijb834MkjSibs33Y5jUUUlXgC0D483V3BjKvSSG\nJE1Tp6WXxAiGm8nngG0RsTUiNgB7gdPjbbLci+j1d/VKVHJ2MH/TzN+MbrfbrsIQESeBJ4A7IuJi\nRBzIzJeB+4DHgKeBU5n5zHhbdo9BkkZR9x5Di66VtFKP4VuBawC85S1buXr1uYYSSlJ71N1jaFFh\nWPkENxvRkrSyNp3gVpPV9Rg2bpwjIogINm6cm3ysEZS6TgllZwfzN838zWhdj6E+q+sxLC1doLdX\nkdVjSZpta6jHMPpS0saNc8uKgMtNktYe7/k84MZeAngJJklrTb/5XJfil5LaoNR1Sig7O5i/aeZv\nRt1LSS3aY7iZ1xLh3oEk1a1FPYaVr5U07uM2fD+SVCfPY7AwSNKK1sB5DOUqdZ0Sys4O5m+a+WeT\nhUGSNKRFS0n2GCRpFPYYLAyStCJ7DC1W8jplydnB/E0z/2yyMEiShriUJEmF8lpJkiTAayUVoeR1\nypKzg/mbZv5m1H2tpBYVBklSG9hjkKRCebiqJGkqai8MEXFbRCxExL+OiPfU/XpNKHWdEsrODuZv\nmvln0zT2GH4E+LXM/NvAu6fwepKkWzB2jyEijgN3AUuZeefA/G7gGL1iczwzH6jmPwB8IjOfjIhf\nzswfW2Gb9hgkaUxt6jGcAHYNTkTEOuDBan4HsC8itldPXwI29z91lTklSVMydmHIzMeB55dN7wTO\nZ+aFzLwOnAL2VM99FPjRiPhZ4MythL253u0/I4KNG+demd24cW6s+XGVvE5ZcnYwf9PWYv5JvW+0\n2aTOfN5Eb8+g7zK9YkFm/l/gb918E/dy7dpT1eNjwPzAc90VPr8LdJY9f43eslKXpaW/8spnLi1d\nAM4CHZaW4pV/DL35P/r5/ec7nc5I48XFxbE+37Fjx+WOX+39ZBqv3+12WVhYAGBubo66rOo8hojY\nCpzp9xgi4m5gV2YerMb7gZ2ZeWjE7U3sfgwr9RsixpuXpFfThveNbs33Y5
jUUUlXgC0D483V3BjK\nvSSGJE1Tp6WXxAiGG8nngG0RsTUiNgB7gdPjbbLci+j1d/VKVHJ2MH/TzN+MbrfbrsIQESeBJ4A7\nIuJiRBzIzJeB+4DHgKeBU5n5zHhbdo9BkkZR9x5Di66VZI9BUvu14X2j7h5DiwrDZE5wszBIqlOb\n3jfadIJbTewxNKHk7GD+ppm/Ga3rMdTHHoMkjWIN9RhcSpLUfm163/Cez5IkwHs+F6HUdUooOzuY\nv2nmb0ZbT3CTJM2oFvUYPI9BUvu14X3D8xgsDJJapE3vG2vgPIZylbpOCWVnB/M3zfyzycIgSRrS\noqUkewyS2q8N7xv2GCwMklqkTe8b9hharOR1ypKzg/mbZv7ZZGGQJA1xKaklu4SSytCm9w2vlSRJ\nArxWUhFKXqcsOTuYv2nmb4bXSpIkTZU9hpasFUoqQ5veNzxcVZI0FbUWhoh4a0T8QkQ8UufrNK3U\ndUooOzuYv2nmn021FobM/HJmvrfO12iDxcXFpiOsWsnZwfxNM/9sGqkwRMTxiFiKiCeXze+OiGcj\n4ksRcX89EdvvhRdeaDrCqpWcHczfNPPPplH3GE4AuwYnImId8GA1vwPYFxHbq+d+PCI+GBHf2f/0\nCeWVJNVspMKQmY8Dzy+b3gmcz8wLmXkdOAXsqT7/4cz8SeBaRDwEzN9sj+L229/F9eufG/sbaIPn\nnnuu6QirVnJ2MH/TzD+bRj5cNSK2Amcy885qfDewKzMPVuP9wM7MPDR2iAiPE5WkVZjZS2LU8Y1J\nklbnVo5KugJsGRhvruYkSQUbpzAEw03kc8C2iNgaERuAvcDpSYaTJE3fqIerngSeAO6IiIsRcSAz\nXwbuAx4DngZOZeYz9UWVJE1FZjb2AewGngW+BNzfcJbjwBLw5MDcd9ArfL8HfAp4w8BzPwWcB54B\nvn9g/m3Ak9X3dGxgfgO9I7fOA/8R2DLB7JuB36JXoL8IHCos/2uB3wZ+t8p/uKT8A6+xDvgCcLq0\n/MBzwH+pfgb/qcD8bwB+rcrzNPD2EvIDd1R/51+o/nwRONR09on+x1jFf6L/BmwFvgVYBLY3mOcv\nAvMMF4YHgH9cPb4f+Onq8Z+ufojrgbnq++gf4fXbwHdXjz9B78gtgPcDP1c9vofeHtaksm8E5qvH\n31b9Y9peSv5qm7dVf74G+Dy9w6GLyV9t9x8Av8SNwlBMfuD3ge9YNldS/gXgQPV4Pb1CUUz+arvr\ngD8A/kTT2Sf6jY35l/DngU8OjD9A83sNWxkuDM8Cb6kebwSeXSkr8El6v6FsBP7rwPxe4KHq8b8D\n3l49fg3wlRq/j48B31tifuA24HeA7y4pP729tk8DHW4UhpLyfxl407K5IvIDtwP/fYX5IvIPvN73\nA/++DdmbvLrqJuDSwPhyNdcmb87MJYDMvAr07yS0PPuVam4Tve+jb/B7euVrstefeSEi3jjpwBEx\nR2/P5/P0/mEVkT8i1kXE7wJXgU9n5rmS8gP/CvhH3LgeM4XlT+DTEXEuIvrXNysl/1uBr0bEiYj4\nQkR8OCJuKyh/3z3Ayepxo9m97PZ48uafMrKJn7sREd8GPAr8vcz83/zRvK3Nn5nfyMw/S+83750R\nsYNC8kfEDwFLmbl4k+22Mn/lHZn5NuAHgb8bEX+JQv7+6S2rvA342ep7+D/0frMuJT8R8S3Au+n1\nSaDh7E0WhhLOg1iKiLcARMRGoH9T6iv01gH7+tlfbX7oayLiNcDtmfmHkwoaEevpFYWHM/PjpeXv\ny8z/BXTpHZhQSv53AO+OiN8HfgX4qxHxMHC1kPxk5v+o/vwKvaXInZTz938ZuJSZv1ONf51eoSgl\nP8APAP85M79ajRvN3mRhaON5EMvP1TgN3Fs9/gng4wPzeyNiQ0S8FdhG70iOq8CLEbEzerd5+pvL\nvuYnqsd/nd5RRJP0EXprjB8qLX9E/P
GIeEP1+HXA99E74qKI/Jn5TzJzS2b+SXr/jn8rM38cOFNC\n/oi4rdrbJCL+GL217i9Szt//EnApIu6opr6H3pFJReSv7KP3S0Vfs9kn3UAZs9mym94RNOeBDzSc\n5SS9IwKuAReBA/QOGftMlfEx4NsHPv+n6B0RsPyQsT9H7z/VeeBDA/OvBR6p5j8PzE0w+zuAl+kd\n2dU/9G038MZC8n9XlXmR3uF2/7SaLyL/su/lndxoPheRn94aff/fzhf7/xdLyV9t/8/Q+2VzEfgN\nekclFZGf3gEXXwFePzDXaPZW3PNZktQeNp8lSUMsDJKkIRYGSdIQC4MkaYiFQZI0xMIgSRpiYZAk\nDfn/IeRLBQQCoIAAAAAASUVORK5CYII=\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7fa611d0ead0>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots()\n",
"gaps_size_series.hist(ax=ax, bins=100, bottom=0.1)\n",
"ax.set_yscale('log')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.11"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment