-
-
Save jcjones/098c9ee81213e6816cf372194f45e918 to your computer and use it in GitHub Desktop.
MITM Search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.\n" | |
] | |
} | |
], | |
"source": [ | |
"from moztelemetry import get_pings_properties, get_one_ping_per_client\n", | |
"from moztelemetry.dataset import Dataset" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"pings = Dataset.from_source(\"telemetry\") \\\n", | |
" .where(submissionDate=lambda xx: xx.startswith(\"201612\")) \\\n", | |
" .where(docType=\"OTHER\") \\\n", | |
" .records(sc, sample=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"experiment_data = pings.filter(lambda xx: xx['meta']['docType'] == \"mitm-prevalence-beta51\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"example = experiment_data.take(5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"PythonRDD[4] at RDD at PythonRDD.scala:48" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"experiment_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{u'application': {u'architecture': u'x86',\n", | |
" u'buildId': u'20161123182536',\n", | |
" u'channel': u'beta',\n", | |
" u'displayVersion': u'50.0.1',\n", | |
" u'name': u'Firefox',\n", | |
" u'platformVersion': u'50.0.1',\n", | |
" u'vendor': u'Mozilla',\n", | |
" u'version': u'50.0.1',\n", | |
" u'xpcomAbi': u'x86-msvc'},\n", | |
" u'creationDate': u'2016-12-06T22:53:40.813Z',\n", | |
" u'id': u'0684c448-d0ff-432f-8be7-d961e3bfc23d',\n", | |
" 'meta': {u'Date': u'Tue, 06 Dec 2016 22:54:40 GMT',\n", | |
" u'Host': u'incoming.telemetry.mozilla.org',\n", | |
" 'Hostname': u'ip-172-31-7-210',\n", | |
" u'Size': 381.0,\n", | |
" 'Timestamp': 1481064883916387328L,\n", | |
" 'Type': u'telemetry',\n", | |
" u'appBuildId': u'20161123182536',\n", | |
" u'appName': u'Firefox',\n", | |
" u'appUpdateChannel': u'beta',\n", | |
" u'appVendor': u'Mozilla',\n", | |
" u'appVersion': u'50.0.1',\n", | |
" u'creationTimestamp': 1.481064820813e+18,\n", | |
" u'docType': u'mitm-prevalence-beta51',\n", | |
" u'documentId': u'0684c448-d0ff-432f-8be7-d961e3bfc23d',\n", | |
" u'geoCity': u'Casablanca',\n", | |
" u'geoCountry': u'MA',\n", | |
" u'normalizedChannel': u'beta',\n", | |
" u'sourceName': u'telemetry',\n", | |
" u'sourceVersion': u'4',\n", | |
" u'submissionDate': u'20161206'},\n", | |
" u'payload': {u'error': u'connection error'},\n", | |
" u'type': u'mitm-prevalence-beta51',\n", | |
" u'version': 4},\n", | |
" {u'application': {u'architecture': u'x86-64',\n", | |
" u'buildId': u'20161123182536',\n", | |
" u'channel': u'beta',\n", | |
" u'displayVersion': u'50.0.1',\n", | |
" u'name': u'Firefox',\n", | |
" u'platformVersion': u'50.0.1',\n", | |
" u'vendor': u'Mozilla',\n", | |
" u'version': u'50.0.1',\n", | |
" u'xpcomAbi': u'x86_64-msvc'},\n", | |
" u'creationDate': u'2016-12-06T22:41:54.713Z',\n", | |
" u'id': u'76a24cf3-103e-4257-af0b-25775c04a843',\n", | |
" 'meta': {u'Date': u'Tue, 06 Dec 2016 22:56:16 GMT',\n", | |
" u'Host': u'incoming.telemetry.mozilla.org',\n", | |
" 'Hostname': u'ip-172-31-27-45',\n", | |
" u'Size': 866.0,\n", | |
" 'Timestamp': 1481064977476990976L,\n", | |
" 'Type': u'telemetry',\n", | |
" u'appBuildId': u'20161123182536',\n", | |
" u'appName': u'Firefox',\n", | |
" u'appUpdateChannel': u'beta',\n", | |
" u'appVendor': u'Mozilla',\n", | |
" u'appVersion': u'50.0.1',\n", | |
" u'creationTimestamp': 1.481064114713e+18,\n", | |
" u'docType': u'mitm-prevalence-beta51',\n", | |
" u'documentId': u'76a24cf3-103e-4257-af0b-25775c04a843',\n", | |
" u'geoCity': u'??',\n", | |
" u'geoCountry': u'FI',\n", | |
" u'normalizedChannel': u'beta',\n", | |
" u'sourceName': u'telemetry',\n", | |
" u'sourceVersion': u'4',\n", | |
" u'submissionDate': u'20161206'},\n", | |
" u'payload': {u'chain': [{u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'197feaf3faa0f0ad637a89c97cb91336bfc114b6b3018203cbd9c3d10c7fa86c',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'},\n", | |
" {u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'154c433c491929c5ef686e838e323664a00e6a0d822ccc958fb4dab03e49a08f',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'},\n", | |
" {u'isBuiltInRoot': True,\n", | |
" u'sha256Fingerprint': u'4348a0e9444c78cb265e058d5e8944b4d84f9662bd26db257f8934a443c70161',\n", | |
" u'signatureAlgorithm': u'sha1WithRSAEncryption'}],\n", | |
" u'error': u'',\n", | |
" u'errorCode': 0},\n", | |
" u'type': u'mitm-prevalence-beta51',\n", | |
" u'version': 4},\n", | |
" {u'application': {u'architecture': u'x86-64',\n", | |
" u'buildId': u'20161123182536',\n", | |
" u'channel': u'beta',\n", | |
" u'displayVersion': u'50.0.1',\n", | |
" u'name': u'Firefox',\n", | |
" u'platformVersion': u'50.0.1',\n", | |
" u'vendor': u'Mozilla',\n", | |
" u'version': u'50.0.1',\n", | |
" u'xpcomAbi': u'x86_64-msvc'},\n", | |
" u'creationDate': u'2016-12-06T23:08:50.206Z',\n", | |
" u'id': u'35ab0474-7ec3-4d47-a413-72f11a74e290',\n", | |
" 'meta': {u'DNT': u'1',\n", | |
" u'Date': u'Tue, 06 Dec 2016 23:09:49 GMT',\n", | |
" u'Host': u'incoming.telemetry.mozilla.org',\n", | |
" 'Hostname': u'ip-172-31-27-43',\n", | |
" u'Size': 866.0,\n", | |
" 'Timestamp': 1481065784682795008L,\n", | |
" 'Type': u'telemetry',\n", | |
" u'appBuildId': u'20161123182536',\n", | |
" u'appName': u'Firefox',\n", | |
" u'appUpdateChannel': u'beta',\n", | |
" u'appVendor': u'Mozilla',\n", | |
" u'appVersion': u'50.0.1',\n", | |
" u'creationTimestamp': 1.481065730206e+18,\n", | |
" u'docType': u'mitm-prevalence-beta51',\n", | |
" u'documentId': u'35ab0474-7ec3-4d47-a413-72f11a74e290',\n", | |
" u'geoCity': u'Curitiba',\n", | |
" u'geoCountry': u'BR',\n", | |
" u'normalizedChannel': u'beta',\n", | |
" u'sourceName': u'telemetry',\n", | |
" u'sourceVersion': u'4',\n", | |
" u'submissionDate': u'20161206'},\n", | |
" u'payload': {u'chain': [{u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'197feaf3faa0f0ad637a89c97cb91336bfc114b6b3018203cbd9c3d10c7fa86c',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'},\n", | |
" {u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'154c433c491929c5ef686e838e323664a00e6a0d822ccc958fb4dab03e49a08f',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'},\n", | |
" {u'isBuiltInRoot': True,\n", | |
" u'sha256Fingerprint': u'4348a0e9444c78cb265e058d5e8944b4d84f9662bd26db257f8934a443c70161',\n", | |
" u'signatureAlgorithm': u'sha1WithRSAEncryption'}],\n", | |
" u'error': u'',\n", | |
" u'errorCode': 0},\n", | |
" u'type': u'mitm-prevalence-beta51',\n", | |
" u'version': 4},\n", | |
" {u'application': {u'architecture': u'x86',\n", | |
" u'buildId': u'20161123182536',\n", | |
" u'channel': u'beta',\n", | |
" u'displayVersion': u'50.0.1',\n", | |
" u'name': u'Firefox',\n", | |
" u'platformVersion': u'50.0.1',\n", | |
" u'vendor': u'Mozilla',\n", | |
" u'version': u'50.0.1',\n", | |
" u'xpcomAbi': u'x86-msvc'},\n", | |
" u'creationDate': u'2016-12-06T23:34:56.142Z',\n", | |
" u'id': u'ba33ff01-11f9-46f3-a951-3c88b38eeb20',\n", | |
" 'meta': {u'Date': u'Tue, 06 Dec 2016 23:35:54 GMT',\n", | |
" u'Host': u'incoming.telemetry.mozilla.org',\n", | |
" 'Hostname': u'ip-172-31-7-97',\n", | |
" u'Size': 860.0,\n", | |
" 'Timestamp': 1481067338784073728L,\n", | |
" 'Type': u'telemetry',\n", | |
" u'appBuildId': u'20161123182536',\n", | |
" u'appName': u'Firefox',\n", | |
" u'appUpdateChannel': u'beta',\n", | |
" u'appVendor': u'Mozilla',\n", | |
" u'appVersion': u'50.0.1',\n", | |
" u'creationTimestamp': 1.4810672961419999e+18,\n", | |
" u'docType': u'mitm-prevalence-beta51',\n", | |
" u'documentId': u'ba33ff01-11f9-46f3-a951-3c88b38eeb20',\n", | |
" u'geoCity': u'Newton',\n", | |
" u'geoCountry': u'US',\n", | |
" u'normalizedChannel': u'beta',\n", | |
" u'sourceName': u'telemetry',\n", | |
" u'sourceVersion': u'4',\n", | |
" u'submissionDate': u'20161206'},\n", | |
" u'payload': {u'chain': [{u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'197feaf3faa0f0ad637a89c97cb91336bfc114b6b3018203cbd9c3d10c7fa86c',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'},\n", | |
" {u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'154c433c491929c5ef686e838e323664a00e6a0d822ccc958fb4dab03e49a08f',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'},\n", | |
" {u'isBuiltInRoot': True,\n", | |
" u'sha256Fingerprint': u'4348a0e9444c78cb265e058d5e8944b4d84f9662bd26db257f8934a443c70161',\n", | |
" u'signatureAlgorithm': u'sha1WithRSAEncryption'}],\n", | |
" u'error': u'',\n", | |
" u'errorCode': 0},\n", | |
" u'type': u'mitm-prevalence-beta51',\n", | |
" u'version': 4},\n", | |
" {u'application': {u'architecture': u'x86',\n", | |
" u'buildId': u'20161129173726',\n", | |
" u'channel': u'beta',\n", | |
" u'displayVersion': u'50.0.2',\n", | |
" u'name': u'Firefox',\n", | |
" u'platformVersion': u'50.0.2',\n", | |
" u'vendor': u'Mozilla',\n", | |
" u'version': u'50.0.2',\n", | |
" u'xpcomAbi': u'x86-msvc'},\n", | |
" u'creationDate': u'2016-12-06T22:38:47.807Z',\n", | |
" u'id': u'69d9d1c5-9c82-4480-a230-b19ffe4adb45',\n", | |
" 'meta': {u'Date': u'Tue, 06 Dec 2016 22:39:47 GMT',\n", | |
" u'Host': u'incoming.telemetry.mozilla.org',\n", | |
" 'Hostname': u'ip-172-31-38-229',\n", | |
" u'Size': 860.0,\n", | |
" 'Timestamp': 1481063989965464576L,\n", | |
" 'Type': u'telemetry',\n", | |
" u'appBuildId': u'20161129173726',\n", | |
" u'appName': u'Firefox',\n", | |
" u'appUpdateChannel': u'beta',\n", | |
" u'appVendor': u'Mozilla',\n", | |
" u'appVersion': u'50.0.2',\n", | |
" u'creationTimestamp': 1.4810639278069998e+18,\n", | |
" u'docType': u'mitm-prevalence-beta51',\n", | |
" u'documentId': u'69d9d1c5-9c82-4480-a230-b19ffe4adb45',\n", | |
" u'geoCity': u'Cairo',\n", | |
" u'geoCountry': u'EG',\n", | |
" u'normalizedChannel': u'beta',\n", | |
" u'sourceName': u'telemetry',\n", | |
" u'sourceVersion': u'4',\n", | |
" u'submissionDate': u'20161206'},\n", | |
" u'payload': {u'chain': [{u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'197feaf3faa0f0ad637a89c97cb91336bfc114b6b3018203cbd9c3d10c7fa86c',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'},\n", | |
" {u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'154c433c491929c5ef686e838e323664a00e6a0d822ccc958fb4dab03e49a08f',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'},\n", | |
" {u'isBuiltInRoot': True,\n", | |
" u'sha256Fingerprint': u'4348a0e9444c78cb265e058d5e8944b4d84f9662bd26db257f8934a443c70161',\n", | |
" u'signatureAlgorithm': u'sha1WithRSAEncryption'}],\n", | |
" u'error': u'',\n", | |
" u'errorCode': 0},\n", | |
" u'type': u'mitm-prevalence-beta51',\n", | |
" u'version': 4}]" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"example" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 30, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"expected = [\"197feaf3faa0f0ad637a89c97cb91336bfc114b6b3018203cbd9c3d10c7fa86c\",\n", | |
" \"154c433c491929c5ef686e838e323664a00e6a0d822ccc958fb4dab03e49a08f\",\n", | |
" \"4348a0e9444c78cb265e058d5e8944b4d84f9662bd26db257f8934a443c70161\"]\n", | |
"def isChainAsExpected(xx):\n", | |
" if 'payload' not in xx:\n", | |
" return\n", | |
" if 'chain' not in xx['payload']:\n", | |
" return\n", | |
" if len(xx['payload']['chain']) != 3:\n", | |
" return xx['payload']['chain']\n", | |
" for idx, val in enumerate(expected):\n", | |
" chainEntry = xx['payload']['chain'][idx]['sha256Fingerprint']\n", | |
" if chainEntry != val:\n", | |
" #return \"Mismatch on {} - {} != {}\".format(idx, chainEntry, val)\n", | |
" return xx['payload']['chain']\n", | |
" return\n", | |
"test_data = map(isChainAsExpected, example)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 34, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[]" | |
] | |
}, | |
"execution_count": 34, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"filter(lambda xx: isChainAsExpected(xx) != None, example)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 35, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"abnormal_data = experiment_data.filter(lambda xx: isChainAsExpected(xx) != None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"PythonRDD[5] at RDD at PythonRDD.scala:48" | |
] | |
}, | |
"execution_count": 36, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"abnormal_data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 37, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"abnormal_result = abnormal_data.first()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 38, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"ename": "KeyboardInterrupt", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m\u001b[0m", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0mTraceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-38-8d06e8e16c8e>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mabnormal_data\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", | |
"\u001b[0;32m/usr/lib/spark/python/pyspark/rdd.py\u001b[0m in \u001b[0;36mcount\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1006\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1007\u001b[0m \"\"\"\n\u001b[0;32m-> 1008\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmapPartitions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1009\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1010\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstats\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/spark/python/pyspark/rdd.py\u001b[0m in \u001b[0;36msum\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 997\u001b[0m \u001b[0;36m6.0\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 998\u001b[0m \"\"\"\n\u001b[0;32m--> 999\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmapPartitions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfold\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moperator\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0madd\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1000\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1001\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/spark/python/pyspark/rdd.py\u001b[0m in \u001b[0;36mfold\u001b[0;34m(self, zeroValue, op)\u001b[0m\n\u001b[1;32m 871\u001b[0m \u001b[0;31m# zeroValue provided to each partition is unique from the one provided\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 872\u001b[0m \u001b[0;31m# to the final reduce call\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 873\u001b[0;31m \u001b[0mvals\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmapPartitions\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfunc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcollect\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 874\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mreduce\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvals\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mzeroValue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 875\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/spark/python/pyspark/rdd.py\u001b[0m in \u001b[0;36mcollect\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 774\u001b[0m \"\"\"\n\u001b[1;32m 775\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mSCCallSiteSync\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcontext\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mcss\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 776\u001b[0;31m \u001b[0mport\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mPythonRDD\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcollectAndServe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jrdd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrdd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 777\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_load_from_socket\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mport\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jrdd_deserializer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 778\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/spark/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mEND_COMMAND_PART\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 930\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 931\u001b[0;31m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 932\u001b[0m return_value = get_return_value(\n\u001b[1;32m 933\u001b[0m answer, self.gateway_client, self.target_id, self.name)\n", | |
"\u001b[0;32m/usr/lib/spark/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command, retry, binary)\u001b[0m\n\u001b[1;32m 693\u001b[0m \u001b[0mconnection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_connection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 694\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 695\u001b[0;31m \u001b[0mresponse\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mconnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 696\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbinary\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 697\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresponse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_create_connection_guard\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconnection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/usr/lib/spark/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py\u001b[0m in \u001b[0;36msend_command\u001b[0;34m(self, command)\u001b[0m\n\u001b[1;32m 826\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msocket\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msendall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"utf-8\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 827\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 828\u001b[0;31m \u001b[0manswer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msmart_decode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstream\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreadline\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 829\u001b[0m \u001b[0mlogger\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdebug\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Answer received: {0}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0manswer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 830\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0manswer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mproto\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mRETURN_MESSAGE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/home/hadoop/anaconda2/lib/python2.7/socket.pyc\u001b[0m in \u001b[0;36mreadline\u001b[0;34m(self, size)\u001b[0m\n\u001b[1;32m 449\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mTrue\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 450\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 451\u001b[0;31m \u001b[0mdata\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_sock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrecv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_rbufsize\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 452\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0merror\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 453\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0mEINTR\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
] | |
} | |
], | |
"source": [ | |
"abnormal_data.count()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 39, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{u'application': {u'architecture': u'x86-64',\n", | |
" u'buildId': u'20160920155715',\n", | |
" u'channel': u'beta',\n", | |
" u'displayVersion': u'50.0b1',\n", | |
" u'name': u'Firefox',\n", | |
" u'platformVersion': u'50.0',\n", | |
" u'vendor': u'Mozilla',\n", | |
" u'version': u'50.0',\n", | |
" u'xpcomAbi': u'x86_64-gcc3'},\n", | |
" u'creationDate': u'2016-12-06T22:54:31.860Z',\n", | |
" u'id': u'1e83698b-8cfe-c94f-aaa6-4393f348a4f5',\n", | |
" 'meta': {u'Date': u'Tue, 06 Dec 2016 22:55:31 GMT',\n", | |
" u'Host': u'incoming.telemetry.mozilla.org',\n", | |
" 'Hostname': u'ip-172-31-27-46',\n", | |
" u'Size': 707.0,\n", | |
" 'Timestamp': 1481064931103297536L,\n", | |
" 'Type': u'telemetry',\n", | |
" u'appBuildId': u'20160920155715',\n", | |
" u'appName': u'Firefox',\n", | |
" u'appUpdateChannel': u'beta',\n", | |
" u'appVendor': u'Mozilla',\n", | |
" u'appVersion': u'50.0',\n", | |
" u'creationTimestamp': 1.48106487186e+18,\n", | |
" u'docType': u'mitm-prevalence-beta51',\n", | |
" u'documentId': u'1e83698b-8cfe-c94f-aaa6-4393f348a4f5',\n", | |
" u'geoCity': u'Minneapolis',\n", | |
" u'geoCountry': u'US',\n", | |
" u'normalizedChannel': u'beta',\n", | |
" u'sourceName': u'telemetry',\n", | |
" u'sourceVersion': u'4',\n", | |
" u'submissionDate': u'20161206'},\n", | |
" u'payload': {u'chain': [{u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'5a75001493283c1893fbc3804b9f6fd4f6e1b9dd0f937c91fb97ecd6a3d177b5',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'},\n", | |
" {u'isBuiltInRoot': False,\n", | |
" u'sha256Fingerprint': u'afb8e7d141c43f61014c31bf1b14d4ad888b4a2509e5b04fb23eb7b6869a9b98',\n", | |
" u'signatureAlgorithm': u'sha256WithRSAEncryption'}],\n", | |
" u'error': u'',\n", | |
" u'errorCode': 0},\n", | |
" u'type': u'mitm-prevalence-beta51',\n", | |
" u'version': 4}" | |
] | |
}, | |
"execution_count": 39, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"abnormal_result" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 48, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def isChainBuiltInOrError(xx):\n", | |
" if 'payload' not in xx:\n", | |
" return\n", | |
" if 'chain' not in xx['payload']:\n", | |
" return\n", | |
" if xx['payload']['errorCode'] == 0:\n", | |
" return\n", | |
" for chainEntry in xx['payload']['chain']:\n", | |
" if chainEntry['isBuiltInRoot'] == True:\n", | |
" return xx\n", | |
" return\n", | |
"#map(isChainBuiltInOrError, example)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 49, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"mitm_data = abnormal_data.filter(lambda xx: isChainBuiltInOrError(xx) != None)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"mitm_result = mitm_data.take(10)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 90, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'isAccum': True,\n", | |
" 'rooterrors': Counter({'687fa451382278fff0c8b11f8d43d576671c6eb2bceab413fb83d965d06d2ff2 -12276': 16,\n", | |
" '73c176434f1bc6d5adf45b0e76e727287c8de57616c1e6e6141a2b2cbc7d8e4c -12276': 2,\n", | |
" 'c3846bf24b9e93ca64274c0ec67c1ecc5e024ffcacd2d74019350e81fe546ae4 -12276': 2}),\n", | |
" 'total_errors': Counter({-12276: 20}),\n", | |
" 'total_roots': Counter({u'687fa451382278fff0c8b11f8d43d576671c6eb2bceab413fb83d965d06d2ff2': 16,\n", | |
" u'73c176434f1bc6d5adf45b0e76e727287c8de57616c1e6e6141a2b2cbc7d8e4c': 2,\n", | |
" u'c3846bf24b9e93ca64274c0ec67c1ecc5e024ffcacd2d74019350e81fe546ae4': 2})}" | |
] | |
}, | |
"execution_count": 90, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from collections import defaultdict, Counter\n", | |
"\n", | |
"def countRoots(accum, xx):\n", | |
" if 'isAccum' not in accum:\n", | |
" # Happens on the first execution\n", | |
" accum = countRoots({'isAccum':True, \n", | |
" 'total_roots':Counter(), \n", | |
" 'total_errors':Counter(), \n", | |
" 'rooterrors':Counter()\n", | |
" }, accum)\n", | |
" if 'isAccum' in xx:\n", | |
" # Happens on the final executions, merging intermediate states\n", | |
" for k,v in xx['total_errors'].iteritems():\n", | |
" accum['total_errors'][k] += v\n", | |
" for k,v in xx['total_roots'].iteritems():\n", | |
" accum['total_roots'][k] += v\n", | |
" for k,v in xx['rooterrors'].iteritems():\n", | |
" accum['rooterrors'][k] += v\n", | |
" return accum\n", | |
"\n", | |
" # Primary analysis\n", | |
" if 'payload' not in xx:\n", | |
" return accum\n", | |
" if 'errorCode' not in xx['payload']:\n", | |
" return accum\n", | |
" if 'chain' not in xx['payload']:\n", | |
" return accum\n", | |
"\n", | |
" code=xx['payload']['errorCode']\n", | |
" if code != 0:\n", | |
" accum['total_errors'][code] += 1\n", | |
" for chainEntry in xx['payload']['chain']:\n", | |
" if chainEntry['isBuiltInRoot'] == True:\n", | |
" fp = chainEntry['sha256Fingerprint']\n", | |
" accum['total_roots'][fp] += 1\n", | |
" \n", | |
" fpcode = \"{} {}\".format(fp,code)\n", | |
" accum['rooterrors'][fpcode] += 1\n", | |
" return accum\n", | |
"\n", | |
"x=reduce(countRoots, mitm_result)\n", | |
"countRoots(x,x)\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"mitm_count_roots = mitm_data.reduce(countRoots)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 87, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'isAccum': True,\n", | |
" 'rooterrors': Counter({'0f993c8aef97baaf5687140ed59ad1821bb4afacf0aa9a58b5d57a338a3afbcb -12276': 1,\n", | |
" '687fa451382278fff0c8b11f8d43d576671c6eb2bceab413fb83d965d06d2ff2 -12276': 22,\n", | |
" '73c176434f1bc6d5adf45b0e76e727287c8de57616c1e6e6141a2b2cbc7d8e4c -12276': 1,\n", | |
" 'c3846bf24b9e93ca64274c0ec67c1ecc5e024ffcacd2d74019350e81fe546ae4 -12276': 1,\n", | |
" 'ff856a2d251dcd88d36656f450126798cfabaade40799c722de4d2b5db36a73a -12276': 1}),\n", | |
" 'total_errors': Counter({-12276: 26}),\n", | |
" 'total_roots': Counter({u'0f993c8aef97baaf5687140ed59ad1821bb4afacf0aa9a58b5d57a338a3afbcb': 1,\n", | |
" u'687fa451382278fff0c8b11f8d43d576671c6eb2bceab413fb83d965d06d2ff2': 22,\n", | |
" u'73c176434f1bc6d5adf45b0e76e727287c8de57616c1e6e6141a2b2cbc7d8e4c': 1,\n", | |
" u'c3846bf24b9e93ca64274c0ec67c1ecc5e024ffcacd2d74019350e81fe546ae4': 1,\n", | |
" u'ff856a2d251dcd88d36656f450126798cfabaade40799c722de4d2b5db36a73a': 1})}" | |
] | |
}, | |
"execution_count": 87, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"mitm_count_roots" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'isAccum': True,\n", | |
" 'rooterrors': Counter({'0f993c8aef97baaf5687140ed59ad1821bb4afacf0aa9a58b5d57a338a3afbcb -12276': 1,\n", | |
" '4348a0e9444c78cb265e058d5e8944b4d84f9662bd26db257f8934a443c70161 0': 2822178,\n", | |
" '687fa451382278fff0c8b11f8d43d576671c6eb2bceab413fb83d965d06d2ff2 -12276': 22,\n", | |
" '73c176434f1bc6d5adf45b0e76e727287c8de57616c1e6e6141a2b2cbc7d8e4c -12276': 1,\n", | |
" 'c3846bf24b9e93ca64274c0ec67c1ecc5e024ffcacd2d74019350e81fe546ae4 -12276': 1,\n", | |
" 'ff856a2d251dcd88d36656f450126798cfabaade40799c722de4d2b5db36a73a -12276': 1}),\n", | |
" 'total_errors': Counter({-16381: 1,\n", | |
" -16379: 11,\n", | |
" -16378: 1,\n", | |
" -12276: 53,\n", | |
" -12173: 1,\n", | |
" -8191: 1,\n", | |
" -8179: 299,\n", | |
" -8162: 14,\n", | |
" -8061: 351,\n", | |
" -8016: 28}),\n", | |
" 'total_roots': Counter({u'0f993c8aef97baaf5687140ed59ad1821bb4afacf0aa9a58b5d57a338a3afbcb': 1,\n", | |
" u'4348a0e9444c78cb265e058d5e8944b4d84f9662bd26db257f8934a443c70161': 2822178,\n", | |
" u'687fa451382278fff0c8b11f8d43d576671c6eb2bceab413fb83d965d06d2ff2': 22,\n", | |
" u'73c176434f1bc6d5adf45b0e76e727287c8de57616c1e6e6141a2b2cbc7d8e4c': 1,\n", | |
" u'c3846bf24b9e93ca64274c0ec67c1ecc5e024ffcacd2d74019350e81fe546ae4': 1,\n", | |
" u'ff856a2d251dcd88d36656f450126798cfabaade40799c722de4d2b5db36a73a': 1})}" | |
] | |
}, | |
"execution_count": 91, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"experiment_data.reduce(countRoots)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"anaconda-cloud": {}, | |
"kernelspec": { | |
"display_name": "Python [default]", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.12" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 1 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
# In[1]: | |
from moztelemetry import get_pings_properties, get_one_ping_per_client | |
from moztelemetry.dataset import Dataset | |
# In[2]: | |
# Telemetry records whose submissionDate starts with "201612" and whose
# docType dimension is "OTHER"; `sc` is supplied by the notebook session.
pings = (
    Dataset.from_source("telemetry")
    .where(submissionDate=lambda day: day.startswith("201612"))
    .where(docType="OTHER")
    .records(sc, sample=1)
)
# In[3]:
# Keep only the pings whose own meta docType names the experiment.
experiment_data = pings.filter(
    lambda ping: ping['meta']['docType'] == "mitm-prevalence-beta51"
)
# In[4]:
# A handful of records to try the helper functions on locally.
example = experiment_data.take(5)
# In[5]:
experiment_data
# In[6]:
example
# In[30]: | |
# SHA-256 fingerprints a ping's chain is compared against, position by
# position (index 0 of the chain against index 0 of this list).
expected = ["197feaf3faa0f0ad637a89c97cb91336bfc114b6b3018203cbd9c3d10c7fa86c",
            "154c433c491929c5ef686e838e323664a00e6a0d822ccc958fb4dab03e49a08f",
            "4348a0e9444c78cb265e058d5e8944b4d84f9662bd26db257f8934a443c70161"]


def isChainAsExpected(xx):
    """Return the ping's certificate chain when it deviates from ``expected``.

    Args:
        xx: A ping record (dict-like). Only ``xx['payload']['chain']`` is
            read; each chain entry is looked up by its
            ``'sha256Fingerprint'`` key.

    Returns:
        ``None`` when the ping has no ``'payload'``, no ``'chain'``, or a
        chain whose fingerprints equal ``expected`` entry for entry.
        Otherwise the chain itself: wrong length, a differing fingerprint,
        or an entry that carries no fingerprint at all.
    """
    if 'payload' not in xx:
        return None
    if 'chain' not in xx['payload']:
        return None

    chain = xx['payload']['chain']
    # Compare against the list's real length so the check follows `expected`
    # if fingerprints are ever added or removed.
    if len(chain) != len(expected):
        return chain

    for chainEntry, val in zip(chain, expected):
        # .get(): an entry without a fingerprint is reported as a mismatch
        # instead of raising KeyError and aborting the whole filter job.
        if chainEntry.get('sha256Fingerprint') != val:
            return chain
    return None
# Run the chain check over the local sample first.
test_data = map(isChainAsExpected, example)
# In[34]:
# Sample pings for which the check handed back a chain (i.e. not None).
filter(lambda ping: isChainAsExpected(ping) is not None, example)
# In[35]:
# The same selection applied to the full experiment data set.
abnormal_data = experiment_data.filter(
    lambda ping: isChainAsExpected(ping) is not None
)
# In[36]:
abnormal_data
# In[37]:
abnormal_result = abnormal_data.first()
# In[38]:
abnormal_data.count()
# In[39]:
abnormal_result
# In[48]: | |
def isChainBuiltInOrError(xx):
    """Return the ping if it has a non-zero error code and a built-in root.

    Args:
        xx: A ping record (dict-like). Reads ``xx['payload']['errorCode']``
            and the ``'isBuiltInRoot'`` flag of every entry in
            ``xx['payload']['chain']``.

    Returns:
        ``xx`` itself when ``errorCode`` is present and not 0 and at least
        one chain entry has ``isBuiltInRoot == True``; ``None`` in every
        other case (no payload, no chain, no errorCode, errorCode 0, or no
        built-in root in the chain).
    """
    if 'payload' not in xx:
        return None
    payload = xx['payload']
    if 'chain' not in payload:
        return None
    # A ping without an errorCode is skipped (the same choice countRoots
    # makes) rather than raising KeyError in the middle of a filter job.
    if payload.get('errorCode', 0) == 0:
        return None
    for chainEntry in payload['chain']:
        # .get(): entries lacking the flag are simply not built-in roots.
        # Equality with True is kept so that only True/1 matches, not any
        # truthy value.
        if chainEntry.get('isBuiltInRoot') == True:
            return xx
    return None
#map(isChainBuiltInOrError, example) | |
# In[49]: | |
# Of the pings with an unexpected chain, keep those for which
# isChainBuiltInOrError returns the ping rather than None.
mitm_data = abnormal_data.filter(
    lambda ping: isChainBuiltInOrError(ping) is not None
)
# In[50]:
# Small local sample used to exercise countRoots before the full reduce.
mitm_result = mitm_data.take(10)
# In[90]: | |
from collections import defaultdict, Counter | |
def countRoots(accum, xx):
    """Fold a ping, or a partial tally, into a running tally of roots/errors.

    Written to be passed to ``reduce``: either argument may be a raw ping or
    an accumulator returned by an earlier call. An accumulator is a dict with

    * ``'isAccum'``: ``True`` (marker that tells it apart from a ping),
    * ``'total_roots'``: Counter keyed by the ``sha256Fingerprint`` of every
      chain entry flagged ``isBuiltInRoot``,
    * ``'total_errors'``: Counter keyed by each non-zero ``errorCode``,
    * ``'rooterrors'``: Counter keyed by ``"<fingerprint> <errorCode>"``.

    Args:
        accum: Accumulator so far, or a raw ping on the first call of a
            reduction.
        xx: Next raw ping, or another accumulator to merge in.

    Returns:
        The accumulator, updated in place. When ``accum`` was a raw ping a
        new accumulator is created and returned instead. Pings lacking
        ``'payload'``, ``'errorCode'`` or ``'chain'`` leave it unchanged.
    """
    if 'isAccum' not in accum:
        # Happens on the first execution: the left operand is still a raw
        # ping, so fold it into a fresh accumulator first.
        accum = countRoots({'isAccum': True,
                            'total_roots': Counter(),
                            'total_errors': Counter(),
                            'rooterrors': Counter()},
                           accum)

    if 'isAccum' in xx:
        # Happens on the final executions, merging intermediate states.
        # Counter.update() adds counts (it does not overwrite them) and,
        # unlike iteritems(), exists on both Python 2.7 and Python 3.
        for field in ('total_errors', 'total_roots', 'rooterrors'):
            accum[field].update(xx[field])
        return accum

    # Primary analysis
    if 'payload' not in xx:
        return accum
    payload = xx['payload']
    if 'errorCode' not in payload or 'chain' not in payload:
        return accum

    code = payload['errorCode']
    if code != 0:
        accum['total_errors'][code] += 1
    for chainEntry in payload['chain']:
        # .get(): an entry without the flag is not counted as a built-in
        # root, instead of raising KeyError partway through the reduce.
        if chainEntry.get('isBuiltInRoot') == True:
            fp = chainEntry['sha256Fingerprint']
            accum['total_roots'][fp] += 1
            # Recorded for every built-in root, including code 0.
            accum['rooterrors']["{} {}".format(fp, code)] += 1
    return accum
# Local dry run on the sampled pings before handing countRoots to Spark.
x = reduce(countRoots, mitm_result)
# Exercises the merge branch by folding the result into itself; when x is
# an accumulator its counts are added onto themselves in place.
countRoots(x, x)
# In[86]:
# Tally over the pings selected by isChainBuiltInOrError.
mitm_count_roots = mitm_data.reduce(countRoots)
# In[87]:
mitm_count_roots
# In[91]:
# Same tally over every experiment ping.
experiment_data.reduce(countRoots)
# In[ ]: | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment