Skip to content

Instantly share code, notes, and snippets.

@hellais
Created May 20, 2020 15:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hellais/784a8115642d2b47072bfe4e177d1c59 to your computer and use it in GitHub Desktop.
Save hellais/784a8115642d2b47072bfe4e177d1c59 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 130,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import io\n",
"import yaml\n",
"import json\n",
"from datetime import datetime, timedelta\n",
"\n",
"import pandas as pd\n",
"import psycopg2\n",
"import boto3\n",
"\n",
"import requests\n",
"from tqdm import tqdm\n",
"\n",
"from dateutil.parser import parse as parse_date\n",
"from urllib.parse import urlencode, quote, urlparse\n",
"\n",
"import requests\n",
"import lz4framed\n",
"from urllib.parse import urljoin\n",
"import gzip\n",
"import subprocess\n",
"import tarfile"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"AUTOCLAVED_BASE_URL = 'http://s3.amazonaws.com/ooni-data/autoclaved/jsonl.tar.lz4/'"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"CANNED_BASE_URL = 'http://s3.amazonaws.com/ooni-data/canned/'"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"def load_autoclaved_index(bucket_date):\n",
"    \"\"\"Download and decompress the autoclaved index of one bucket.\n",
"\n",
"    Fetches ``<bucket_date>/index.json.gz`` relative to AUTOCLAVED_BASE_URL and\n",
"    returns the decompressed payload split on newlines, as a list of bytes.\n",
"    If the payload ends with a newline, the last element is ``b''``.\n",
"\n",
"    Raises requests.HTTPError if the server answers with an error status.\n",
"    \"\"\"\n",
"    index_url = urljoin(AUTOCLAVED_BASE_URL, '{}/index.json.gz'.format(bucket_date))\n",
"    r = requests.get(index_url)\n",
"    # Fail on 4xx/5xx here instead of handing an error page to gzip.\n",
"    r.raise_for_status()\n",
"    return gzip.decompress(r.content).split(b'\\n')"
]
},
{
"cell_type": "code",
"execution_count": 158,
"metadata": {},
"outputs": [],
"source": [
"def load_canned_index(bucket_date):\n",
"    \"\"\"Download and decompress the canned index of one bucket.\n",
"\n",
"    Fetches ``<bucket_date>/index.json.gz`` relative to CANNED_BASE_URL and\n",
"    returns its non-empty lines as a list of bytes.\n",
"\n",
"    Raises requests.HTTPError if the server answers with an error status.\n",
"    \"\"\"\n",
"    index_url = urljoin(CANNED_BASE_URL, '{}/index.json.gz'.format(bucket_date))\n",
"    r = requests.get(index_url)\n",
"    # Fail on 4xx/5xx here instead of handing an error page to gzip.\n",
"    r.raise_for_status()\n",
"    # Filter empty lines rather than slicing off the last element, so the final\n",
"    # entry survives when the payload does not end with a newline.\n",
"    return [entry for entry in gzip.decompress(r.content).split(b'\\n') if entry]"
]
},
{
"cell_type": "code",
"execution_count": 242,
"metadata": {},
"outputs": [],
"source": [
"import copy\n",
"\n",
"\n",
"def flatten_measurements(ml):\n",
"    \"\"\"Return flattened deep copies of the measurements in ``ml``.\n",
"\n",
"    For each measurement the ``test_keys`` mapping and then the\n",
"    ``annotations`` mapping are removed and their items merged into the top\n",
"    level of the copy. The input measurements are left untouched.\n",
"    \"\"\"\n",
"    flat_list = []\n",
"    for m in ml:\n",
"        flat_m = copy.deepcopy(m)\n",
"        # Either key may be absent or null (measurements with null test_keys\n",
"        # do occur); treat both cases as an empty mapping instead of raising.\n",
"        for nested_key in ('test_keys', 'annotations'):\n",
"            flat_m.update(flat_m.pop(nested_key, None) or {})\n",
"        flat_list.append(flat_m)\n",
"    return flat_list"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {},
"outputs": [],
"source": [
"def decompress_tar(filename):\n",
"    \"\"\"Download an lz4-compressed tarball and open it from memory.\n",
"\n",
"    ``filename`` is resolved against CANNED_BASE_URL. The downloaded bytes are\n",
"    piped through the external ``lz4 -d`` command and the output is opened as\n",
"    an uncompressed tar archive backed by an in-memory buffer.\n",
"\n",
"    Raises requests.HTTPError on an HTTP error status and\n",
"    subprocess.CalledProcessError if ``lz4`` exits with a non-zero status.\n",
"    \"\"\"\n",
"    # Download first so that no lz4 process is left behind when the request fails.\n",
"    r = requests.get(urljoin(CANNED_BASE_URL, filename))\n",
"    r.raise_for_status()\n",
"    p = subprocess.run([\"lz4\", \"-d\"], input=r.content, stdout=subprocess.PIPE, check=True)\n",
"    return tarfile.open(mode=\"r:\", fileobj=io.BytesIO(p.stdout))"
]
},
{
"cell_type": "code",
"execution_count": 204,
"metadata": {},
"outputs": [],
"source": [
"def stream_can_lines(can_filename):\n",
"    \"\"\"Yield every line of every file stored in a canned tarball.\n",
"\n",
"    The archive is obtained through decompress_tar(). Lines are yielded as\n",
"    bytes, member by member, and the archive is closed once the generator is\n",
"    exhausted or closed.\n",
"\n",
"    Raises NotImplementedError (on first iteration, since this is a generator)\n",
"    for file names ending in ``.json.lz4``.\n",
"    \"\"\"\n",
"    bn = os.path.basename(can_filename)\n",
"    if bn.endswith('.json.lz4'):\n",
"        # NotImplemented is a constant, not an exception; calling it is a TypeError.\n",
"        raise NotImplementedError('.json.lz4 not supported')\n",
"    with decompress_tar(can_filename) as tar_fd:\n",
"        for m in tar_fd.getmembers():\n",
"            f = tar_fd.extractfile(m)\n",
"            if f is None:\n",
"                # extractfile() returns None for members that carry no data.\n",
"                continue\n",
"            for line in f:\n",
"                yield line"
]
},
{
"cell_type": "code",
"execution_count": 173,
"metadata": {},
"outputs": [],
"source": [
"def is_test_name(can_filename, test_name):\n",
"    \"\"\"Tell whether a can file name belongs to the test ``test_name``.\n",
"\n",
"    Only the basename is inspected. It matches when it starts with\n",
"    ``<test_name>.`` (e.g. ``tor.0.tar.lz4``) or contains ``-<test_name>-``\n",
"    (e.g. ``...-AS6503-web_connectivity-...``).\n",
"    \"\"\"\n",
"    bn = os.path.basename(can_filename)\n",
"    # Anchor the prefix on the '.' separator so that 'tor' does not also select\n",
"    # a different test whose name merely begins with 'tor'.\n",
"    return bn.startswith('{}.'.format(test_name)) or '-{}-'.format(test_name) in bn"
]
},
{
"cell_type": "code",
"execution_count": 218,
"metadata": {},
"outputs": [],
"source": [
"def get_can_filenames(bucket_date):\n",
"    \"\"\"List the ``filename`` field of every entry in a bucket's canned index.\"\"\"\n",
"    return [\n",
"        json.loads(raw_entry)['filename']\n",
"        for raw_entry in load_canned_index(bucket_date)\n",
"    ]"
]
},
{
"cell_type": "code",
"execution_count": 219,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 220,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 222,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 230,
"metadata": {},
"outputs": [],
"source": [
"# Gather the tor can file names of every bucket in the date range.\n",
"tor_cans = []\n",
"for dt in pd.date_range(start='2020-05-01', end='2020-05-18'):\n",
"    can_filenames = get_can_filenames(dt.strftime(\"%Y-%m-%d\"))\n",
"    tor_cans.extend(name for name in can_filenames if is_test_name(name, 'tor'))"
]
},
{
"cell_type": "code",
"execution_count": 262,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"report_id: 20200501T072936Z_AS31036_cioQ7jZe4vUmiKJoSrZjusbc8cfhSgen0vM1Z5aYcpdWtZJauc has no test_keys\n",
"report_id: 20200504T001118Z_AS4812_yA6yk698E8X8MSRVbCUvF43Ui6f54mcg7mbBvCuAZ0tDkZa38l has no test_keys\n",
"report_id: 20200514T233416Z_AS262785_HxlqfdpJD9OgxpHCSqpHr8iuEFG5eyyOnvppxKWajx9PBAlUqe has no test_keys\n"
]
}
],
"source": [
"# Load every tor measurement; those without test_keys are reported and skipped.\n",
"measurement_list = []\n",
"for can_filename in tor_cans:\n",
"    for line in stream_can_lines(can_filename):\n",
"        msmt = json.loads(line)\n",
"        if msmt['test_keys'] is not None:\n",
"            measurement_list.append(msmt)\n",
"        else:\n",
"            print('report_id: %s has no test_keys' % msmt['report_id'])"
]
},
{
"cell_type": "code",
"execution_count": 263,
"metadata": {},
"outputs": [],
"source": [
"def process_tor_measurement(msmt):\n",
"    \"\"\"Summarise the reachability counters of one tor measurement.\n",
"\n",
"    Copies the ``*_accessible`` / ``*_total`` counters out of\n",
"    ``msmt['test_keys']``, derives one ``*_blocked`` flag per target kind\n",
"    (nothing accessible although the total is above zero) and sets\n",
"    ``is_anomaly`` when any of those flags is true.\n",
"    \"\"\"\n",
"    tk = msmt['test_keys']\n",
"    assert tk is not None, msmt\n",
"\n",
"    # Counters are copied in this exact order; it fixes the key order of the result.\n",
"    res = {\n",
"        counter: tk[counter]\n",
"        for kind in ('obfs4', 'dir_port', 'or_port_dirauth', 'or_port')\n",
"        for counter in (kind + '_accessible', kind + '_total')\n",
"    }\n",
"\n",
"    blocked_flags = []\n",
"    for kind in ('obfs4', 'dir_port', 'or_port', 'or_port_dirauth'):\n",
"        is_blocked = res[kind + '_accessible'] <= 0 and res[kind + '_total'] > 0\n",
"        res[kind + '_blocked'] = is_blocked\n",
"        blocked_flags.append(is_blocked)\n",
"\n",
"    res['is_anomaly'] = any(blocked_flags)\n",
"    return res"
]
},
{
"cell_type": "code",
"execution_count": 264,
"metadata": {},
"outputs": [],
"source": [
"def extract_common(m):\n",
"    \"\"\"Extract the test-independent columns of measurement ``m``.\n",
"\n",
"    Returns a new dict with probe_cc, probe_asn, test_start_time, report_id,\n",
"    test_runtime and test_name copied from ``m``, plus ``network_type`` and a\n",
"    ``software_string`` of the form platform/software_name/software_version/flavor.\n",
"    Missing annotation values fall back to 'unknown' (platform) or 'na'.\n",
"    \"\"\"\n",
"    common_fields = [\n",
"        'probe_cc',\n",
"        'probe_asn',\n",
"        'test_start_time',\n",
"        'report_id',\n",
"        'test_runtime',\n",
"        'test_name'\n",
"    ]\n",
"    common = {field: m[field] for field in common_fields}\n",
"    # The annotations live on the measurement itself. Looking them up in\n",
"    # `common` (which never holds that key) always produced the fallbacks.\n",
"    annotations = m.get('annotations') or {}\n",
"    platform = annotations.get('platform', 'unknown')\n",
"    flavor = annotations.get('flavor', 'na')\n",
"    common['network_type'] = annotations.get('network_type', 'na')\n",
"    common['software_string'] = '%s/%s/%s/%s' % (platform, m['software_name'], m['software_version'], flavor)\n",
"    return common"
]
},
{
"cell_type": "code",
"execution_count": 269,
"metadata": {},
"outputs": [],
"source": [
"def process(m):\n",
"    \"\"\"Build one flat row: the common fields followed by the tor summary.\"\"\"\n",
"    return {**extract_common(m), **process_tor_measurement(m)}"
]
},
{
"cell_type": "code",
"execution_count": 270,
"metadata": {},
"outputs": [],
"source": [
"tor_msmts = [process(m) for m in measurement_list]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 273,
"metadata": {},
"outputs": [],
"source": [
"df = pd.DataFrame(tor_msmts)"
]
},
{
"cell_type": "code",
"execution_count": 275,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>probe_cc</th>\n",
" <th>probe_asn</th>\n",
" <th>test_start_time</th>\n",
" <th>report_id</th>\n",
" <th>test_runtime</th>\n",
" <th>test_name</th>\n",
" <th>network_type</th>\n",
" <th>software_string</th>\n",
" <th>obfs4_accessible</th>\n",
" <th>obfs4_total</th>\n",
" <th>...</th>\n",
" <th>dir_port_total</th>\n",
" <th>or_port_dirauth_accessible</th>\n",
" <th>or_port_dirauth_total</th>\n",
" <th>or_port_accessible</th>\n",
" <th>or_port_total</th>\n",
" <th>obfs4_blocked</th>\n",
" <th>dir_port_blocked</th>\n",
" <th>or_port_blocked</th>\n",
" <th>or_port_dirauth_blocked</th>\n",
" <th>is_anomaly</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>29</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-01 16:17:39</td>\n",
" <td>20200501T161739Z_AS4788_zUgCKHSxXSdWvhJw2mQFHL...</td>\n",
" <td>60.007625</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>3</td>\n",
" <td>16</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>40</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-01 22:19:15</td>\n",
" <td>20200501T221915Z_AS4788_TahYaHoOZT4M6FY4l41VIs...</td>\n",
" <td>60.008787</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>1</td>\n",
" <td>16</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>115</td>\n",
" <td>CN</td>\n",
" <td>AS24139</td>\n",
" <td>2020-05-03 15:22:35</td>\n",
" <td>20200503T152235Z_AS24139_Xib89kNzgKoDm2DtkUmbR...</td>\n",
" <td>540.436591</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-desktop/3.0.0/na</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>152</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-04 13:06:10</td>\n",
" <td>20200504T130610Z_AS4788_UtqbWilD7mnwse0Hcki8i9...</td>\n",
" <td>60.009271</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>1</td>\n",
" <td>16</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>178</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-05 04:05:57</td>\n",
" <td>20200505T040557Z_AS4788_K0My9bnsOxi8prBB5RkRen...</td>\n",
" <td>60.021903</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>1</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>198</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-05 13:16:42</td>\n",
" <td>20200505T131642Z_AS4788_n15wmUm0laLhozTfysHiXV...</td>\n",
" <td>60.007794</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>7</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>208</td>\n",
" <td>ZZ</td>\n",
" <td>AS0</td>\n",
" <td>2020-05-05 17:50:46</td>\n",
" <td>20200505T175046Z_AS0_mPGczP6PQ1vz0jyxUpES12Djq...</td>\n",
" <td>377.587875</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-desktop/3.0.1/na</td>\n",
" <td>6</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>6</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>255</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-06 18:20:03</td>\n",
" <td>20200506T182003Z_AS4788_KrTrudeyJFicClNYF3jrK7...</td>\n",
" <td>60.007490</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>4</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>279</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-07 04:06:19</td>\n",
" <td>20200507T040619Z_AS4788_YiyyXIhq1sd8bzhRTPiX0O...</td>\n",
" <td>60.006055</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>3</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>280</td>\n",
" <td>ZZ</td>\n",
" <td>AS136589</td>\n",
" <td>2020-05-07 05:00:09</td>\n",
" <td>20200507T050008Z_AS136589_Cn8PayVLrbdktFJBAcJa...</td>\n",
" <td>128.019293</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-desktop/3.0.1/na</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>7</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>357</td>\n",
" <td>IR</td>\n",
" <td>AS197207</td>\n",
" <td>2020-05-08 23:33:50</td>\n",
" <td>20200508T233351Z_AS197207_dSAyWGj3wlDXYNtqA0qH...</td>\n",
" <td>415.556119</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/miniooni/0.10.0/na</td>\n",
" <td>7</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>626</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-14 17:18:06</td>\n",
" <td>20200514T171806Z_AS4788_OtTSpAo8gSAfmPfnV8IQ8j...</td>\n",
" <td>60.009639</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>7</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>693</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-15 22:18:09</td>\n",
" <td>20200515T221809Z_AS4788_JNM3XD6EBQXJaQ78k6nrkB...</td>\n",
" <td>60.009084</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>4</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>737</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-16 21:17:01</td>\n",
" <td>20200516T211701Z_AS4788_WMK4Pc7DhUYyPko2TYmFJo...</td>\n",
" <td>60.014098</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>3</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>758</td>\n",
" <td>CN</td>\n",
" <td>AS24139</td>\n",
" <td>2020-05-17 09:41:32</td>\n",
" <td>20200517T094132Z_AS24139_QS3JAlv0yXH6NBGFCn5rb...</td>\n",
" <td>511.649366</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-desktop/3.0.0/na</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>763</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-17 13:06:00</td>\n",
" <td>20200517T130600Z_AS4788_tyZ88F7WsdJEffRLMPGU65...</td>\n",
" <td>60.014280</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>3</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>764</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-17 14:18:24</td>\n",
" <td>20200517T141824Z_AS4788_x2CM59oDjGZPwI6vPeXmUH...</td>\n",
" <td>60.012980</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>5</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <td>802</td>\n",
" <td>MY</td>\n",
" <td>AS4788</td>\n",
" <td>2020-05-18 08:06:07</td>\n",
" <td>20200518T080607Z_AS4788_D17iQWIh27W7HnMhrLNi2z...</td>\n",
" <td>60.026804</td>\n",
" <td>tor</td>\n",
" <td>na</td>\n",
" <td>unknown/ooniprobe-cli/3.0.0-rc.9/na</td>\n",
" <td>0</td>\n",
" <td>15</td>\n",
" <td>...</td>\n",
" <td>10</td>\n",
" <td>3</td>\n",
" <td>10</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>18 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" probe_cc probe_asn test_start_time \\\n",
"29 MY AS4788 2020-05-01 16:17:39 \n",
"40 MY AS4788 2020-05-01 22:19:15 \n",
"115 CN AS24139 2020-05-03 15:22:35 \n",
"152 MY AS4788 2020-05-04 13:06:10 \n",
"178 MY AS4788 2020-05-05 04:05:57 \n",
"198 MY AS4788 2020-05-05 13:16:42 \n",
"208 ZZ AS0 2020-05-05 17:50:46 \n",
"255 MY AS4788 2020-05-06 18:20:03 \n",
"279 MY AS4788 2020-05-07 04:06:19 \n",
"280 ZZ AS136589 2020-05-07 05:00:09 \n",
"357 IR AS197207 2020-05-08 23:33:50 \n",
"626 MY AS4788 2020-05-14 17:18:06 \n",
"693 MY AS4788 2020-05-15 22:18:09 \n",
"737 MY AS4788 2020-05-16 21:17:01 \n",
"758 CN AS24139 2020-05-17 09:41:32 \n",
"763 MY AS4788 2020-05-17 13:06:00 \n",
"764 MY AS4788 2020-05-17 14:18:24 \n",
"802 MY AS4788 2020-05-18 08:06:07 \n",
"\n",
" report_id test_runtime \\\n",
"29 20200501T161739Z_AS4788_zUgCKHSxXSdWvhJw2mQFHL... 60.007625 \n",
"40 20200501T221915Z_AS4788_TahYaHoOZT4M6FY4l41VIs... 60.008787 \n",
"115 20200503T152235Z_AS24139_Xib89kNzgKoDm2DtkUmbR... 540.436591 \n",
"152 20200504T130610Z_AS4788_UtqbWilD7mnwse0Hcki8i9... 60.009271 \n",
"178 20200505T040557Z_AS4788_K0My9bnsOxi8prBB5RkRen... 60.021903 \n",
"198 20200505T131642Z_AS4788_n15wmUm0laLhozTfysHiXV... 60.007794 \n",
"208 20200505T175046Z_AS0_mPGczP6PQ1vz0jyxUpES12Djq... 377.587875 \n",
"255 20200506T182003Z_AS4788_KrTrudeyJFicClNYF3jrK7... 60.007490 \n",
"279 20200507T040619Z_AS4788_YiyyXIhq1sd8bzhRTPiX0O... 60.006055 \n",
"280 20200507T050008Z_AS136589_Cn8PayVLrbdktFJBAcJa... 128.019293 \n",
"357 20200508T233351Z_AS197207_dSAyWGj3wlDXYNtqA0qH... 415.556119 \n",
"626 20200514T171806Z_AS4788_OtTSpAo8gSAfmPfnV8IQ8j... 60.009639 \n",
"693 20200515T221809Z_AS4788_JNM3XD6EBQXJaQ78k6nrkB... 60.009084 \n",
"737 20200516T211701Z_AS4788_WMK4Pc7DhUYyPko2TYmFJo... 60.014098 \n",
"758 20200517T094132Z_AS24139_QS3JAlv0yXH6NBGFCn5rb... 511.649366 \n",
"763 20200517T130600Z_AS4788_tyZ88F7WsdJEffRLMPGU65... 60.014280 \n",
"764 20200517T141824Z_AS4788_x2CM59oDjGZPwI6vPeXmUH... 60.012980 \n",
"802 20200518T080607Z_AS4788_D17iQWIh27W7HnMhrLNi2z... 60.026804 \n",
"\n",
" test_name network_type software_string \\\n",
"29 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"40 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"115 tor na unknown/ooniprobe-desktop/3.0.0/na \n",
"152 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"178 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"198 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"208 tor na unknown/ooniprobe-desktop/3.0.1/na \n",
"255 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"279 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"280 tor na unknown/ooniprobe-desktop/3.0.1/na \n",
"357 tor na unknown/miniooni/0.10.0/na \n",
"626 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"693 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"737 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"758 tor na unknown/ooniprobe-desktop/3.0.0/na \n",
"763 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"764 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"802 tor na unknown/ooniprobe-cli/3.0.0-rc.9/na \n",
"\n",
" obfs4_accessible obfs4_total ... dir_port_total \\\n",
"29 3 16 ... 10 \n",
"40 1 16 ... 10 \n",
"115 0 16 ... 10 \n",
"152 1 16 ... 10 \n",
"178 1 15 ... 10 \n",
"198 7 15 ... 10 \n",
"208 6 15 ... 10 \n",
"255 4 15 ... 10 \n",
"279 3 15 ... 10 \n",
"280 0 15 ... 10 \n",
"357 7 15 ... 10 \n",
"626 7 15 ... 10 \n",
"693 4 15 ... 10 \n",
"737 3 15 ... 10 \n",
"758 0 15 ... 10 \n",
"763 3 15 ... 10 \n",
"764 5 15 ... 10 \n",
"802 0 15 ... 10 \n",
"\n",
" or_port_dirauth_accessible or_port_dirauth_total or_port_accessible \\\n",
"29 0 10 0 \n",
"40 0 10 0 \n",
"115 0 10 0 \n",
"152 0 10 0 \n",
"178 0 10 0 \n",
"198 0 10 0 \n",
"208 6 10 0 \n",
"255 0 10 0 \n",
"279 0 10 0 \n",
"280 7 10 0 \n",
"357 0 10 0 \n",
"626 0 10 0 \n",
"693 0 10 0 \n",
"737 0 10 0 \n",
"758 0 10 0 \n",
"763 0 10 0 \n",
"764 0 10 0 \n",
"802 3 10 0 \n",
"\n",
" or_port_total obfs4_blocked dir_port_blocked or_port_blocked \\\n",
"29 0 False False False \n",
"40 0 False True False \n",
"115 0 True True False \n",
"152 0 False False False \n",
"178 0 False False False \n",
"198 0 False False False \n",
"208 0 False True False \n",
"255 0 False False False \n",
"279 0 False False False \n",
"280 0 True False False \n",
"357 0 False True False \n",
"626 0 False False False \n",
"693 0 False False False \n",
"737 0 False True False \n",
"758 0 True True False \n",
"763 0 False False False \n",
"764 0 False False False \n",
"802 0 True False False \n",
"\n",
" or_port_dirauth_blocked is_anomaly \n",
"29 True True \n",
"40 True True \n",
"115 True True \n",
"152 True True \n",
"178 True True \n",
"198 True True \n",
"208 False True \n",
"255 True True \n",
"279 True True \n",
"280 False True \n",
"357 True True \n",
"626 True True \n",
"693 True True \n",
"737 True True \n",
"758 True True \n",
"763 True True \n",
"764 True True \n",
"802 False True \n",
"\n",
"[18 rows x 21 columns]"
]
},
"execution_count": 275,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\n",
" df['is_anomaly'] == True\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# tor_cans is a flat list of file name strings, so index it by position.\n",
"tar_fd = decompress_tar(tor_cans[0])"
]
},
{
"cell_type": "code",
"execution_count": 151,
"metadata": {},
"outputs": [],
"source": [
"can = json.loads(canned_index[0])"
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"b'{\"file_crc32\": 215810753, \"file_sha1\": \"H13OP/FpPNAlNkXOsJ7tbtr/wFs=\", \"file_size\": 12117855, \"filename\": \"2020-04-01/20200401T171548Z-MX-AS6503-web_connectivity-20200401T171549Z_AS6503_D4cfRYRFM2R7OBOnIhBKl11fLpMIpmORa5Gd2MLdkVIn4QWOF0-0.2.0-probe.json.lz4\", \"text_crc32\": 2120001983, \"text_sha1\": \"rhTGWN8tvt0s4glepwUvxCKICLE=\", \"text_size\": 66878062, \"textname\": \"2020-04-01/20200401T171548Z-MX-AS6503-web_connectivity-20200401T171549Z_AS6503_D4cfRYRFM2R7OBOnIhBKl11fLpMIpmORa5Gd2MLdkVIn4QWOF0-0.2.0-probe.json\"}'"
]
},
"execution_count": 161,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"canned_index[-1]"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 163,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['2020-04-01/web_connectivity.02.tar.lz4',\n",
" '2020-04-01/web_connectivity.24.tar.lz4',\n",
" '2020-04-01/psiphon.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.54.tar.lz4',\n",
" '2020-04-01/web_connectivity.16.tar.lz4',\n",
" '2020-04-01/web_connectivity.21.tar.lz4',\n",
" '2020-04-01/web_connectivity.49.tar.lz4',\n",
" '2020-04-01/web_connectivity.32.tar.lz4',\n",
" '2020-04-01/web_connectivity.01.tar.lz4',\n",
" '2020-04-01/web_connectivity.46.tar.lz4',\n",
" '2020-04-01/web_connectivity.58.tar.lz4',\n",
" '2020-04-01/http_requests.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.19.tar.lz4',\n",
" '2020-04-01/whatsapp.0.tar.lz4',\n",
" '2020-04-01/ndt.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.40.tar.lz4',\n",
" '2020-04-01/example_with_input_non_interruptible.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.08.tar.lz4',\n",
" '2020-04-01/web_connectivity.45.tar.lz4',\n",
" '2020-04-01/example_with_failure.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.26.tar.lz4',\n",
" '2020-04-01/web_connectivity.30.tar.lz4',\n",
" '2020-04-01/web_connectivity.42.tar.lz4',\n",
" '2020-04-01/facebook_messenger.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.00.tar.lz4',\n",
" '2020-04-01/web_connectivity.59.tar.lz4',\n",
" '2020-04-01/web_connectivity.03.tar.lz4',\n",
" '2020-04-01/web_connectivity.33.tar.lz4',\n",
" '2020-04-01/web_connectivity.36.tar.lz4',\n",
" '2020-04-01/web_connectivity.11.tar.lz4',\n",
" '2020-04-01/web_connectivity.18.tar.lz4',\n",
" '2020-04-01/tor.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.48.tar.lz4',\n",
" '2020-04-01/web_connectivity.41.tar.lz4',\n",
" '2020-04-01/web_connectivity.17.tar.lz4',\n",
" '2020-04-01/web_connectivity.44.tar.lz4',\n",
" '2020-04-01/web_connectivity.50.tar.lz4',\n",
" '2020-04-01/web_connectivity.12.tar.lz4',\n",
" '2020-04-01/web_connectivity.22.tar.lz4',\n",
" '2020-04-01/web_connectivity.37.tar.lz4',\n",
" '2020-04-01/web_connectivity.14.tar.lz4',\n",
" '2020-04-01/web_connectivity.53.tar.lz4',\n",
" '2020-04-01/web_connectivity.09.tar.lz4',\n",
" '2020-04-01/meek_fronted_requests_test.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.04.tar.lz4',\n",
" '2020-04-01/http_invalid_request_line.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.38.tar.lz4',\n",
" '2020-04-01/web_connectivity.34.tar.lz4',\n",
" '2020-04-01/web_connectivity.27.tar.lz4',\n",
" '2020-04-01/tcp_connect.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.23.tar.lz4',\n",
" '2020-04-01/web_connectivity.06.tar.lz4',\n",
" '2020-04-01/web_connectivity.52.tar.lz4',\n",
" '2020-04-01/web_connectivity.47.tar.lz4',\n",
" '2020-04-01/web_connectivity.15.tar.lz4',\n",
" '2020-04-01/web_connectivity.57.tar.lz4',\n",
" '2020-04-01/dash.0.tar.lz4',\n",
" '2020-04-01/example.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.29.tar.lz4',\n",
" '2020-04-01/web_connectivity.51.tar.lz4',\n",
" '2020-04-01/web_connectivity.13.tar.lz4',\n",
" '2020-04-01/web_connectivity.05.tar.lz4',\n",
" '2020-04-01/web_connectivity.55.tar.lz4',\n",
" '2020-04-01/web_connectivity.10.tar.lz4',\n",
" '2020-04-01/telegram.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.35.tar.lz4',\n",
" '2020-04-01/http_header_field_manipulation.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.28.tar.lz4',\n",
" '2020-04-01/web_connectivity.07.tar.lz4',\n",
" '2020-04-01/web_connectivity.39.tar.lz4',\n",
" '2020-04-01/vanilla_tor.0.tar.lz4',\n",
" '2020-04-01/web_connectivity.56.tar.lz4',\n",
" '2020-04-01/web_connectivity.31.tar.lz4',\n",
" '2020-04-01/web_connectivity.25.tar.lz4',\n",
" '2020-04-01/web_connectivity.20.tar.lz4',\n",
" '2020-04-01/web_connectivity.43.tar.lz4',\n",
" '2020-04-01/ndt.1.tar.lz4',\n",
" '2020-04-01/20200401T000308Z-BR-AS28573-web_connectivity-20200401T000309Z_AS28573_XdEoZ8HepiUVK0DnDOIzjZ1OdumX1QnnXV24kOvk5rITIpwIJZ-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T001103Z-TW-AS9916-web_connectivity-20200401T001103Z_AS9916_zpAZgF7TXukWW9o52cK4n11enYIept6PnUZ6jq5h5uuXk3Pzv0-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T001733Z-RU-AS42668-web_connectivity-20200401T001733Z_AS42668_t0iAqk5kFHycbKemNcU0pJb5RRrWcK9DygcTDWf5wcufQtgpqN-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T003237Z-US-AS46562-web_connectivity-20200401T003237Z_AS46562_zQYLJIthte8Hj8GhAVXo1kc75ClVbFQrGAaJx0gC3glyhfzLeH-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T005246Z-ZZ-AS0-web_connectivity-20200401T005247Z_AS0_dd5qdEqgcvWwW8iaTx511FuGIxbvYTUeiXHjW4dFfcwOqjhlwu-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T005748Z-ZZ-AS0-web_connectivity-20200401T005750Z_AS0_5ZiqRg1TCCCo5jShv5E8cTqFHmUYB65mV3QYrdwnArc6ZSLUrd-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T011841Z-BR-AS28573-web_connectivity-20200401T011842Z_AS28573_7E9X0CZgNVjiOstlLxw3vhPwUVb0AUu9iKBP8hveqSiBAobKmC-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T012854Z-ZZ-AS0-web_connectivity-20200401T012855Z_AS0_ulZ5jlRGb4wAZveyZn9xaoXH84djmDQmJbMnHaU32fMeuDAsA0-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T013148Z-SC-AS0-web_connectivity-20200401T013149Z_AS0_NrNcierFoUbrMyLEWA66xK3Kok3IKaWXl5eIP0wCzJSV6WX0m9-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T014625Z-US-AS22773-web_connectivity-20200401T014625Z_AS22773_95JRITq1k3AnmojGorT7p54lAI3b1fZhVb7LVZTyn4yd6IpNjJ-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T015020Z-ES-AS29119-web_connectivity-20200401T015020Z_AS29119_UNmAXuFgS7xqbf2eJNN8MWr3BcHx9oljy8v12QGKBMv6IVUMnL-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T021759Z-US-AS2914-web_connectivity-20200401T021800Z_AS2914_eHAnMMFCdWC5Uo92jstQfHdyV9esDl5Rht7ILXGpXs6NHnVrpu-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T021816Z-GR-AS6799-web_connectivity-20200401T021816Z_AS6799_OQhe5wtYlWu2zTiy77AjiMCBouRvLnaoTkcYbEZjDGfQWJioNy-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T022111Z-US-AS11427-web_connectivity-20200401T022111Z_AS11427_wFY92Rbtt965hsf6A5PAvLAiJvlNxiRAu4u9cSmGRXhIhvhBxw-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T022637Z-AU-AS1221-web_connectivity-20200401T022638Z_AS1221_hqVhnB0xm1oEzgbWGQDPyU1uxJi2WUEbT5zjKSU3WKHzJH7c1t-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200331T200708Z-US-AS7922-web_connectivity-20200401T210301Z_AS7922_FxZ9Nos4bd5ZzVq1JOLMPTuEE42S5r0Kxi35trHWYCVdMNFfFc-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200331T223411Z-US-AS21928-web_connectivity-20200331T223416Z_AS21928_kppdowoa4Iaj4B8ZeGKaqWRqlWul0DEpjzybu2Sv2J69bSzk60-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T025408Z-RU-AS8427-web_connectivity-20200401T025408Z_AS8427_71MW3dONciQecxJOLTMleR3GgPdpW8vu7gS0uAWTEWvbrfsJFT-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T025728Z-RU-AS3239-web_connectivity-20200401T025729Z_AS3239_mTqbErVLler5P6XBCInYK9SfBWVwb9lKoA4ZBtJqHa8nB1oQRb-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T030758Z-RU-AS41661-web_connectivity-20200401T030759Z_AS41661_e7V20QoLcChfE9BaqU82DCJKJG7rpFc15EtoxICxstlgSEk0zO-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T033145Z-RU-AS8369-web_connectivity-20200401T033146Z_AS8369_OoUVMKIqmo8xRgclgMyX6Syr5rOY4ikzswvNOtUhmwHzw4NWWh-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T034635Z-RU-AS41682-web_connectivity-20200401T034636Z_AS41682_X1K1XmWwl1fQwpka9e6PqPxckChdb6STtOJSmEggQY5ERcB6LP-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035009Z-RU-AS8369-web_connectivity-20200401T035009Z_AS8369_q8Oq96VbMcSsqL3izQwNVjPtO7sKHrJxCnKCIk8YF9pvLzEXF3-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035151Z-RU-AS8369-web_connectivity-20200401T035154Z_AS8369_O3d3ToGQFFekMAg3HDU1WPSYpoOxtiYnQzz4YOFLi2qkZDB1Qn-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035316Z-RU-AS8369-web_connectivity-20200401T035317Z_AS8369_BmwLtK79zXrTKnHBc9SgfhqCdiqDfM39T0Bf5tipPRV1zfO6qp-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035323Z-RU-AS8369-web_connectivity-20200401T035324Z_AS8369_FXq4RipTj2zyN068ZDtRcEAxgspU1NsagVFU9p8GIodsYbNErv-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035334Z-RU-AS24588-web_connectivity-20200401T035335Z_AS24588_H1Pl5DXKprk4XbC3yV3seFE1ErJThU02DKHwhb0AvZmaJOAZUt-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035503Z-RU-AS28890-web_connectivity-20200401T035504Z_AS28890_kQGtBW6ihY5AhCdYuEUnOPm3eNoHGkl0CXbISZt2Kh3lTOrJtM-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035811Z-RU-AS8369-web_connectivity-20200401T035811Z_AS8369_JipDVbJLOJhcexUh7w68pK2C1j6gHvaCIuomVpIkFr5z1EUOvT-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T040313Z-RU-AS8369-web_connectivity-20200401T040314Z_AS8369_ajuTYfsMsW4BPkj0KsuymbhWXAQeNPJUhXmJ94vF5kjJg8mdS3-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T040701Z-RU-AS51604-web_connectivity-20200401T040702Z_AS51604_sUMd1bjQOZNl9EaiMNEiftR0z9t7x1AEURh5dFzl7rSjeqLYjQ-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T041011Z-RU-AS41661-web_connectivity-20200401T041012Z_AS41661_0Ji5lCZlkRsBFBrDbSIrqhDEbJ2wOLkpJQFMegXhc1KpBmlnZO-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T050300Z-RO-AS8708-web_connectivity-20200401T050301Z_AS8708_P1Nhvr5DF83YUQOuwNxD6CSULfnY5Ah5YN9y3qvrgIB6KuokOu-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T051916Z-TW-AS3462-web_connectivity-20200401T051916Z_AS3462_qEMhKXUDPcbSX3aRy6HgjMoaSOYsIwJMw6PnR6MvtoxDUqNqAr-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T054458Z-RU-AS24588-web_connectivity-20200401T054459Z_AS24588_FFVKzoGx1ToNG4g7NsebHjmf1SDW2HHA1cot92xJ5FOLU4yyuS-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T062327Z-RU-AS8369-web_connectivity-20200401T062327Z_AS8369_gnTz9MUJzcp2Mmnr1g1NSAXKNX1NMzq7TxOt6qZTQTTRUhLPZs-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T080404Z-ES-AS12715-web_connectivity-20200401T080405Z_AS12715_icSuNfKXMHGhEGg7xb2kredEZe8rdqQU245vmF51I1SZteLLPP-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T091719Z-FR-AS12876-web_connectivity-20200401T091720Z_AS12876_TYYbgaQWxz7qCKTgI84z9EHJ8cKsvc1zPwyqmr5weSgyAX6MLf-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T093124Z-FR-AS3215-web_connectivity-20200401T093124Z_AS3215_Ax2XRehK4Ejx9Lb6jpA51hmPc0rkfOHxm7OkfdpmKp25LCQ8BE-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T101655Z-DE-AS8881-web_connectivity-20200401T101656Z_AS8881_5bXmSz2ztYyJbbqwxHmaIy9u8wu9NZ6pAJTEAVLEixI1JcQQyI-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T101655Z-DE-AS8881-web_connectivity-20200401T212937Z_AS8881_S9ElHvF9gUET8PNfGzRtVqEuZbkWQ5jQckri9qsnXlgyvoApKY-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T110024Z-US-AS209-web_connectivity-20200401T110028Z_AS209_YJr0aYdtn0hnXfHqxougmo8Os2LtGdJdwHPYGA7cU5dRUD823w-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T110200Z-US-AS2637-web_connectivity-20200401T110204Z_AS2637_PBodQrzEiiQQNnesOJ93LPH1fYIcGVMeFxGkfgdfAYArtyOSNx-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T115126Z-ES-AS3352-web_connectivity-20200401T115127Z_AS3352_IzeXNSvpqbzurqAceJpbdaXHltYVh0gHwL37v9M3lHG7PHyF7D-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T161747Z-MX-AS6503-web_connectivity-20200401T161748Z_AS6503_2FwSadKHetKQHTItG1Dd9rvqntfguHFy1DyHfdz1b90oeUEwQs-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T162021Z-RO-AS8708-web_connectivity-20200401T162021Z_AS8708_Gz7TY4KgB9nbYSPBA5GCOfuhfsLF5vU5Q6W2THDumFe6NWJ96s-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T164201Z-US-AS12271-web_connectivity-20200401T164202Z_AS12271_3icNoyaywuCpmKrTxVHPi6JMsFjSrEH7oZTUUpgR6BOUDDATRV-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T210903Z-GB-AS2856-web_connectivity-20200401T210904Z_AS2856_mvMBHCbjRzAbzdwEfbairuzDagSeniUfE7cqgf2EkDnCzwHPgW-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000033Z-IT-AS137-web_connectivity-20200401T000034Z_AS137_Yj1zXfIyT13wscsB78HtvSJhnJ3vMsxb9PiDRfY1AXqPGAikaX-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000058Z-US-AS7922-web_connectivity-20200401T000059Z_AS7922_mSIoqRVjqbLAyjfY2judx6RCEaeSjbBv68ugI8Z2AhA4qjyo1T-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000100Z-US-AS7922-web_connectivity-20200401T000101Z_AS7922_R1nvcHzZ2IeXvQ8zZlBxKqNAkiMbtaVJwdSzkG6G25w6VPcY3Y-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200331T192825Z-BR-AS28573-web_connectivity-20200331T192831Z_AS28573_cOn3AuQfgqAruG78x9rOM7JDQOpmOSgJb3O7HrVbhZn1DamRHK-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200331T220300Z-US-AS21928-web_connectivity-20200331T220303Z_AS21928_I9VvWHgkWPIy7uiB4rCLHvxUCRDBQ9LLJQP0X69ekNEnexWogR-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200331T232327Z-HU-AS20845-web_connectivity-20200331T232328Z_AS20845_NAUEMdA64VQS9HpavH4fJIHhCIgQj85FXKydSzF1sFObnmdOFZ-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000032Z-RU-AS41661-web_connectivity-20200401T000033Z_AS41661_oGJvUmECWPFhU5l0PoNdLTxEV9E4cWxUy0UJOtyzqFuo9qCJHO-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000040Z-AT-AS679-web_connectivity-20200401T000040Z_AS679_Dqjz8dUGGVqZeZ0Po6zV80WNSR7okNCsojh9WVpvmzvJuUr3ub-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000040Z-RU-AS41682-web_connectivity-20200401T000041Z_AS41682_OWM2E2vVxQZRRyGnXEN43gJJOSmaIcvRDIpfOEZtDgsDkAGBgj-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000055Z-RU-AS51570-web_connectivity-20200401T000056Z_AS51570_DVZQ0YW1afTYAT0ncSXRiFYixodPGA3LUrMG0ggho0qS1x4cTZ-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000102Z-GB-AS5413-web_connectivity-20200401T000103Z_AS5413_CKyokzfKLYLtlrk1eQKsSHe8SN9wRWHDhz6xiCVUEOMn7MSzGd-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000149Z-RU-AS41733-web_connectivity-20200401T000151Z_AS41733_pYZkP0mxITdkI28jZC6R5Mb5JfmT1hGuZ78nlNJJWRCbIeTmL4-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T000454Z-ZZ-AS0-web_connectivity-20200401T000455Z_AS0_NDJpTtmMbOFPEPE7I7rbvd5eoYbBMXJpG3Ku5l5DaJ8L1CEqt8-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T001932Z-ZZ-AS0-web_connectivity-20200401T001933Z_AS0_o9d9s3xiuQ3N34V4nm76WsCSUQWYpdUEBMmG2dFbImfhQR5fGp-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T002146Z-GB-AS0-web_connectivity-20200401T002146Z_AS0_TD2arEPwtHZMLqzn9St0jN55twW8j4OAyJvH5qX19SGhS2p0jh-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T002309Z-ZZ-AS0-web_connectivity-20200401T002310Z_AS0_nB0mZWTftJiJ4ARvsVfw1Xyy5Y6fVOxgg40G9j7oC82gnpa26r-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T003014Z-ZZ-AS0-web_connectivity-20200401T003014Z_AS0_p8UW1Xx0sTS5OOuO81EOUlrbuhGTPrNMMwLtpS1x1BcKdJ9P5G-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T005346Z-ML-AS30985-web_connectivity-20200401T005347Z_AS30985_R4kcqx1iyYxXaGNc8HVraAm96Ua5KCOiPB5vKEXU32PS7UFj2R-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T012845Z-ZZ-AS0-web_connectivity-20200401T012845Z_AS0_OArr7dV1aaAUftXSwJ4QnWiy7MyJdKce3Jodkm1i0AMvGglM3K-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T012850Z-ZZ-AS0-web_connectivity-20200401T012850Z_AS0_H5dZdot0xfEDJuL7GzmPPpV6SoGoy5opzmyyzhZCB4sn4zfQHM-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T014102Z-CA-AS1403-web_connectivity-20200401T014103Z_AS1403_4wwKvcA8RFe2hCq17MLOBvgiAbiMi17DLKWimDCUcbv9jEAaOF-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T014617Z-ZZ-AS0-web_connectivity-20200401T014618Z_AS0_YVEIC1P6vdVIbk1eklP2MzBNTWe7hotuBvWC1nSEbwDES47asz-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T014842Z-BR-AS14868-web_connectivity-20200401T014845Z_AS14868_Otg0fc5xMJfLmkp1lOfsiMMdSqlJ2eLrbmIZJ3w6u6e3HGo4xA-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T015459Z-ZZ-AS0-web_connectivity-20200401T015501Z_AS0_JUNwjvYAZaxMGJQ1DWBW6e1AX80X6Et6oD2EIVTfQfoZIrrILI-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T015554Z-CA-AS5645-web_connectivity-20200401T015555Z_AS5645_kjeHhF9nnG9FM4sVposm3Ua1SLOUhM7i4p0gzdJ1W3hGD1o7xI-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T020240Z-ZZ-AS0-web_connectivity-20200401T020241Z_AS0_3FScW7afW2zON1yDEoFp10xBtEXYBhRq4SXgtDyIXfHZsYYtZj-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T020829Z-AR-AS16814-web_connectivity-20200401T020830Z_AS16814_il43pfi773WEy69hfzrEeR1tDQeq3RrxLIbwEoeInGV3dZL6oY-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T021716Z-ZZ-AS0-web_connectivity-20200401T021716Z_AS0_tClBmv2LVKEfAlKNt79Qv8DG5ZdU9dpMTbeMw48O6yakCZKzJp-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T021731Z-ZZ-AS0-web_connectivity-20200401T021732Z_AS0_ZsFv4HGSNvDAIIJwy5ya99Lkq3QOJwEuCQDi3qES8hFeUL7Qxm-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T021758Z-PK-AS23674-web_connectivity-20200401T021801Z_AS23674_zzwRETno8YbFn7tggQfSF2x1UJQCu3yH4Z7rmEqDC4qPBp20gL-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T023513Z-US-AS701-web_connectivity-20200401T023515Z_AS701_salGDP6qOXqiduPFMeV2bWDyvXWK3q1LMJwzw71IBjCrXfHgTx-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T024205Z-US-AS7922-web_connectivity-20200401T024207Z_AS7922_Fs4Td0W1qJ1N9hW9kCkYoLE83p2CLtd8x9OLzrpGv5HNgq4eAG-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T024205Z-US-AS7922-web_connectivity-20200401T181522Z_AS7922_C9oJq886fUsZunGQQOcJZTwADBajmQsr1XdxmgKr5jDT5BmEtT-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T024215Z-BR-AS14868-web_connectivity-20200401T024216Z_AS14868_2CNPfnxDotvbeoLKnVtGEoHFiLGRV2D5eELTvsuwbHSIRDISoF-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T024215Z-BR-AS14868-web_connectivity-20200401T154320Z_AS14868_eNI6ttLAQQ7HXSQAFVSlVVm5ighiEmgduD0kIdcdbHe8tW3L8u-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T030059Z-RU-AS41661-web_connectivity-20200401T030100Z_AS41661_NCwL83RkxIevpGulC0BNeBQdAj1KOlyDxbCHLEBwbEKJAFGoAI-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T030348Z-RU-AS41661-web_connectivity-20200401T030349Z_AS41661_137QGTlIWRnuuSYWXnmlVxh8g6IaeCXoK6sGEAl7XbrJZwvXqe-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T034848Z-RU-AS41682-web_connectivity-20200401T034848Z_AS41682_Y1jM18KK5TVo5R0WmTX7xtyG2REzmlB4AvrWsVCV1bfhTfqSKO-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035233Z-RU-AS56330-web_connectivity-20200401T035234Z_AS56330_6yva4BHT6lgPYJwfW57LhJjBpUF5V66fX1f3OLQPLIFAOGb6Qq-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035341Z-RU-AS8369-web_connectivity-20200401T035342Z_AS8369_dCRLLP78gQXFazd4KjSWg6qOR8ByFlnLDG2NBHZaU8fo0rK5DD-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035505Z-RU-AS8369-web_connectivity-20200401T035506Z_AS8369_xsh4MTdVIA8xj4p5I2G7Z3ZaZ8mttJgXadY8ce0ysRacJggm6v-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035606Z-RU-AS56377-web_connectivity-20200401T035607Z_AS56377_9mixE8Za1ncPfbCYdnlTSy79YoDj0TOEREUlliq8YxSTgy0NDV-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035639Z-RU-AS41661-web_connectivity-20200401T035641Z_AS41661_wY1p6y9JJ7bjatoYRK19QIaUaDl2BWRSXZZ7uK0ju8igQE7XnY-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T035753Z-RU-AS51035-web_connectivity-20200401T035754Z_AS51035_2VviT1iMLZ9AEfGXEPU6yRH6hw8jaau6EKlPDLLs3FWPgpSNpU-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T040221Z-RU-AS8369-web_connectivity-20200401T040222Z_AS8369_3OdZohu1hFpMUO3Cn2sDH9l8t1PJeouk8zcj53sltOvCLD1YRT-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T040824Z-RU-AS48642-web_connectivity-20200401T040824Z_AS48642_K1fKTkoXq81nqvKyT1zIu8xx2EThFYkcv7cxLR5yWL71kFKG0n-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T042605Z-RU-AS8369-web_connectivity-20200401T042606Z_AS8369_ssE79lbAQTILUkOsSoc1qUUBtcH9oDccLkvHmxxDceQQqko1S4-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T050149Z-RU-AS41691-web_connectivity-20200401T050149Z_AS41691_tcWOa8xnUjR8HQIgUc346ulpPfCaLnWYJv3TqjXayItw50wrlm-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T050746Z-US-AS714-web_connectivity-20200401T050748Z_AS714_BJOGQJnisziYRxJbZvQTry4WdV8AhNeU6QeNheTP94qn2hXFJ5-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T054927Z-RU-AS8427-web_connectivity-20200401T054928Z_AS8427_vvzi3g0uICmxi9x0xcnYECouZUPHjRzOHHvE9RxL7zXdcvNwI4-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T055633Z-RU-AS39289-web_connectivity-20200401T055634Z_AS39289_dtWzRyd5mSjkq0bmA7BQ19GaD0VFXA1aZPyrZdzjg3TNKkBRQA-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T060601Z-ZW-AS37123-web_connectivity-20200401T060603Z_AS37123_PcezhvzMSD8eMIMcQUczUHihSmmuZSlv5fOjX7xUU9M35Kugde-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T071756Z-IT-AS12874-web_connectivity-20200401T071805Z_AS12874_arJc3saBZTykqgRk3MwSoNmEm52LKyUcFKItDxoJimHS7KCq47-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T072626Z-DE-AS3209-web_connectivity-20200401T072627Z_AS3209_mwzVbXzLa7ySpdRoL9bT4mercvP2ZKt7Hc7TV3zBC8zSYI3FIt-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T072636Z-HU-AS8448-web_connectivity-20200401T072637Z_AS8448_hNMEFkoL2LhQBd9lRiTKObKGzX3go6MpA6zNOQnEueRL1H3f2H-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T074632Z-RU-AS42610-web_connectivity-20200401T074633Z_AS42610_ctGROe04P77l3uEExn0Ci4n6oL7xIC1VZ0QC6H87RnRtO7ZYap-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T084239Z-ZA-AS5713-web_connectivity-20200401T084240Z_AS5713_3BU0YcUKaX7WzCtaJyYwLG4VGy2CrWzu9Q2BbpdZV571Bhwoux-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T084239Z-ZA-AS5713-web_connectivity-20200401T210425Z_AS5713_4223cRZH04sjMK6AoR3HgcmUHB11LvLXYNDU0tMCJXY5PqEp0d-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T110215Z-RU-AS24955-web_connectivity-20200401T110216Z_AS24955_uw3Z7B3EhTL1dR3nqjwCjVdXabnFSgJu0Iab6FdpTlR1QQX5dw-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T130522Z-BR-AS28573-web_connectivity-20200401T130524Z_AS28573_kkHPyOvyXE9R8XJ1JtGoFhEGhe8fD4w9XBad5japC8Y9baIc0A-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T140533Z-BR-AS28573-web_connectivity-20200401T140534Z_AS28573_fVU4N85iX3UrAv6yXrHoSIQT4mJllrc8blAiQpHV0ZBD9DCEB3-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T140605Z-RU-AS24955-web_connectivity-20200401T140606Z_AS24955_uk3RnqceTqHrWPwmCNh5LxoDsB2ALU8vedPcraj7SiQOVLAxSv-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T150618Z-RU-AS42610-web_connectivity-20200401T150619Z_AS42610_O3GpB1jLULkVGFlwVomGgV5TySuEd8n5Msivfaquau4beqJFE1-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T201537Z-ES-AS12357-web_connectivity-20200401T201540Z_AS12357_u9z2T1RPtEa2ELwYs1AbBk0VbjrhbQb5p4LrlvrlPDPF0hgcWZ-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T210225Z-BR-AS28573-web_connectivity-20200401T210230Z_AS28573_9yZBWqRndQemLokaEM42T8doUpWvlllofK9Xo7EW86s3yJJDUt-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T004141Z-RU-AS41682-web_connectivity-20200401T004142Z_AS41682_0SEeA2nWo4WjXD48Hd8959XBos6cbi7NtL9WRh8lADDn6oFKTC-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T004231Z-RU-AS41733-web_connectivity-20200401T004231Z_AS41733_iu3I0Il3tptmGYiBfg3OPQuEtpFRoMwVZ8FZYqi8kEo5ZDV9Gh-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T004306Z-RU-AS51570-web_connectivity-20200401T004307Z_AS51570_RbkZQGRqELp3vx5tl3TMTzHGWezKEBews6MnLKqqDZfqBzAVtX-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T031517Z-AR-AS16814-web_connectivity-20200401T031518Z_AS16814_voo2r7lb5avS86Ms4JWsS7D9qi0g9RZmH67N7rIm9NgfaMGgUr-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T034424Z-RU-AS3239-web_connectivity-20200401T034424Z_AS3239_ZK3cX250N8LLn66QGZoNuzBhy9OxgcslEW12bPCq7UCnV7MMN5-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T034947Z-RU-AS41661-web_connectivity-20200401T034948Z_AS41661_3BZhFPiuPkTd2ykHvXSVGbvUGDf4GVMboNH9EwP8qanptNrPT0-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T044655Z-RU-AS51035-web_connectivity-20200401T044656Z_AS51035_csOLyfFNNUdBC2jADMbeqte14dxFYFjWJubkH4fitDdshHeta5-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T044658Z-RU-AS41661-web_connectivity-20200401T044659Z_AS41661_YrC3mMp0OaVxI8ZZgfAGcdbJ7vYFKPba5frr5vk2wR6B1Gyrrg-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T044753Z-RU-AS56330-web_connectivity-20200401T044754Z_AS56330_IGhk4FoKg3SqtL9BkDTQbIHoBhbzz7FfjR2Go2UTgkX3bmU6sK-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T050341Z-RU-AS56377-web_connectivity-20200401T050341Z_AS56377_gS0URO7mgeDZE2pKH9LMKYfZZvpItLHJfARTZNrr0HCr4xH1mk-0.2.0-probe.json.lz4',\n",
" '2020-04-01/20200401T171548Z-MX-AS6503-web_connectivity-20200401T171549Z_AS6503_D4cfRYRFM2R7OBOnIhBKl11fLpMIpmORa5Gd2MLdkVIn4QWOF0-0.2.0-probe.json.lz4']"
]
},
"execution_count": 163,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(can_filenames)"
]
},
{
"cell_type": "code",
"execution_count": 136,
"metadata": {},
"outputs": [],
"source": [
"tarfd = decompress_tar(can['filename'])"
]
},
{
"cell_type": "code",
"execution_count": 181,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 182,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'vAgL3VMkiQU+tjTXdR5Et0OGJpw='"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"dict_keys(['test_keys', 'test_start_time', 'input_hashes', 'probe_ip', 'id', 'test_helpers', 'probe_cc', 'test_runtime', 'input', 'probe_city', 'probe_asn', 'software_name', 'software_version', 'data_format_version', 'report_filename', 'test_version', 'bucket_date', 'test_name', 'report_id', 'measurement_start_time', 'backend_version', 'options'])\n",
"dict_keys(['test_keys', 'test_start_time', 'input_hashes', 'probe_ip', 'id', 'test_helpers', 'probe_cc', 'test_runtime', 'input', 'probe_city', 'probe_asn', 'software_name', 'software_version', 'data_format_version', 'report_filename', 'test_version', 'bucket_date', 'test_name', 'report_id', 'measurement_start_time', 'backend_version', 'options'])\n",
"dict_keys(['test_keys', 'test_start_time', 'input_hashes', 'probe_ip', 'id', 'test_helpers', 'probe_cc', 'test_runtime', 'input', 'probe_city', 'probe_asn', 'software_name', 'software_version', 'data_format_version', 'report_filename', 'test_version', 'bucket_date', 'test_name', 'report_id', 'measurement_start_time', 'backend_version', 'options'])\n",
"dict_keys(['test_keys', 'test_start_time', 'input_hashes', 'probe_ip', 'id', 'test_helpers', 'probe_cc', 'test_runtime', 'input', 'probe_city', 'probe_asn', 'software_name', 'software_version', 'data_format_version', 'report_filename', 'test_version', 'bucket_date', 'test_name', 'report_id', 'measurement_start_time', 'backend_version', 'options'])\n",
"dict_keys(['test_keys', 'test_start_time', 'input_hashes', 'probe_ip', 'id', 'test_helpers', 'probe_cc', 'test_runtime', 'input', 'probe_city', 'probe_asn', 'software_name', 'software_version', 'data_format_version', 'report_filename', 'test_version', 'bucket_date', 'test_name', 'report_id', 'measurement_start_time', 'backend_version', 'options'])\n",
"dict_keys(['test_keys', 'test_start_time', 'input_hashes', 'probe_ip', 'id', 'test_helpers', 'probe_cc', 'test_runtime', 'input', 'probe_city', 'probe_asn', 'software_name', 'software_version', 'data_format_version', 'report_filename', 'test_version', 'bucket_date', 'test_name', 'report_id', 'measurement_start_time', 'backend_version', 'options'])\n",
"dict_keys(['test_keys', 'test_start_time', 'input_hashes', 'probe_ip', 'id', 'test_helpers', 'probe_cc', 'test_runtime', 'input', 'probe_city', 'probe_asn', 'software_name', 'software_version', 'data_format_version', 'report_filename', 'test_version', 'bucket_date', 'test_name', 'report_id', 'measurement_start_time', 'backend_version', 'options'])\n"
]
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['test_start_time', 'input_hashes', 'probe_ip', 'id', 'test_helpers',\n",
" 'probe_cc', 'test_runtime', 'input', 'probe_city', 'probe_asn',\n",
" 'software_name', 'software_version', 'data_format_version',\n",
" 'report_filename', 'test_version', 'bucket_date', 'test_name',\n",
" 'report_id', 'measurement_start_time', 'backend_version', 'options',\n",
" 'obfs4_accessible', 'dir_port_total', 'or_port_dirauth_accessible',\n",
" 'dir_port_accessible', 'or_port_total', 'obfs4_total',\n",
" 'or_port_accessible', 'resolver_asn', 'resolver_network_name',\n",
" 'targets', 'resolver_ip', 'or_port_dirauth_total',\n",
" '_probe_engine_sanitize_test_keys'],\n",
" dtype='object')"
]
},
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>test_start_time</th>\n",
" <th>input_hashes</th>\n",
" <th>probe_ip</th>\n",
" <th>id</th>\n",
" <th>test_helpers</th>\n",
" <th>probe_cc</th>\n",
" <th>test_runtime</th>\n",
" <th>input</th>\n",
" <th>probe_city</th>\n",
" <th>probe_asn</th>\n",
" <th>...</th>\n",
" <th>dir_port_accessible</th>\n",
" <th>or_port_total</th>\n",
" <th>obfs4_total</th>\n",
" <th>or_port_accessible</th>\n",
" <th>resolver_asn</th>\n",
" <th>resolver_network_name</th>\n",
" <th>targets</th>\n",
" <th>resolver_ip</th>\n",
" <th>or_port_dirauth_total</th>\n",
" <th>_probe_engine_sanitize_test_keys</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>2020-04-01 00:19:23</td>\n",
" <td>None</td>\n",
" <td>127.0.0.1</td>\n",
" <td>aec25991-126e-70c6-c6a9-c770e9b4b6ed</td>\n",
" <td>None</td>\n",
" <td>MY</td>\n",
" <td>60.004755</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AS4788</td>\n",
" <td>...</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>AS4788</td>\n",
" <td>TM Net, Internet Service Provider</td>\n",
" <td>{'154.35.175.225:443': {'tcp_connect': [{'stat...</td>\n",
" <td>202.188.1.176</td>\n",
" <td>10</td>\n",
" <td>true</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>2020-04-01 01:17:01</td>\n",
" <td>None</td>\n",
" <td>127.0.0.1</td>\n",
" <td>5b7ee1ba-48d2-9997-0fb3-cbffaf6126b3</td>\n",
" <td>None</td>\n",
" <td>MY</td>\n",
" <td>60.001890</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AS4788</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>AS4788</td>\n",
" <td>TM Net, Internet Service Provider</td>\n",
" <td>{'154.35.175.225:443': {'tcp_connect': [{'stat...</td>\n",
" <td>202.188.1.181</td>\n",
" <td>10</td>\n",
" <td>true</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>2020-04-01 02:18:55</td>\n",
" <td>None</td>\n",
" <td>127.0.0.1</td>\n",
" <td>a91ac2ad-98e2-1fa3-cf87-a26e06b67cc2</td>\n",
" <td>None</td>\n",
" <td>MY</td>\n",
" <td>60.006450</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AS4788</td>\n",
" <td>...</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>AS4788</td>\n",
" <td>TM Net, Internet Service Provider</td>\n",
" <td>{'154.35.175.225:443': {'tcp_connect': [{'stat...</td>\n",
" <td>202.188.1.176</td>\n",
" <td>10</td>\n",
" <td>true</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>2020-04-01 03:06:32</td>\n",
" <td>None</td>\n",
" <td>127.0.0.1</td>\n",
" <td>47b1302f-0c7a-0754-17d8-09b7912b01e7</td>\n",
" <td>None</td>\n",
" <td>MY</td>\n",
" <td>60.020081</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AS4788</td>\n",
" <td>...</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>AS4788</td>\n",
" <td>TM Net, Internet Service Provider</td>\n",
" <td>{'154.35.175.225:443': {'tcp_connect': [{'stat...</td>\n",
" <td>202.188.1.181</td>\n",
" <td>10</td>\n",
" <td>true</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>2020-04-01 04:25:12</td>\n",
" <td>None</td>\n",
" <td>127.0.0.1</td>\n",
" <td>485b29d9-3891-870c-d180-8b6539f8d45a</td>\n",
" <td>None</td>\n",
" <td>MY</td>\n",
" <td>60.014249</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AS4788</td>\n",
" <td>...</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>AS4788</td>\n",
" <td>TM Net, Internet Service Provider</td>\n",
" <td>{'154.35.175.225:443': {'tcp_connect': [{'stat...</td>\n",
" <td>202.188.1.181</td>\n",
" <td>10</td>\n",
" <td>true</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>2020-04-01 05:06:07</td>\n",
" <td>None</td>\n",
" <td>127.0.0.1</td>\n",
" <td>1295ca23-e413-aa4e-dc86-0da0e95e5d73</td>\n",
" <td>None</td>\n",
" <td>MY</td>\n",
" <td>60.005112</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AS4788</td>\n",
" <td>...</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>AS4788</td>\n",
" <td>TM Net, Internet Service Provider</td>\n",
" <td>{'154.35.175.225:443': {'tcp_connect': [{'stat...</td>\n",
" <td>202.188.1.181</td>\n",
" <td>10</td>\n",
" <td>true</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>2020-04-01 06:19:37</td>\n",
" <td>None</td>\n",
" <td>127.0.0.1</td>\n",
" <td>359df18b-a8f5-866c-4e8d-31c8504bec9f</td>\n",
" <td>None</td>\n",
" <td>MY</td>\n",
" <td>60.004131</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>AS4788</td>\n",
" <td>...</td>\n",
" <td>7</td>\n",
" <td>0</td>\n",
" <td>16</td>\n",
" <td>0</td>\n",
" <td>AS4788</td>\n",
" <td>TM Net, Internet Service Provider</td>\n",
" <td>{'154.35.175.225:443': {'tcp_connect': [{'stat...</td>\n",
" <td>202.188.1.181</td>\n",
" <td>10</td>\n",
" <td>true</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>7 rows × 34 columns</p>\n",
"</div>"
],
"text/plain": [
" test_start_time input_hashes probe_ip \\\n",
"0 2020-04-01 00:19:23 None 127.0.0.1 \n",
"1 2020-04-01 01:17:01 None 127.0.0.1 \n",
"2 2020-04-01 02:18:55 None 127.0.0.1 \n",
"3 2020-04-01 03:06:32 None 127.0.0.1 \n",
"4 2020-04-01 04:25:12 None 127.0.0.1 \n",
"5 2020-04-01 05:06:07 None 127.0.0.1 \n",
"6 2020-04-01 06:19:37 None 127.0.0.1 \n",
"\n",
" id test_helpers probe_cc test_runtime \\\n",
"0 aec25991-126e-70c6-c6a9-c770e9b4b6ed None MY 60.004755 \n",
"1 5b7ee1ba-48d2-9997-0fb3-cbffaf6126b3 None MY 60.001890 \n",
"2 a91ac2ad-98e2-1fa3-cf87-a26e06b67cc2 None MY 60.006450 \n",
"3 47b1302f-0c7a-0754-17d8-09b7912b01e7 None MY 60.020081 \n",
"4 485b29d9-3891-870c-d180-8b6539f8d45a None MY 60.014249 \n",
"5 1295ca23-e413-aa4e-dc86-0da0e95e5d73 None MY 60.005112 \n",
"6 359df18b-a8f5-866c-4e8d-31c8504bec9f None MY 60.004131 \n",
"\n",
" input probe_city probe_asn ... dir_port_accessible or_port_total \\\n",
"0 None None AS4788 ... 6 0 \n",
"1 None None AS4788 ... 8 0 \n",
"2 None None AS4788 ... 5 0 \n",
"3 None None AS4788 ... 2 0 \n",
"4 None None AS4788 ... 5 0 \n",
"5 None None AS4788 ... 7 0 \n",
"6 None None AS4788 ... 7 0 \n",
"\n",
" obfs4_total or_port_accessible resolver_asn \\\n",
"0 16 0 AS4788 \n",
"1 16 0 AS4788 \n",
"2 16 0 AS4788 \n",
"3 16 0 AS4788 \n",
"4 16 0 AS4788 \n",
"5 16 0 AS4788 \n",
"6 16 0 AS4788 \n",
"\n",
" resolver_network_name \\\n",
"0 TM Net, Internet Service Provider \n",
"1 TM Net, Internet Service Provider \n",
"2 TM Net, Internet Service Provider \n",
"3 TM Net, Internet Service Provider \n",
"4 TM Net, Internet Service Provider \n",
"5 TM Net, Internet Service Provider \n",
"6 TM Net, Internet Service Provider \n",
"\n",
" targets resolver_ip \\\n",
"0 {'154.35.175.225:443': {'tcp_connect': [{'stat... 202.188.1.176 \n",
"1 {'154.35.175.225:443': {'tcp_connect': [{'stat... 202.188.1.181 \n",
"2 {'154.35.175.225:443': {'tcp_connect': [{'stat... 202.188.1.176 \n",
"3 {'154.35.175.225:443': {'tcp_connect': [{'stat... 202.188.1.181 \n",
"4 {'154.35.175.225:443': {'tcp_connect': [{'stat... 202.188.1.181 \n",
"5 {'154.35.175.225:443': {'tcp_connect': [{'stat... 202.188.1.181 \n",
"6 {'154.35.175.225:443': {'tcp_connect': [{'stat... 202.188.1.181 \n",
"\n",
" or_port_dirauth_total _probe_engine_sanitize_test_keys \n",
"0 10 true \n",
"1 10 true \n",
"2 10 true \n",
"3 10 true \n",
"4 10 true \n",
"5 10 true \n",
"6 10 true \n",
"\n",
"[7 rows x 34 columns]"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\n",
" df['obfs4_total'] - df['obfs4_accessible'] > 0\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"def get_measurement(autoclaved_filename, frame_off, frame_size, intra_off, intra_size):\n",
"    \"\"\"Fetch one measurement out of an autoclaved file and return its raw bytes.\n",
"\n",
"    An HTTP Range request downloads the frame_size bytes starting at\n",
"    frame_off of autoclaved_filename (resolved against AUTOCLAVED_BASE_URL).\n",
"    That LZ4 frame is decompressed and the intra_size bytes starting at\n",
"    intra_off of the decompressed data are returned.\n",
"\n",
"    Raises whatever r.raise_for_status() raises on an HTTP error status, and\n",
"    RuntimeError when the downloaded frame or the extracted slice has an\n",
"    unexpected length, or the slice does not start with '{' and end with '}'.\n",
"    \"\"\"\n",
"    range_header = \"bytes={}-{}\".format(\n",
"        frame_off, frame_off + frame_size - 1\n",
"    )\n",
"    r = requests.get(\n",
"        urljoin(AUTOCLAVED_BASE_URL, autoclaved_filename),\n",
"        headers={\"Range\": range_header},\n",
"    )\n",
"    r.raise_for_status()\n",
"    blob = r.content\n",
"    if len(blob) != frame_size:\n",
"        # Report the frame_size argument: `msmt` is not defined inside this\n",
"        # function, so referencing it here masked the real error.\n",
"        raise RuntimeError(\"Failed to fetch LZ4 frame\", len(blob), frame_size)\n",
"    blob = lz4framed.decompress(blob)[intra_off : intra_off + intra_size]\n",
"    if len(blob) != intra_size or blob[:1] != b\"{\" or blob[-1:] != b\"}\":\n",
"        raise RuntimeError(\n",
"            \"Failed to decompress LZ4 frame to measurement.json\",\n",
"            len(blob),\n",
"            intra_size,\n",
"            blob[:1],\n",
"            blob[-1:],\n",
"        )\n",
"    return blob"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_raw_frame(autoclaved_filename, frame_off, frame_size):\n",
"    \"\"\"Download one LZ4 frame of an autoclaved file and return it decompressed.\n",
"\n",
"    The frame is the frame_size bytes starting at frame_off of\n",
"    autoclaved_filename (resolved against AUTOCLAVED_BASE_URL), fetched with\n",
"    an HTTP Range request. Raises RuntimeError when the response body is not\n",
"    exactly frame_size bytes long.\n",
"    \"\"\"\n",
"    last_byte = frame_off + frame_size - 1\n",
"    byte_range = \"bytes={}-{}\".format(frame_off, last_byte)\n",
"    frame_url = urljoin(AUTOCLAVED_BASE_URL, autoclaved_filename)\n",
"\n",
"    resp = requests.get(frame_url, headers={\"Range\": byte_range})\n",
"    resp.raise_for_status()\n",
"\n",
"    compressed = resp.content\n",
"    if len(compressed) == frame_size:\n",
"        return lz4framed.decompress(compressed)\n",
"    raise RuntimeError(\"Failed to fetch LZ4 frame\", len(compressed), frame_size)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"index = load_autoclaved_index('2020-04-01')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download and parse every datum listed in `datums`; a failure is printed\n",
"# together with the offending offset record and the loop moves on.\n",
"measurement_list = []\n",
"for autoclaved_filename, offsets in datums.items():\n",
"    for offset in offsets:\n",
"        try:\n",
"            raw_json = get_measurement(\n",
"                autoclaved_filename,\n",
"                offset['file_off'],\n",
"                offset['file_size'],\n",
"                offset['text_off'],\n",
"                offset['text_size'],\n",
"            )\n",
"            msmt = json.loads(raw_json)\n",
"            measurement_list.append(msmt)\n",
"        except Exception as exc:\n",
"            print(exc)\n",
"            print(offset)"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {},
"outputs": [],
"source": [
"def get_datums(test_name):\n",
"    \"\"\"Collect the datum records of the global `index` for one test name.\n",
"\n",
"    Every non-empty line of `index` is parsed as JSON. A 'file' record whose\n",
"    filename satisfies is_test_name(filename, test_name) opens a section that\n",
"    lasts until the next '/file' record. Inside a section each 'datum' record\n",
"    is merged on top of a copy of the most recent 'frame' record.\n",
"\n",
"    Returns a dict mapping each matching filename to its list of merged\n",
"    frame+datum dicts, in index order.\n",
"    \"\"\"\n",
"    frames = {}\n",
"    current_filename = None\n",
"    current_frame = None\n",
"\n",
"    for line in index:\n",
"        if line == b\"\":\n",
"            continue\n",
"        entry = json.loads(line)\n",
"        kind = entry['type']\n",
"\n",
"        if kind == 'file' and is_test_name(entry['filename'], test_name):\n",
"            current_filename = entry['filename']\n",
"            frames[current_filename] = []\n",
"            continue\n",
"        if current_filename is None:\n",
"            # Not inside a file section we care about.\n",
"            continue\n",
"\n",
"        if kind == 'frame':\n",
"            current_frame = entry\n",
"        elif kind == 'datum':\n",
"            # Datum fields override same-named frame fields.\n",
"            frames[current_filename].append({**current_frame, **entry})\n",
"        elif kind == '/file':\n",
"            current_filename = None\n",
"            current_frame = None\n",
"    return frames"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"datums = get_datums('tor')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment