Skip to content

Instantly share code, notes, and snippets.

@hellais
Created May 14, 2020 14:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hellais/24054799ab6dea3913855bce1118691a to your computer and use it in GitHub Desktop.
Save hellais/24054799ab6dea3913855bce1118691a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import io\n",
"import yaml\n",
"import json\n",
"from datetime import datetime, timedelta\n",
"\n",
"import pandas as pd\n",
"import psycopg2\n",
"import boto3\n",
"\n",
"import requests\n",
"from tqdm import tqdm\n",
"\n",
"from dateutil.parser import parse as parse_date\n",
"from urllib.parse import urlencode, quote, urlparse\n",
"\n",
"from IPython.display import IFrame\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def get_explorer_url(e):\n",
"    \"\"\"Build the OONI Explorer URL for a measurement row.\n",
"\n",
"    `e` is a mapping-like row (dict or pandas Series) holding a 'report_id'\n",
"    entry and, optionally, an 'input' entry. A non-empty input is appended\n",
"    as a fully percent-encoded `?input=` query parameter.\n",
"    \"\"\"\n",
"    measured_input = e['input'] if 'input' in e.keys() else None\n",
"    suffix = '?input={}'.format(quote(measured_input, safe='')) if measured_input else ''\n",
"    return 'https://explorer.ooni.org/measurement/{}{}'.format(e['report_id'], suffix)\n",
" \n",
"def print_explorer_url(e):\n",
"    \"\"\"Print the OONI Explorer URL of measurement row `e` to stdout.\"\"\"\n",
"    explorer_url = get_explorer_url(e)\n",
"    print(explorer_url)\n",
"\n",
"def query(q):\n",
"    \"\"\"Run the SQL string `q` against the metadb database and return a DataFrame.\n",
"\n",
"    Connection settings are read from the environment so that no secret is\n",
"    stored in the notebook:\n",
"      METADB_NAME      database name (default 'metadb')\n",
"      METADB_USER      user name (default 'amsapi')\n",
"      METADB_HOST      host name (default 'amsmetadb.ooni.nu')\n",
"      METADB_PASSWORD  password (required)\n",
"\n",
"    Raises KeyError when METADB_PASSWORD is not set.\n",
"    \"\"\"\n",
"    # SECURITY: the password was previously hardcoded in this notebook; treat\n",
"    # that credential as leaked and rotate it.\n",
"    conn = psycopg2.connect(\n",
"        dbname=os.environ.get('METADB_NAME', 'metadb'),\n",
"        user=os.environ.get('METADB_USER', 'amsapi'),\n",
"        host=os.environ.get('METADB_HOST', 'amsmetadb.ooni.nu'),\n",
"        password=os.environ['METADB_PASSWORD'],\n",
"    )\n",
"    try:\n",
"        return pd.read_sql_query(q, conn)\n",
"    finally:\n",
"        # Close explicitly so repeated calls do not leave connections open.\n",
"        conn.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"# SQL passed to query(): selects measurement, report, domain_input,\n",
"# http_verdict, http_request and dns_a columns through inner JOINs.\n",
"# Filters are hardcoded: test_name = 'web_connectivity',\n",
"# test_start_time > '2019-05-01' and probe_cc = 'MM'.\n",
"# NOTE(review): in SQL, `NOT LIKE` evaluates to NULL for NULL values, so rows\n",
"# whose http_experiment_failure is NULL are filtered out as well - confirm\n",
"# that excluding them is intended.\n",
"list_msmt_query = \"\"\"\n",
"SELECT \n",
" measurement.measurement_start_time AS measurement_start_time,\n",
" measurement.anomaly,\n",
" measurement.confirmed,\n",
" report.report_id,\n",
" report.probe_cc,\n",
" report.probe_asn,\n",
" report.test_name,\n",
" measurement.test_runtime,\n",
" domain_input.input,\n",
" domain_input.domain,\n",
" http_verdict.blocking,\n",
" http_verdict.http_experiment_failure,\n",
" http_verdict.dns_experiment_failure,\n",
" http_verdict.control_failure,\n",
" http_request.body_simhash,\n",
" http_request.body_sha256,\n",
" dns_a.test_ip,\n",
" dns_a.client_resolver,\n",
" dns_a.control_ip\n",
"FROM measurement\n",
"JOIN domain_input ON domain_input.input_no = measurement.input_no\n",
"JOIN report ON report.report_no = measurement.report_no\n",
"JOIN http_verdict ON http_verdict.msm_no = measurement.msm_no\n",
"JOIN http_request ON http_request.msm_no = measurement.msm_no\n",
"JOIN dns_a ON dns_a.msm_no = measurement.msm_no\n",
"WHERE test_name = 'web_connectivity'\n",
"AND test_start_time > '2019-05-01'\n",
"AND probe_cc = 'MM'\n",
"AND http_experiment_failure NOT LIKE 'unknown_failure%'\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [],
"source": [
"# Execute list_msmt_query through query() and keep the resulting DataFrame;\n",
"# this opens a database connection, so it needs network access.\n",
"web_dfs = query(list_msmt_query)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"# IP addresses treated as DNS blocking fingerprints, keyed by the probe's\n",
"# country code (probe_cc). has_dns_fingerprint() intersects a row's test_ip\n",
"# list with the entry for its country.\n",
"dns_block_ips = {\n",
"    'ID': [\n",
"        '112.215.197.131'\n",
"    ],\n",
"    'MY': [\n",
"        '175.139.142.25'\n",
"    ],\n",
"    # 'RU' used to be defined twice in this literal; the later definition\n",
"    # silently replaced the earlier one, so both lists are merged here.\n",
"    'RU': [\n",
"        '5.3.3.17',\n",
"        '127.0.0.1'\n",
"    ],\n",
"    'IR': [\n",
"        '10.10.34.35',\n",
"        '10.10.34.36'\n",
"    ],\n",
"    'MM': [\n",
"        '167.172.4.60',\n",
"        '188.166.199.101',\n",
"        '59.153.90.11'\n",
"    ],\n",
"    'LB': [\n",
"        '172.16.200.2',\n",
"        '78.40.176.25',\n",
"        #'78.40.176.26',\n",
"        '0.0.0.0',\n",
"        '127.0.0.1'\n",
"    ],\n",
"    'RW': [\n",
"        '127.0.0.1'\n",
"    ],\n",
"    'CN': [\n",
"        '8.7.198.45',\n",
"        '203.98.7.65',\n",
"        '31.13.73.1',\n",
"        '31.13.85.16',\n",
"        '59.24.3.173',\n",
"        '78.16.49.15',\n",
"        '93.46.8.89',\n",
"        '199.59.148.140',\n",
"        '243.185.187.39',\n",
"        '46.82.174.68',\n",
"        '173.252.110.21', # https://github.com/shadowsocks/ChinaDNS/pull/151/files\n",
"\n",
"        # Facebook IPs\n",
"        '31.13.66.23',\n",
"        '31.13.66.6',\n",
"        '31.13.71.23',\n",
"        '31.13.74.17',\n",
"        '31.13.83.1',\n",
"        '31.13.97.245',\n",
"        # The comma after the next entry was missing, which concatenated it\n",
"        # with '37.61.54.158' into a single invalid string.\n",
"        '69.171.229.73',\n",
"        # Not visible in the data, but in research paper\n",
"        '37.61.54.158',\n",
"        '159.106.121.75'\n",
"    ],\n",
"    'TM': [\n",
"        '127.0.0.1'\n",
"    ],\n",
"    'VN': [\n",
"        '113.187.31.114',\n",
"        '192.168.1.1',\n",
"        '127.0.0.1'\n",
"    ],\n",
"    'UA': [\n",
"        '193.192.36.13',\n",
"        '127.0.0.1',\n",
"        '5.196.229.56',\n",
"        '100.106.194.11',\n",
"        '100.106.194.111',\n",
"        '100.106.197.100',\n",
"        '195.39.248.48'\n",
"    ]\n",
"}\n",
"\n",
"# Body simhash values treated as block page fingerprints, keyed by the\n",
"# probe's country code; has_blockpage_fingerprint() checks a row's\n",
"# body_simhash for membership in the list for its country.\n",
"# NOTE(review): the values are float literals, so distinct 64-bit hashes that\n",
"# round to the same float would compare equal - confirm this is acceptable.\n",
"http_block_simhash = {\n",
" 'LB': [\n",
" -8.132979422794627e+18,\n",
" -8.128447811188751e+18,\n",
" -5.041816293145594e+18,\n",
" 8.653896855253318e+18\n",
" ]\n",
"}\n",
"def has_dns_fingerprint(row):\n",
"    \"\"\"Return the row's resolved IPs that are listed in dns_block_ips.\n",
"\n",
"    Returns False when row['test_ip'] is empty or None, or when the row's\n",
"    probe_cc has no entry in dns_block_ips. Otherwise returns the set of\n",
"    matching IPs, which is empty (falsy) when nothing matches.\n",
"    \"\"\"\n",
"    resolved_ips = row['test_ip']\n",
"    if not resolved_ips or row['probe_cc'] not in dns_block_ips:\n",
"        return False\n",
"    known_block_ips = dns_block_ips[row['probe_cc']]\n",
"    return set(resolved_ips) & set(known_block_ips)\n",
"\n",
"def has_blockpage_fingerprint(row):\n",
"    \"\"\"Return True when the row's body simhash is a listed block page fingerprint.\n",
"\n",
"    Looks up row['body_simhash'] in http_block_simhash for the row's\n",
"    probe_cc. Returns False when the simhash is None or the country has no\n",
"    fingerprints, so the result is always a bool rather than an implicit None.\n",
"    \"\"\"\n",
"    if row['body_simhash'] is None or row['probe_cc'] not in http_block_simhash:\n",
"        return False\n",
"    # A NaN simhash passes the None check but never equals a listed value,\n",
"    # so the membership test below is False for it.\n",
"    return row['body_simhash'] in http_block_simhash[row['probe_cc']]\n",
" \n",
"def calculate_blocking(row):\n",
"    \"\"\"Compute the blocking verdict for one measurement row.\n",
"\n",
"    Checks are applied in this order, and the first match wins:\n",
"      1. row['confirmed'] is True        -> 'confirmed-blk'\n",
"      2. has_dns_fingerprint(row)        -> 'confirmed-dns'\n",
"      3. has_blockpage_fingerprint(row)  -> 'confirmed-blk'\n",
"    If none match, the row's existing 'blocking' value is returned.\n",
"    \"\"\"\n",
"    if row['confirmed'] == True:\n",
"        return 'confirmed-blk'\n",
"    fingerprint_checks = (\n",
"        (has_dns_fingerprint, 'confirmed-dns'),\n",
"        (has_blockpage_fingerprint, 'confirmed-blk'),\n",
"    )\n",
"    for matches, verdict in fingerprint_checks:\n",
"        if matches(row):\n",
"            return verdict\n",
"    return row['blocking']"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>measurement_start_time</th>\n",
" <th>anomaly</th>\n",
" <th>confirmed</th>\n",
" <th>report_id</th>\n",
" <th>probe_cc</th>\n",
" <th>probe_asn</th>\n",
" <th>test_name</th>\n",
" <th>test_runtime</th>\n",
" <th>input</th>\n",
" <th>domain</th>\n",
" <th>blocking</th>\n",
" <th>http_experiment_failure</th>\n",
" <th>dns_experiment_failure</th>\n",
" <th>control_failure</th>\n",
" <th>body_simhash</th>\n",
" <th>body_sha256</th>\n",
" <th>test_ip</th>\n",
" <th>client_resolver</th>\n",
" <th>control_ip</th>\n",
" <th>explorer_url</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>100</td>\n",
" <td>2020-01-14 15:17:03</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200114T151623Z_AS133385_V3rGW3tEiVwXMSVkxduR...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>19.8566</td>\n",
" <td>https://extratorrent.cc/</td>\n",
" <td>extratorrent.cc</td>\n",
" <td>http-failure</td>\n",
" <td>http_empty_location_header</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>0.000000e+00</td>\n",
" <td>[b'\\xe3', b'\\xb0', b'\\xc4', b'B', b'\\x98', b'\\...</td>\n",
" <td>[104.31.16.3, 104.31.17.3]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[104.31.16.3, 104.31.17.3]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200114...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>220</td>\n",
" <td>2020-02-24 12:20:57</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200224T114833Z_AS135307_emRSIHQMIoFzbk3AqtEg...</td>\n",
" <td>MM</td>\n",
" <td>135307</td>\n",
" <td>web_connectivity</td>\n",
" <td>17.6074</td>\n",
" <td>http://www.blubster.com/</td>\n",
" <td>www.blubster.com</td>\n",
" <td>http-failure</td>\n",
" <td>connection_reset</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[91.195.240.126]</td>\n",
" <td>74.125.190.143</td>\n",
" <td>[91.195.240.126]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200224...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>222</td>\n",
" <td>2020-02-24 12:18:37</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200224T114833Z_AS135307_emRSIHQMIoFzbk3AqtEg...</td>\n",
" <td>MM</td>\n",
" <td>135307</td>\n",
" <td>web_connectivity</td>\n",
" <td>17.2177</td>\n",
" <td>https://trashy.com/</td>\n",
" <td>trashy.com</td>\n",
" <td>http-failure</td>\n",
" <td>connection_reset</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[23.227.38.32]</td>\n",
" <td>74.125.190.143</td>\n",
" <td>[23.227.38.32]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200224...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>261</td>\n",
" <td>2020-02-25 07:26:49</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200225T072641Z_AS135307_sxdf5Kz73ju8R7dHdkCJ...</td>\n",
" <td>MM</td>\n",
" <td>135307</td>\n",
" <td>web_connectivity</td>\n",
" <td>34.1095</td>\n",
" <td>http://www.exmormon.org/</td>\n",
" <td>www.exmormon.org</td>\n",
" <td>http-failure</td>\n",
" <td>generic_timeout_error</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[204.238.168.42]</td>\n",
" <td>172.253.211.68</td>\n",
" <td>[204.238.168.42]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200225...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>347</td>\n",
" <td>2020-03-13 12:41:55</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200313T110351Z_AS9988_TzkOe0saCR2mszvZ81AAkt...</td>\n",
" <td>MM</td>\n",
" <td>9988</td>\n",
" <td>web_connectivity</td>\n",
" <td>16.6834</td>\n",
" <td>http://www.rockstargames.com/</td>\n",
" <td>www.rockstargames.com</td>\n",
" <td>http-failure</td>\n",
" <td>http_too_many_redirects</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>8.018934e+18</td>\n",
" <td>[b'X', b'\\xdc', b'\\xb6', b'\\xe9', b'\\xfd', b'\\...</td>\n",
" <td>[23.74.221.131]</td>\n",
" <td>203.81.65.86</td>\n",
" <td>[104.73.54.83]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200313...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3598</td>\n",
" <td>2020-04-08 05:12:38</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200408T044457Z_AS133385_DxPQ1hup7WAKrIfTvfHh...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>38.9313</td>\n",
" <td>https://theglobalobservatory.org/2019/07/manda...</td>\n",
" <td>theglobalobservatory.org</td>\n",
" <td>http-failure</td>\n",
" <td>generic_timeout_error</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[198.50.154.58]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[198.50.154.58]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200408...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3676</td>\n",
" <td>2020-04-08 05:20:23</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200408T044457Z_AS133385_DxPQ1hup7WAKrIfTvfHh...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>18.5150</td>\n",
" <td>https://kids.yahoo.com/</td>\n",
" <td>kids.yahoo.com</td>\n",
" <td>http-failure</td>\n",
" <td>ssl_error: error:00000000:lib(0):func(0):reaso...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[212.82.100.151]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[212.82.100.151]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200408...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3910</td>\n",
" <td>2020-04-13 10:46:30</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>34.9183</td>\n",
" <td>https://epic.org/</td>\n",
" <td>epic.org</td>\n",
" <td>http-failure</td>\n",
" <td>generic_timeout_error</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[66.39.149.49]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[66.39.149.49]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200413...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3913</td>\n",
" <td>2020-04-13 10:44:36</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>5.0355</td>\n",
" <td>http://join.sensual-network.com/</td>\n",
" <td>join.sensual-network.com</td>\n",
" <td>http-failure</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[52.2.237.237]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[52.2.237.237]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200413...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3914</td>\n",
" <td>2020-04-13 10:43:50</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>27.6302</td>\n",
" <td>https://extratorrent.cc/</td>\n",
" <td>extratorrent.cc</td>\n",
" <td>http-failure</td>\n",
" <td>http_empty_location_header</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>0.000000e+00</td>\n",
" <td>[b'\\xe3', b'\\xb0', b'\\xc4', b'B', b'\\x98', b'\\...</td>\n",
" <td>[104.28.18.50, 104.28.19.50]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[104.28.18.50, 104.28.19.50]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200413...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>71 rows × 20 columns</p>\n",
"</div>"
],
"text/plain": [
" measurement_start_time anomaly confirmed \\\n",
"100 2020-01-14 15:17:03 True None \n",
"220 2020-02-24 12:20:57 True None \n",
"222 2020-02-24 12:18:37 True None \n",
"261 2020-02-25 07:26:49 True None \n",
"347 2020-03-13 12:41:55 True None \n",
"... ... ... ... \n",
"3598 2020-04-08 05:12:38 True None \n",
"3676 2020-04-08 05:20:23 True None \n",
"3910 2020-04-13 10:46:30 True None \n",
"3913 2020-04-13 10:44:36 True None \n",
"3914 2020-04-13 10:43:50 True None \n",
"\n",
" report_id probe_cc probe_asn \\\n",
"100 20200114T151623Z_AS133385_V3rGW3tEiVwXMSVkxduR... MM 133385 \n",
"220 20200224T114833Z_AS135307_emRSIHQMIoFzbk3AqtEg... MM 135307 \n",
"222 20200224T114833Z_AS135307_emRSIHQMIoFzbk3AqtEg... MM 135307 \n",
"261 20200225T072641Z_AS135307_sxdf5Kz73ju8R7dHdkCJ... MM 135307 \n",
"347 20200313T110351Z_AS9988_TzkOe0saCR2mszvZ81AAkt... MM 9988 \n",
"... ... ... ... \n",
"3598 20200408T044457Z_AS133385_DxPQ1hup7WAKrIfTvfHh... MM 133385 \n",
"3676 20200408T044457Z_AS133385_DxPQ1hup7WAKrIfTvfHh... MM 133385 \n",
"3910 20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5... MM 133385 \n",
"3913 20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5... MM 133385 \n",
"3914 20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5... MM 133385 \n",
"\n",
" test_name test_runtime \\\n",
"100 web_connectivity 19.8566 \n",
"220 web_connectivity 17.6074 \n",
"222 web_connectivity 17.2177 \n",
"261 web_connectivity 34.1095 \n",
"347 web_connectivity 16.6834 \n",
"... ... ... \n",
"3598 web_connectivity 38.9313 \n",
"3676 web_connectivity 18.5150 \n",
"3910 web_connectivity 34.9183 \n",
"3913 web_connectivity 5.0355 \n",
"3914 web_connectivity 27.6302 \n",
"\n",
" input \\\n",
"100 https://extratorrent.cc/ \n",
"220 http://www.blubster.com/ \n",
"222 https://trashy.com/ \n",
"261 http://www.exmormon.org/ \n",
"347 http://www.rockstargames.com/ \n",
"... ... \n",
"3598 https://theglobalobservatory.org/2019/07/manda... \n",
"3676 https://kids.yahoo.com/ \n",
"3910 https://epic.org/ \n",
"3913 http://join.sensual-network.com/ \n",
"3914 https://extratorrent.cc/ \n",
"\n",
" domain blocking \\\n",
"100 extratorrent.cc http-failure \n",
"220 www.blubster.com http-failure \n",
"222 trashy.com http-failure \n",
"261 www.exmormon.org http-failure \n",
"347 www.rockstargames.com http-failure \n",
"... ... ... \n",
"3598 theglobalobservatory.org http-failure \n",
"3676 kids.yahoo.com http-failure \n",
"3910 epic.org http-failure \n",
"3913 join.sensual-network.com http-failure \n",
"3914 extratorrent.cc http-failure \n",
"\n",
" http_experiment_failure \\\n",
"100 http_empty_location_header \n",
"220 connection_reset \n",
"222 connection_reset \n",
"261 generic_timeout_error \n",
"347 http_too_many_redirects \n",
"... ... \n",
"3598 generic_timeout_error \n",
"3676 ssl_error: error:00000000:lib(0):func(0):reaso... \n",
"3910 generic_timeout_error \n",
"3913 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"3914 http_empty_location_header \n",
"\n",
" dns_experiment_failure control_failure body_simhash \\\n",
"100 None None 0.000000e+00 \n",
"220 None None NaN \n",
"222 None None NaN \n",
"261 None None NaN \n",
"347 None None 8.018934e+18 \n",
"... ... ... ... \n",
"3598 None None NaN \n",
"3676 None None NaN \n",
"3910 None None NaN \n",
"3913 None None NaN \n",
"3914 None None 0.000000e+00 \n",
"\n",
" body_sha256 \\\n",
"100 [b'\\xe3', b'\\xb0', b'\\xc4', b'B', b'\\x98', b'\\... \n",
"220 None \n",
"222 None \n",
"261 None \n",
"347 [b'X', b'\\xdc', b'\\xb6', b'\\xe9', b'\\xfd', b'\\... \n",
"... ... \n",
"3598 None \n",
"3676 None \n",
"3910 None \n",
"3913 None \n",
"3914 [b'\\xe3', b'\\xb0', b'\\xc4', b'B', b'\\x98', b'\\... \n",
"\n",
" test_ip client_resolver \\\n",
"100 [104.31.16.3, 104.31.17.3] 103.255.174.220 \n",
"220 [91.195.240.126] 74.125.190.143 \n",
"222 [23.227.38.32] 74.125.190.143 \n",
"261 [204.238.168.42] 172.253.211.68 \n",
"347 [23.74.221.131] 203.81.65.86 \n",
"... ... ... \n",
"3598 [198.50.154.58] 103.255.174.220 \n",
"3676 [212.82.100.151] 103.255.174.220 \n",
"3910 [66.39.149.49] 103.255.174.220 \n",
"3913 [52.2.237.237] 103.255.174.220 \n",
"3914 [104.28.18.50, 104.28.19.50] 103.255.174.220 \n",
"\n",
" control_ip \\\n",
"100 [104.31.16.3, 104.31.17.3] \n",
"220 [91.195.240.126] \n",
"222 [23.227.38.32] \n",
"261 [204.238.168.42] \n",
"347 [104.73.54.83] \n",
"... ... \n",
"3598 [198.50.154.58] \n",
"3676 [212.82.100.151] \n",
"3910 [66.39.149.49] \n",
"3913 [52.2.237.237] \n",
"3914 [104.28.18.50, 104.28.19.50] \n",
"\n",
" explorer_url \n",
"100 https://explorer.ooni.org/measurement/20200114... \n",
"220 https://explorer.ooni.org/measurement/20200224... \n",
"222 https://explorer.ooni.org/measurement/20200224... \n",
"261 https://explorer.ooni.org/measurement/20200225... \n",
"347 https://explorer.ooni.org/measurement/20200313... \n",
"... ... \n",
"3598 https://explorer.ooni.org/measurement/20200408... \n",
"3676 https://explorer.ooni.org/measurement/20200408... \n",
"3910 https://explorer.ooni.org/measurement/20200413... \n",
"3913 https://explorer.ooni.org/measurement/20200413... \n",
"3914 https://explorer.ooni.org/measurement/20200413... \n",
"\n",
"[71 rows x 20 columns]"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the rows whose 'blocking' verdict is 'http-failure'.\n",
"# NOTE: the saved output of this cell (execution count 57) includes the\n",
"# 'explorer_url' column and was produced after the cells further down that\n",
"# recompute 'blocking' (count 52) and add 'explorer_url' (count 55), so a\n",
"# top-to-bottom run will not reproduce it exactly.\n",
"web_dfs[\n",
" (web_dfs['blocking'] == 'http-failure')\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"https://explorer.ooni.org/measurement/20200114T151623Z_AS133385_V3rGW3tEiVwXMSVkxduR0oei0PE3KSq8GJzIS570a0Abc9XOHt?input=https%3A%2F%2Fextratorrent.cc%2F\n"
]
}
],
"source": [
"# Print the OONI Explorer link for the row at position 100 of web_dfs.\n",
"print_explorer_url(web_dfs.iloc[100])"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>measurement_start_time</th>\n",
" <th>anomaly</th>\n",
" <th>confirmed</th>\n",
" <th>report_id</th>\n",
" <th>probe_cc</th>\n",
" <th>probe_asn</th>\n",
" <th>test_name</th>\n",
" <th>test_runtime</th>\n",
" <th>input</th>\n",
" <th>domain</th>\n",
" <th>blocking</th>\n",
" <th>http_experiment_failure</th>\n",
" <th>dns_experiment_failure</th>\n",
" <th>control_failure</th>\n",
" <th>body_simhash</th>\n",
" <th>body_sha256</th>\n",
" <th>test_ip</th>\n",
" <th>client_resolver</th>\n",
" <th>control_ip</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>1693</td>\n",
" <td>2020-03-29 16:11:18</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>39.05910</td>\n",
" <td>http://www.hornybank.com/</td>\n",
" <td>www.hornybank.com</td>\n",
" <td>dns</td>\n",
" <td>generic_timeout_error</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[206.54.182.51]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1694</td>\n",
" <td>2020-03-29 16:11:51</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>3.21670</td>\n",
" <td>https://www.watchmyexgf.net/</td>\n",
" <td>www.watchmyexgf.net</td>\n",
" <td>dns</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[162.251.110.41]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1695</td>\n",
" <td>2020-03-29 16:11:19</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>2.25665</td>\n",
" <td>https://landing.mofosnetwork.com/</td>\n",
" <td>landing.mofosnetwork.com</td>\n",
" <td>dns</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[66.254.114.54]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1697</td>\n",
" <td>2020-03-29 16:10:52</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>3.06453</td>\n",
" <td>https://watchmygf.mobi/</td>\n",
" <td>watchmygf.mobi</td>\n",
" <td>dns</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[162.254.189.6]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1698</td>\n",
" <td>2020-03-29 16:10:46</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>2.39157</td>\n",
" <td>https://myporngay.com/</td>\n",
" <td>myporngay.com</td>\n",
" <td>dns</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[108.167.174.59]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3974</td>\n",
" <td>2020-04-13 09:35:20</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>1.92121</td>\n",
" <td>https://www.redtube.com/</td>\n",
" <td>www.redtube.com</td>\n",
" <td>dns</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[216.18.168.124]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3975</td>\n",
" <td>2020-04-13 09:35:07</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>2.87732</td>\n",
" <td>https://thatinhman.com/</td>\n",
" <td>thatinhman.com</td>\n",
" <td>dns</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[162.241.217.213]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3978</td>\n",
" <td>2020-04-13 09:33:46</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>2.92664</td>\n",
" <td>https://www.wicked.com/</td>\n",
" <td>www.wicked.com</td>\n",
" <td>dns</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[104.22.50.75, 104.22.51.75]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3979</td>\n",
" <td>2020-04-13 09:33:12</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>2.40543</td>\n",
" <td>https://gigaporn.xxx/</td>\n",
" <td>gigaporn.xxx</td>\n",
" <td>dns</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[104.27.174.124, 104.27.175.124]</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3980</td>\n",
" <td>2020-04-13 09:33:08</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5...</td>\n",
" <td>MM</td>\n",
" <td>133385</td>\n",
" <td>web_connectivity</td>\n",
" <td>2.22747</td>\n",
" <td>https://mckzonecelebrity.com/</td>\n",
" <td>mckzonecelebrity.com</td>\n",
" <td>dns</td>\n",
" <td>ssl_error: error:14007086:SSL routines:CONNECT...</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>[167.172.4.60]</td>\n",
" <td>103.255.174.220</td>\n",
" <td>[104.24.122.228, 104.24.123.228]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1220 rows × 19 columns</p>\n",
"</div>"
],
"text/plain": [
" measurement_start_time anomaly confirmed \\\n",
"1693 2020-03-29 16:11:18 True None \n",
"1694 2020-03-29 16:11:51 True None \n",
"1695 2020-03-29 16:11:19 True None \n",
"1697 2020-03-29 16:10:52 True None \n",
"1698 2020-03-29 16:10:46 True None \n",
"... ... ... ... \n",
"3974 2020-04-13 09:35:20 True None \n",
"3975 2020-04-13 09:35:07 True None \n",
"3978 2020-04-13 09:33:46 True None \n",
"3979 2020-04-13 09:33:12 True None \n",
"3980 2020-04-13 09:33:08 True None \n",
"\n",
" report_id probe_cc probe_asn \\\n",
"1693 20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI... MM 133385 \n",
"1694 20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI... MM 133385 \n",
"1695 20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI... MM 133385 \n",
"1697 20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI... MM 133385 \n",
"1698 20200329T135350Z_AS133385_l0XbaDjEflKrSxWOGcyI... MM 133385 \n",
"... ... ... ... \n",
"3974 20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5... MM 133385 \n",
"3975 20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5... MM 133385 \n",
"3978 20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5... MM 133385 \n",
"3979 20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5... MM 133385 \n",
"3980 20200413T093309Z_AS133385_w5KsPUGYOH579vkniCV5... MM 133385 \n",
"\n",
" test_name test_runtime input \\\n",
"1693 web_connectivity 39.05910 http://www.hornybank.com/ \n",
"1694 web_connectivity 3.21670 https://www.watchmyexgf.net/ \n",
"1695 web_connectivity 2.25665 https://landing.mofosnetwork.com/ \n",
"1697 web_connectivity 3.06453 https://watchmygf.mobi/ \n",
"1698 web_connectivity 2.39157 https://myporngay.com/ \n",
"... ... ... ... \n",
"3974 web_connectivity 1.92121 https://www.redtube.com/ \n",
"3975 web_connectivity 2.87732 https://thatinhman.com/ \n",
"3978 web_connectivity 2.92664 https://www.wicked.com/ \n",
"3979 web_connectivity 2.40543 https://gigaporn.xxx/ \n",
"3980 web_connectivity 2.22747 https://mckzonecelebrity.com/ \n",
"\n",
" domain blocking \\\n",
"1693 www.hornybank.com dns \n",
"1694 www.watchmyexgf.net dns \n",
"1695 landing.mofosnetwork.com dns \n",
"1697 watchmygf.mobi dns \n",
"1698 myporngay.com dns \n",
"... ... ... \n",
"3974 www.redtube.com dns \n",
"3975 thatinhman.com dns \n",
"3978 www.wicked.com dns \n",
"3979 gigaporn.xxx dns \n",
"3980 mckzonecelebrity.com dns \n",
"\n",
" http_experiment_failure \\\n",
"1693 generic_timeout_error \n",
"1694 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"1695 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"1697 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"1698 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"... ... \n",
"3974 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"3975 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"3978 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"3979 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"3980 ssl_error: error:14007086:SSL routines:CONNECT... \n",
"\n",
" dns_experiment_failure control_failure body_simhash body_sha256 \\\n",
"1693 None None NaN None \n",
"1694 None None NaN None \n",
"1695 None None NaN None \n",
"1697 None None NaN None \n",
"1698 None None NaN None \n",
"... ... ... ... ... \n",
"3974 None None NaN None \n",
"3975 None None NaN None \n",
"3978 None None NaN None \n",
"3979 None None NaN None \n",
"3980 None None NaN None \n",
"\n",
" test_ip client_resolver control_ip \n",
"1693 [167.172.4.60] 103.255.174.220 [206.54.182.51] \n",
"1694 [167.172.4.60] 103.255.174.220 [162.251.110.41] \n",
"1695 [167.172.4.60] 103.255.174.220 [66.254.114.54] \n",
"1697 [167.172.4.60] 103.255.174.220 [162.254.189.6] \n",
"1698 [167.172.4.60] 103.255.174.220 [108.167.174.59] \n",
"... ... ... ... \n",
"3974 [167.172.4.60] 103.255.174.220 [216.18.168.124] \n",
"3975 [167.172.4.60] 103.255.174.220 [162.241.217.213] \n",
"3978 [167.172.4.60] 103.255.174.220 [104.22.50.75, 104.22.51.75] \n",
"3979 [167.172.4.60] 103.255.174.220 [104.27.174.124, 104.27.175.124] \n",
"3980 [167.172.4.60] 103.255.174.220 [104.24.122.228, 104.24.123.228] \n",
"\n",
"[1220 rows x 19 columns]"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the rows whose resolved IP list mentions 167.172.4.60.\n",
"# regex=False makes the dots literal; by default str.contains treats the\n",
"# pattern as a regular expression, where '.' matches any character.\n",
"web_dfs[\n",
"    web_dfs['test_ip'].astype(str).str.contains(\"167.172.4.60\", regex=False)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"# Overwrite the 'blocking' column in place with calculate_blocking()'s\n",
"# verdict for each row.\n",
"web_dfs['blocking'] = web_dfs.apply(calculate_blocking, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [],
"source": [
"# Add an 'explorer_url' column holding get_explorer_url()'s result for each row.\n",
"web_dfs.loc[:, 'explorer_url'] = web_dfs.apply(get_explorer_url, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>measurement_start_time</th>\n",
" <th>anomaly</th>\n",
" <th>confirmed</th>\n",
" <th>report_id</th>\n",
" <th>probe_cc</th>\n",
" <th>probe_asn</th>\n",
" <th>test_name</th>\n",
" <th>test_runtime</th>\n",
" <th>input</th>\n",
" <th>domain</th>\n",
" <th>blocking</th>\n",
" <th>http_experiment_failure</th>\n",
" <th>dns_experiment_failure</th>\n",
" <th>control_failure</th>\n",
" <th>body_simhash</th>\n",
" <th>body_sha256</th>\n",
" <th>test_ip</th>\n",
" <th>client_resolver</th>\n",
" <th>control_ip</th>\n",
" <th>explorer_url</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>1337</td>\n",
" <td>2020-03-26 12:18:49</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200326T121415Z_AS9988_ECLvshPEDDxY2ytBrZJCTX...</td>\n",
" <td>MM</td>\n",
" <td>9988</td>\n",
" <td>web_connectivity</td>\n",
" <td>2.57311</td>\n",
" <td>https://hlatawtar.com/</td>\n",
" <td>hlatawtar.com</td>\n",
" <td>dns</td>\n",
" <td>dns_lookup_error</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>203.81.65.86</td>\n",
" <td>[150.95.22.178]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200326...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1388</td>\n",
" <td>2020-03-26 12:30:28</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200326T122353Z_AS9988_UGo7BI0i74UC8iw8WuWSkt...</td>\n",
" <td>MM</td>\n",
" <td>9988</td>\n",
" <td>web_connectivity</td>\n",
" <td>2.36863</td>\n",
" <td>https://hlatawtar.com/</td>\n",
" <td>hlatawtar.com</td>\n",
" <td>dns</td>\n",
" <td>dns_lookup_error</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>203.81.65.86</td>\n",
" <td>[150.95.22.178]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200326...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1583</td>\n",
" <td>2020-03-29 11:55:59</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200329T115103Z_AS9988_eZ8JX2xxYN9qWpJ7OMdaPR...</td>\n",
" <td>MM</td>\n",
" <td>9988</td>\n",
" <td>web_connectivity</td>\n",
" <td>3.42065</td>\n",
" <td>https://hlatawtar.com/</td>\n",
" <td>hlatawtar.com</td>\n",
" <td>dns</td>\n",
" <td>dns_lookup_error</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>203.81.65.86</td>\n",
" <td>[150.95.22.178]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200329...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2001</td>\n",
" <td>2020-03-29 12:27:16</td>\n",
" <td>True</td>\n",
" <td>None</td>\n",
" <td>20200329T120520Z_AS9988_SMeWOedI5Y2ALXkUH0rdC6...</td>\n",
" <td>MM</td>\n",
" <td>9988</td>\n",
" <td>web_connectivity</td>\n",
" <td>2.92568</td>\n",
" <td>https://hlatawtar.com/</td>\n",
" <td>hlatawtar.com</td>\n",
" <td>dns</td>\n",
" <td>dns_lookup_error</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>NaN</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>203.81.65.86</td>\n",
" <td>[150.95.22.178]</td>\n",
" <td>https://explorer.ooni.org/measurement/20200329...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" measurement_start_time anomaly confirmed \\\n",
"1337 2020-03-26 12:18:49 True None \n",
"1388 2020-03-26 12:30:28 True None \n",
"1583 2020-03-29 11:55:59 True None \n",
"2001 2020-03-29 12:27:16 True None \n",
"\n",
" report_id probe_cc probe_asn \\\n",
"1337 20200326T121415Z_AS9988_ECLvshPEDDxY2ytBrZJCTX... MM 9988 \n",
"1388 20200326T122353Z_AS9988_UGo7BI0i74UC8iw8WuWSkt... MM 9988 \n",
"1583 20200329T115103Z_AS9988_eZ8JX2xxYN9qWpJ7OMdaPR... MM 9988 \n",
"2001 20200329T120520Z_AS9988_SMeWOedI5Y2ALXkUH0rdC6... MM 9988 \n",
"\n",
" test_name test_runtime input domain \\\n",
"1337 web_connectivity 2.57311 https://hlatawtar.com/ hlatawtar.com \n",
"1388 web_connectivity 2.36863 https://hlatawtar.com/ hlatawtar.com \n",
"1583 web_connectivity 3.42065 https://hlatawtar.com/ hlatawtar.com \n",
"2001 web_connectivity 2.92568 https://hlatawtar.com/ hlatawtar.com \n",
"\n",
" blocking http_experiment_failure dns_experiment_failure control_failure \\\n",
"1337 dns dns_lookup_error None None \n",
"1388 dns dns_lookup_error None None \n",
"1583 dns dns_lookup_error None None \n",
"2001 dns dns_lookup_error None None \n",
"\n",
" body_simhash body_sha256 test_ip client_resolver control_ip \\\n",
"1337 NaN None None 203.81.65.86 [150.95.22.178] \n",
"1388 NaN None None 203.81.65.86 [150.95.22.178] \n",
"1583 NaN None None 203.81.65.86 [150.95.22.178] \n",
"2001 NaN None None 203.81.65.86 [150.95.22.178] \n",
"\n",
" explorer_url \n",
"1337 https://explorer.ooni.org/measurement/20200326... \n",
"1388 https://explorer.ooni.org/measurement/20200326... \n",
"1583 https://explorer.ooni.org/measurement/20200329... \n",
"2001 https://explorer.ooni.org/measurement/20200329... "
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Measurements of hlatawtar.com reported with probe_asn 9988.\n",
"web_dfs.loc[\n",
"    web_dfs['probe_asn'].eq(9988)\n",
"    & web_dfs['domain'].eq('hlatawtar.com')\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 2019-05-03 02:19:18\n",
"1 2019-06-01 14:54:45\n",
"2 2019-06-01 14:53:57\n",
"3 2019-06-01 14:54:11\n",
"4 2019-06-17 11:19:07\n",
" ... \n",
"3976 2020-04-13 09:34:29\n",
"3977 2020-04-13 09:33:08\n",
"3978 2020-04-13 09:33:46\n",
"3979 2020-04-13 09:33:12\n",
"3980 2020-04-13 09:33:08\n",
"Name: measurement_start_time, Length: 3981, dtype: datetime64[ns]"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py:376: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" self.obj[key] = _infer_fill_value(value)\n",
"/opt/conda/lib/python3.7/site-packages/pandas/core/indexing.py:494: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" self.obj[item] = s\n"
]
}
],
"source": [
"# Build the per-measurement blocking table as a single method chain.\n",
"# .assign() returns a new frame, so nothing is written into a column slice of\n",
"# web_dfs; assigning through .loc on such a slice raises SettingWithCopyWarning.\n",
"pivot = (\n",
"    web_dfs[['probe_cc', 'probe_asn', 'measurement_start_time', 'blocking', 'input', 'domain', 'report_id']]\n",
"    # Constant marker that becomes the cell value of the pivoted table.\n",
"    .assign(count=1)\n",
"    .assign(explorer_url=lambda msmts: msmts.apply(get_explorer_url, axis=1))\n",
"    # One row per index key, one column per distinct 'blocking' value;\n",
"    # pivot_table's default aggregation is kept.\n",
"    .pivot_table(\n",
"        index=['measurement_start_time', 'probe_asn', 'input', 'domain', 'explorer_url', 'probe_cc'],\n",
"        columns='blocking',\n",
"        values='count',\n",
"    )\n",
"    # Key/blocking combinations that never occurred come out as NaN; show 0.\n",
"    .fillna(0)\n",
"    .reset_index()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>blocking</th>\n",
" <th>measurement_start_time</th>\n",
" <th>probe_asn</th>\n",
" <th>input</th>\n",
" <th>domain</th>\n",
" <th>explorer_url</th>\n",
" <th>probe_cc</th>\n",
" <th>confirmed-dns</th>\n",
" <th>dns</th>\n",
" <th>false</th>\n",
" <th>http-failure</th>\n",
" <th>tcp_ip</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>2019-05-03 02:19:18</td>\n",
" <td>9988</td>\n",
" <td>http://burmadigest.info/</td>\n",
" <td>burmadigest.info</td>\n",
" <td>https://explorer.ooni.org/measurement/20190503...</td>\n",
" <td>MM</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>2019-06-01 14:53:57</td>\n",
" <td>9988</td>\n",
" <td>http://anjali.uncovered.com/</td>\n",
" <td>anjali.uncovered.com</td>\n",
" <td>https://explorer.ooni.org/measurement/20190601...</td>\n",
" <td>MM</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>2019-06-01 14:54:11</td>\n",
" <td>9988</td>\n",
" <td>https://sci-hub.ac/</td>\n",
" <td>sci-hub.ac</td>\n",
" <td>https://explorer.ooni.org/measurement/20190601...</td>\n",
" <td>MM</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>2019-06-01 14:54:45</td>\n",
" <td>9988</td>\n",
" <td>http://www.monmusic.org/reg/index/</td>\n",
" <td>www.monmusic.org</td>\n",
" <td>https://explorer.ooni.org/measurement/20190601...</td>\n",
" <td>MM</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>2019-06-17 11:18:50</td>\n",
" <td>9988</td>\n",
" <td>http://www.kachinpost.com/</td>\n",
" <td>www.kachinpost.com</td>\n",
" <td>https://explorer.ooni.org/measurement/20190617...</td>\n",
" <td>MM</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2902</td>\n",
" <td>2020-04-13 16:38:13</td>\n",
" <td>58952</td>\n",
" <td>https://dailyporn.club/</td>\n",
" <td>dailyporn.club</td>\n",
" <td>https://explorer.ooni.org/measurement/20200413...</td>\n",
" <td>MM</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2903</td>\n",
" <td>2020-04-13 16:38:16</td>\n",
" <td>58952</td>\n",
" <td>https://landing.mofosnetwork.com/</td>\n",
" <td>landing.mofosnetwork.com</td>\n",
" <td>https://explorer.ooni.org/measurement/20200413...</td>\n",
" <td>MM</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2904</td>\n",
" <td>2020-04-13 16:45:00</td>\n",
" <td>58952</td>\n",
" <td>https://www.puretaboo.com/</td>\n",
" <td>www.puretaboo.com</td>\n",
" <td>https://explorer.ooni.org/measurement/20200413...</td>\n",
" <td>MM</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2905</td>\n",
" <td>2020-04-13 16:45:13</td>\n",
" <td>58952</td>\n",
" <td>https://xnxx123.net/</td>\n",
" <td>xnxx123.net</td>\n",
" <td>https://explorer.ooni.org/measurement/20200413...</td>\n",
" <td>MM</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2906</td>\n",
" <td>2020-04-13 16:45:28</td>\n",
" <td>58952</td>\n",
" <td>https://topmmnews.com/</td>\n",
" <td>topmmnews.com</td>\n",
" <td>https://explorer.ooni.org/measurement/20200413...</td>\n",
" <td>MM</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2907 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
"blocking measurement_start_time probe_asn \\\n",
"0 2019-05-03 02:19:18 9988 \n",
"1 2019-06-01 14:53:57 9988 \n",
"2 2019-06-01 14:54:11 9988 \n",
"3 2019-06-01 14:54:45 9988 \n",
"4 2019-06-17 11:18:50 9988 \n",
"... ... ... \n",
"2902 2020-04-13 16:38:13 58952 \n",
"2903 2020-04-13 16:38:16 58952 \n",
"2904 2020-04-13 16:45:00 58952 \n",
"2905 2020-04-13 16:45:13 58952 \n",
"2906 2020-04-13 16:45:28 58952 \n",
"\n",
"blocking input domain \\\n",
"0 http://burmadigest.info/ burmadigest.info \n",
"1 http://anjali.uncovered.com/ anjali.uncovered.com \n",
"2 https://sci-hub.ac/ sci-hub.ac \n",
"3 http://www.monmusic.org/reg/index/ www.monmusic.org \n",
"4 http://www.kachinpost.com/ www.kachinpost.com \n",
"... ... ... \n",
"2902 https://dailyporn.club/ dailyporn.club \n",
"2903 https://landing.mofosnetwork.com/ landing.mofosnetwork.com \n",
"2904 https://www.puretaboo.com/ www.puretaboo.com \n",
"2905 https://xnxx123.net/ xnxx123.net \n",
"2906 https://topmmnews.com/ topmmnews.com \n",
"\n",
"blocking explorer_url probe_cc \\\n",
"0 https://explorer.ooni.org/measurement/20190503... MM \n",
"1 https://explorer.ooni.org/measurement/20190601... MM \n",
"2 https://explorer.ooni.org/measurement/20190601... MM \n",
"3 https://explorer.ooni.org/measurement/20190601... MM \n",
"4 https://explorer.ooni.org/measurement/20190617... MM \n",
"... ... ... \n",
"2902 https://explorer.ooni.org/measurement/20200413... MM \n",
"2903 https://explorer.ooni.org/measurement/20200413... MM \n",
"2904 https://explorer.ooni.org/measurement/20200413... MM \n",
"2905 https://explorer.ooni.org/measurement/20200413... MM \n",
"2906 https://explorer.ooni.org/measurement/20200413... MM \n",
"\n",
"blocking confirmed-dns dns false http-failure tcp_ip \n",
"0 0.0 0.0 1.0 0.0 0.0 \n",
"1 0.0 0.0 1.0 0.0 0.0 \n",
"2 0.0 0.0 1.0 0.0 0.0 \n",
"3 0.0 0.0 1.0 0.0 0.0 \n",
"4 0.0 0.0 1.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"2902 1.0 0.0 0.0 0.0 0.0 \n",
"2903 1.0 0.0 0.0 0.0 0.0 \n",
"2904 1.0 0.0 0.0 0.0 0.0 \n",
"2905 1.0 0.0 0.0 0.0 0.0 \n",
"2906 1.0 0.0 0.0 0.0 0.0 \n",
"\n",
"[2907 rows x 11 columns]"
]
},
"execution_count": 75,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pivot"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>blocking</th>\n",
" <th>probe_cc</th>\n",
" <th>input</th>\n",
" <th>domain</th>\n",
" <th>measurement_start_time</th>\n",
" <th>confirmed-dns</th>\n",
" <th>dns</th>\n",
" <th>false</th>\n",
" <th>http-failure</th>\n",
" <th>tcp_ip</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>19</td>\n",
" <td>MM</td>\n",
" <td>http://burmadigest.info/</td>\n",
" <td>burmadigest.info</td>\n",
" <td>2019-05-06</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>MM</td>\n",
" <td>http://anjali.uncovered.com/</td>\n",
" <td>anjali.uncovered.com</td>\n",
" <td>2019-06-03</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>651</td>\n",
" <td>MM</td>\n",
" <td>https://sci-hub.ac/</td>\n",
" <td>sci-hub.ac</td>\n",
" <td>2019-06-03</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>298</td>\n",
" <td>MM</td>\n",
" <td>http://www.monmusic.org/reg/index/</td>\n",
" <td>www.monmusic.org</td>\n",
" <td>2019-06-03</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>268</td>\n",
" <td>MM</td>\n",
" <td>http://www.kachinpost.com/</td>\n",
" <td>www.kachinpost.com</td>\n",
" <td>2019-06-17</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <td>641</td>\n",
" <td>MM</td>\n",
" <td>https://realthadin.com/</td>\n",
" <td>realthadin.com</td>\n",
" <td>2020-04-13</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>634</td>\n",
" <td>MM</td>\n",
" <td>https://pyidaungsu.hluttaw.mm/</td>\n",
" <td>pyidaungsu.hluttaw.mm</td>\n",
" <td>2020-04-13</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>75</td>\n",
" <td>MM</td>\n",
" <td>http://karennews.org/</td>\n",
" <td>karennews.org</td>\n",
" <td>2020-04-13</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>627</td>\n",
" <td>MM</td>\n",
" <td>https://pornprosnetwork.com/</td>\n",
" <td>pornprosnetwork.com</td>\n",
" <td>2020-04-13</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1032</td>\n",
" <td>MM</td>\n",
" <td>https://zootube1.com/</td>\n",
" <td>zootube1.com</td>\n",
" <td>2020-04-13</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1033 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
"blocking probe_cc input domain \\\n",
"19 MM http://burmadigest.info/ burmadigest.info \n",
"0 MM http://anjali.uncovered.com/ anjali.uncovered.com \n",
"651 MM https://sci-hub.ac/ sci-hub.ac \n",
"298 MM http://www.monmusic.org/reg/index/ www.monmusic.org \n",
"268 MM http://www.kachinpost.com/ www.kachinpost.com \n",
"... ... ... ... \n",
"641 MM https://realthadin.com/ realthadin.com \n",
"634 MM https://pyidaungsu.hluttaw.mm/ pyidaungsu.hluttaw.mm \n",
"75 MM http://karennews.org/ karennews.org \n",
"627 MM https://pornprosnetwork.com/ pornprosnetwork.com \n",
"1032 MM https://zootube1.com/ zootube1.com \n",
"\n",
"blocking measurement_start_time confirmed-dns dns false http-failure \\\n",
"19 2019-05-06 0.0 0.0 1.0 0.0 \n",
"0 2019-06-03 0.0 0.0 1.0 0.0 \n",
"651 2019-06-03 0.0 0.0 1.0 0.0 \n",
"298 2019-06-03 0.0 0.0 1.0 0.0 \n",
"268 2019-06-17 0.0 0.0 1.0 0.0 \n",
"... ... ... ... ... ... \n",
"641 2020-04-13 3.0 0.0 0.0 0.0 \n",
"634 2020-04-13 0.0 0.0 3.0 0.0 \n",
"75 2020-04-13 3.0 0.0 0.0 0.0 \n",
"627 2020-04-13 3.0 0.0 0.0 0.0 \n",
"1032 2020-04-13 3.0 0.0 0.0 0.0 \n",
"\n",
"blocking tcp_ip \n",
"19 0.0 \n",
"0 0.0 \n",
"651 0.0 \n",
"298 0.0 \n",
"268 0.0 \n",
"... ... \n",
"641 0.0 \n",
"634 0.0 \n",
"75 0.0 \n",
"627 0.0 \n",
"1032 0.0 \n",
"\n",
"[1033 rows x 9 columns]"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Weekly sums (W-MON bins) of each blocking column per probe_cc, input and domain,\n",
"# ordered by week.\n",
"# The value columns are selected with a list: indexing a GroupBy with a bare\n",
"# tuple of keys is deprecated since pandas 1.0 and rejected by pandas 2.0.\n",
"(\n",
"    pivot\n",
"    .groupby([\n",
"        'probe_cc', 'input', 'domain',\n",
"        pd.Grouper(key='measurement_start_time', freq='W-MON'),\n",
"    ])[['confirmed-dns', 'dns', 'false', 'http-failure', 'tcp_ip']]\n",
"    .sum()\n",
"    .reset_index()\n",
"    .sort_values('measurement_start_time')\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"# Un-aggregated weekly (W-MON) grouping per probe_cc, input, domain and probe_asn,\n",
"# restricted to the blocking columns.\n",
"# The columns are selected with a list: indexing a GroupBy with a bare tuple of\n",
"# keys is deprecated since pandas 1.0 and rejected by pandas 2.0.\n",
"pgb = pivot.groupby([\n",
"    'probe_cc', 'input', 'domain', 'probe_asn',\n",
"    pd.Grouper(key='measurement_start_time', freq='W-MON'),\n",
"])[['confirmed-dns', 'dns', 'false', 'http-failure', 'tcp_ip']]"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>blocking</th>\n",
" <th>confirmed-dns</th>\n",
" <th>dns</th>\n",
" <th>false</th>\n",
" <th>http-failure</th>\n",
" <th>tcp_ip</th>\n",
" </tr>\n",
" <tr>\n",
" <th>measurement_start_time</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>2019-05-06</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-06-03</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-06-17</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-07-01</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-08-05</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-08-12</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-08-19</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-08-26</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-09-09</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-10-21</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-11-04</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-11-18</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-12-02</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>7.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-12-09</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-12-16</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2019-12-30</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-01-06</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-01-13</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-01-20</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-02-10</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-02-17</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-02-24</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>33.0</td>\n",
" <td>2.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-03-02</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>18.0</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-03-16</td>\n",
" <td>0.0</td>\n",
" <td>25.0</td>\n",
" <td>98.0</td>\n",
" <td>2.0</td>\n",
" <td>7.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-03-23</td>\n",
" <td>4.0</td>\n",
" <td>38.0</td>\n",
" <td>240.0</td>\n",
" <td>14.0</td>\n",
" <td>15.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-03-30</td>\n",
" <td>121.0</td>\n",
" <td>465.0</td>\n",
" <td>192.0</td>\n",
" <td>8.0</td>\n",
" <td>6.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-04-06</td>\n",
" <td>632.0</td>\n",
" <td>36.0</td>\n",
" <td>195.0</td>\n",
" <td>14.0</td>\n",
" <td>11.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2020-04-13</td>\n",
" <td>486.0</td>\n",
" <td>26.0</td>\n",
" <td>133.0</td>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"blocking confirmed-dns dns false http-failure tcp_ip\n",
"measurement_start_time \n",
"2019-05-06 0.0 0.0 1.0 0.0 0.0\n",
"2019-06-03 0.0 0.0 3.0 0.0 0.0\n",
"2019-06-17 0.0 0.0 2.0 0.0 0.0\n",
"2019-07-01 0.0 0.0 2.0 0.0 0.0\n",
"2019-08-05 0.0 0.0 4.0 0.0 0.0\n",
"2019-08-12 0.0 0.0 4.0 0.0 0.0\n",
"2019-08-19 0.0 0.0 2.0 0.0 0.0\n",
"2019-08-26 0.0 0.0 5.0 0.0 0.0\n",
"2019-09-09 0.0 0.0 1.0 0.0 0.0\n",
"2019-10-21 0.0 0.0 3.0 0.0 0.0\n",
"2019-11-04 0.0 0.0 2.0 0.0 0.0\n",
"2019-11-18 0.0 0.0 3.0 0.0 0.0\n",
"2019-12-02 0.0 0.0 7.0 0.0 0.0\n",
"2019-12-09 0.0 0.0 4.0 0.0 0.0\n",
"2019-12-16 0.0 0.0 1.0 0.0 0.0\n",
"2019-12-30 0.0 0.0 6.0 0.0 0.0\n",
"2020-01-06 0.0 1.0 2.0 0.0 0.0\n",
"2020-01-13 0.0 1.0 1.0 0.0 0.0\n",
"2020-01-20 0.0 0.0 1.0 1.0 0.0\n",
"2020-02-10 0.0 0.0 3.0 0.0 0.0\n",
"2020-02-17 0.0 0.0 1.0 0.0 0.0\n",
"2020-02-24 0.0 2.0 33.0 2.0 4.0\n",
"2020-03-02 0.0 0.0 18.0 1.0 5.0\n",
"2020-03-16 0.0 25.0 98.0 2.0 7.0\n",
"2020-03-23 4.0 38.0 240.0 14.0 15.0\n",
"2020-03-30 121.0 465.0 192.0 8.0 6.0\n",
"2020-04-06 632.0 36.0 195.0 14.0 11.0\n",
"2020-04-13 486.0 26.0 133.0 9.0 4.0"
]
},
"execution_count": 111,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Collapse the grouped sums into a single row per week.\n",
"# The index level is addressed by name rather than by position, so the result\n",
"# does not depend on where measurement_start_time sits among pgb's grouping keys.\n",
"pgb.sum().groupby(level='measurement_start_time').sum()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"scp jupyter.ooni.io:/srv/jupyter/home/data/20200416-mm-pivot-msmts-2019.csv .\n"
]
}
],
"source": [
"# Write the pivot table to the shared data directory, then print the scp\n",
"# command that fetches the file from the notebook host.\n",
"dst_filename = '20200416-mm-pivot-msmts-2019.csv'\n",
"dst_path = '../data/{}'.format(dst_filename)\n",
"pivot.to_csv(dst_path)\n",
"scp_hint = 'scp jupyter.ooni.io:/srv/jupyter/home/data/{} .'.format(dst_filename)\n",
"print(scp_hint)"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"scp jupyter.ooni.io:/srv/jupyter/home/data/20200416-mm-raw-msmts-2019.csv .\n"
]
}
],
"source": [
"# Write the raw measurement frame to the shared data directory, then print the\n",
"# scp command that fetches the file from the notebook host.\n",
"dst_filename = '20200416-mm-raw-msmts-2019.csv'\n",
"dst_path = '../data/{}'.format(dst_filename)\n",
"web_dfs.to_csv(dst_path)\n",
"scp_hint = 'scp jupyter.ooni.io:/srv/jupyter/home/data/{} .'.format(dst_filename)\n",
"print(scp_hint)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"# Sorted list of the distinct domains with at least one 'confirmed-dns' row.\n",
"blocked_domains = sorted(\n",
"    web_dfs.loc[web_dfs['blocking'] == 'confirmed-dns', 'domain'].unique()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([133385])"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct probe_asn values among 'confirmed-dns' rows for the blocked domains.\n",
"web_dfs.loc[\n",
"    web_dfs['domain'].isin(blocked_domains)\n",
"    & web_dfs['blocking'].eq('confirmed-dns'),\n",
"    'probe_asn',\n",
"].unique()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment