Created
October 22, 2012 16:46
-
-
Save willscott/3932484 to your computer and use it in GitHub Desktop.
Tor Disruption Extractor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Stat 2 Disruptions""" | |
"""Converts country per-day stats into interruption periods""" | |
import datetime | |
import os | |
import sys | |
import pickle | |
from numpy import std | |
# Per-day statistics, filled via days.update() from the pickled stat files.
# Keys are compared against datetime objects; values are per-country counts.
days = {}
# Outage periods per country, populated by addOutage().
outages = {}
def addOutage(country, period):
    """Record one outage period for a country in the global ``outages`` map.

    Args:
        country: Key under which the outage is stored.
        period: The outage period to record; kept as a single list entry.
    """
    # Append rather than ``+=``: the first period is stored as ``[period]``,
    # so later periods must also be stored whole instead of being flattened
    # element by element into the list.
    outages.setdefault(country, []).append(period)
# Load Data
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only point sys.argv[1] at stat files you generated yourself.
for root, dirs, files in os.walk(sys.argv[1]):  # the stats
    for f in files:
        # Use a context manager so every stat file is closed after loading.
        with open(os.path.join(root, f), "rb") as stat_file:
            partialmap = pickle.load(stat_file)
        days.update(partialmap)

# Load Range
# Start from sentinels (now / year 1900) and narrow to the earliest and latest
# keys actually present in ``days``.
oldest = datetime.datetime.now()
newest = datetime.datetime(1900, 1, 1)
for key in days:
    if key < oldest:
        oldest = key
    if key > newest:
        newest = key

# Scan
# Closed disruptions per country: lists of [start_day, end_day].
disrupted_hist = {}
# Currently open disruptions per country: {'limit': ..., 'start': ...}.
disrupted = {}
# Sliding windows of per-country daily shares: the 14 days before the current
# window, and the current 14-day window.
window_prev = {}
window = {}
def addToWindow(w, day):
    """Append each country's share of one day's total to a window.

    Args:
        w: Window dict mapping country -> list of daily shares; mutated
            in place.
        day: Offset in days from the global ``oldest`` date.

    Days that have no entry in ``days``, or whose counts sum to zero, are
    skipped and leave ``w`` untouched.
    """
    key = oldest + datetime.timedelta(days=day)
    if key not in days:
        return
    counts = days[key]
    day_total = float(sum(counts.values()))
    if day_total == 0:
        # No share can be computed for an all-zero day; skipping it avoids a
        # ZeroDivisionError.
        return
    for c, count in counts.items():
        w.setdefault(c, []).append(count / day_total)
def removeFromWindow(w, day):
    """Drop one day's per-country shares from a window.

    Args:
        w: Window dict mapping country -> list of daily shares; mutated
            in place.
        day: Offset in days from the global ``oldest`` date.

    Days that have no entry in ``days``, or whose counts sum to zero, are
    skipped. Countries not present in ``w`` are ignored.

    Raises:
        ValueError: if a country is in ``w`` but its share for that day is
            not in its list.
    """
    key = oldest + datetime.timedelta(days=day)
    if key not in days:
        return
    counts = days[key]
    day_total = float(sum(counts.values()))
    if day_total == 0:
        # An all-zero day has no computable shares; skipping it avoids a
        # ZeroDivisionError.
        return
    for c, count in counts.items():
        if c in w:
            # Recompute the share for that day and remove its first
            # occurrence from the country's list.
            w[c].remove(count / day_total)
def undisrupt(c, day):
    """Close the open disruption for country ``c`` and archive it.

    Appends ``[start, day]`` to ``disrupted_hist[c]`` (creating the list on
    first use) and then removes ``c`` from ``disrupted``.

    Args:
        c: Country whose disruption ends.
        day: Day offset on which the disruption ends.

    Raises:
        KeyError: if ``c`` has no open entry in ``disrupted``.
    """
    entry = [disrupted[c]['start'], day]
    disrupted_hist.setdefault(c, []).append(entry)
    del disrupted[c]
total = 0  # NOTE(review): never read in the visible code.

# Number of whole days between the oldest and newest data points.
span_days = (newest - oldest).days

# Walk every day offset, *including* the newest day itself (offset span_days);
# range(span_days) would silently leave the last day of data unscanned.
for day in range(span_days + 1):
    # ``window`` holds the most recent 14 days, ``window_prev`` the 14 days
    # before that.
    addToWindow(window, day)
    removeFromWindow(window, day - 14)
    addToWindow(window_prev, day - 14)
    removeFromWindow(window_prev, day - 28)
    for country in window:
        if country not in window_prev:
            continue
        # Require at least 5 samples on both sides before comparing.
        if len(window[country]) < 5 or len(window_prev[country]) < 5:
            continue
        now = sum(window[country]) / len(window[country])
        if country in disrupted:
            # An open disruption ends once the current mean share climbs back
            # above the limit recorded when it started.
            if now > disrupted[country]['limit']:
                undisrupt(country, day)
        else:
            dev = std(window_prev[country])
            then = sum(window_prev[country]) / len(window_prev[country])
            # A disruption starts when the current mean falls more than four
            # standard deviations below the previous window's mean; recovery
            # is declared at two standard deviations below it.
            if now < then - 4 * dev:
                disrupted[country] = {'limit': then - 2 * dev, 'start': day}
                print("Saw discontinuity in " + country + ": values " + str(now) + " vs " + str(then))

# Close every disruption still open at the end of the data. Iterate over a
# copy because undisrupt() deletes from ``disrupted``.
for country in list(disrupted):
    undisrupt(country, span_days)
print(disrupted_hist)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Tor 2 Stats""" | |
"""Converts extra-infos data to #ips/country/day info""" | |
import datetime | |
import os | |
import sys | |
import pickle | |
# Maps each descriptor's publication date to {country: summed count} taken
# from its "dirreq-v3-ips" line.
days = {}
for root, dirs, files in os.walk(sys.argv[1]):
    for f in files:
        fullpath = os.path.join(root, f)
        date = None
        ips = {}
        # Context manager so each descriptor file is closed after reading.
        with open(fullpath) as descriptor:
            for line in descriptor:
                if line.startswith("published"):
                    date = datetime.datetime.strptime(line.split()[1], "%Y-%m-%d")
                elif line.startswith("dirreq-v3-ips"):
                    try:
                        itms = line.split()[1].split(",")
                        ips = {c.split("=")[0]: int(c.split("=")[1]) for c in itms}
                    except (IndexError, ValueError):
                        # Empty or malformed country list: stop reading this
                        # file and keep whatever was parsed so far.
                        break
        if date is None:
            # No "published" line means there is no date to file the counts
            # under; skip the file rather than storing it under a bogus
            # non-date key.
            continue
        totals = days.setdefault(date, {})
        for country, count in ips.items():
            totals[country] = totals.get(country, 0) + count

# Write the aggregated map; the context manager flushes and closes the file.
with open(sys.argv[2], "wb") as out:
    pickle.dump(days, out)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# For every archive URL listed in torurls.txt: download it, unpack it,
# convert the extracted folder to per-day stats with tor2stat.py, then
# delete the extracted folder.
while read -r line
do
  # Skip blank lines in the URL list.
  [ -n "$line" ] || continue
  # A failed download leaves nothing to unpack; move on to the next URL.
  wget "$line" || continue
  tar xjf *.bz2
  rm *.bz2
  folder=$(ls | grep extra)
  # Without an extracted folder, the commands below would run with empty
  # arguments; skip instead.
  [ -n "$folder" ] || continue
  python tor2stat.py "$folder" "stats/$folder"
  rm -r "$folder"
  echo "$folder"
done < "torurls.txt"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment