Skip to content

Instantly share code, notes, and snippets.

@willscott
Created October 22, 2012 16:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save willscott/3932484 to your computer and use it in GitHub Desktop.
Save willscott/3932484 to your computer and use it in GitHub Desktop.
Tor Disruption Extractor
"""Stat 2 Disruptions"""
"""Converts country per-day stats into interruption periods"""
import datetime
import os
import sys
import pickle
from numpy import std
# Per-day statistics, filled from the pickled stats files: day -> {country: count}.
days = {}
# Recorded outage periods: country -> list of periods.
outages = {}


def addOutage(country, period):
    """Record one outage period for a country.

    Args:
        country: Key identifying the country.
        period: The outage period to store; it is kept as one list entry.
    """
    # Always append. The previous ``+=`` spliced the elements of ``period``
    # into the list for known countries, while new countries got ``[period]``.
    outages.setdefault(country, []).append(period)
# Load Data
# Merge every pickled partial map found under the stats directory (first
# command-line argument) into ``days``.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this on stats files you produced yourself.
for root, dirs, files in os.walk(sys.argv[1]):  # the stats
    for f in files:
        # Close each stats file promptly instead of leaking the handle.
        with open(os.path.join(root, f), "rb") as stats_file:
            partialmap = pickle.load(stats_file)
        days.update(partialmap)
# Load Range
# Find the earliest and latest day present in ``days``. The sentinels keep
# the original fallbacks: with no data, ``oldest`` is the current time and
# ``newest`` is 1900-01-01, which makes the scan range empty.
oldest = min([datetime.datetime.now()] + list(days))
newest = max([datetime.datetime(1900, 1, 1)] + list(days))
# Scan
# State shared by the window helpers and the day-by-day scan loop.
# country -> list of [start_day, end_day] for disruptions that have ended
disrupted_hist = dict()
# country -> {'limit': ..., 'start': ...} for disruptions still in progress
disrupted = dict()
# country -> daily shares for the 14 days preceding ``window``
window_prev = dict()
# country -> daily shares for the most recent 14 days
window = dict()
def addToWindow(w, day):
    """Append each country's share of one day's total to a window.

    Args:
        w: Window dict mapping country -> list of daily shares; updated in
            place.
        day: Offset in days from the module-level ``oldest`` date. Offsets
            that have no entry in ``days`` are ignored.
    """
    key = oldest + datetime.timedelta(days=day)
    if key not in days:
        return
    counts = days[key]
    total = float(sum(counts.values()))
    if not total:
        # Nothing was counted that day, so no share can be computed
        # (dividing would raise ZeroDivisionError).
        return
    for country, count in counts.items():
        w.setdefault(country, []).append(count / total)
def removeFromWindow(w, day):
    """Drop one day's country shares from a window.

    The share is recomputed exactly as it was when added and one matching
    value is removed from each country's list.

    Args:
        w: Window dict mapping country -> list of daily shares; updated in
            place. Countries missing from ``w`` are skipped.
        day: Offset in days from the module-level ``oldest`` date. Offsets
            that have no entry in ``days`` are ignored.

    Raises:
        ValueError: If a country's list does not contain that day's share.
    """
    key = oldest + datetime.timedelta(days=day)
    if key not in days:
        return
    counts = days[key]
    total = float(sum(counts.values()))
    if not total:
        # No share can be computed for a day whose total is zero
        # (dividing would raise ZeroDivisionError).
        return
    for country, count in counts.items():
        if country in w:
            w[country].remove(count / total)
def undisrupt(c, day):
    """Close a country's ongoing disruption and archive it.

    Appends ``[start, day]`` to ``disrupted_hist[c]`` and removes ``c`` from
    ``disrupted``.

    Args:
        c: Country that currently has an entry in ``disrupted``.
        day: Day offset recorded as the end of the disruption.

    Raises:
        KeyError: If ``c`` is not currently marked as disrupted.
    """
    start = disrupted[c]['start']
    disrupted_hist.setdefault(c, []).append([start, day])
    del disrupted[c]
# Walk through the data one day at a time while maintaining two adjacent
# 14-day windows of per-country shares: ``window`` covers days
# day-13..day and ``window_prev`` covers days day-27..day-14.
# The range is inclusive so that the newest day is scanned too; the previous
# ``range((newest - oldest).days)`` stopped one day short.
for day in range((newest - oldest).days + 1):
    addToWindow(window, day)
    removeFromWindow(window, day - 14)
    addToWindow(window_prev, day - 14)
    removeFromWindow(window_prev, day - 28)
    for country in window:
        if country not in window_prev:
            continue
        # Require at least five samples on each side before comparing.
        if len(window[country]) < 5 or len(window_prev[country]) < 5:
            continue
        now = sum(window[country]) / len(window[country])
        if country in disrupted:
            # An ongoing disruption ends once the recent mean climbs back
            # above the limit fixed when the disruption was detected.
            if now > disrupted[country]['limit']:
                undisrupt(country, day)
            continue
        dev = std(window_prev[country])
        then = sum(window_prev[country]) / len(window_prev[country])
        # A drop of more than four standard deviations below the previous
        # window's mean starts a disruption; recovery is declared at two.
        if now < then - 4 * dev:
            disrupted[country] = {'limit': then - 2 * dev, 'start': day}
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Saw discontinuity in " + country + ": values " + str(now) + " vs " + str(then))
# Close every disruption still open at the end of the data, using the offset
# of the newest day as its end. Iterate over a copy of the keys because
# undisrupt() deletes entries from ``disrupted``.
last_day = (newest - oldest).days
for country in list(disrupted):
    undisrupt(country, last_day)
# Single-argument print(...) behaves the same on Python 2 and 3.
print(disrupted_hist)
"""Tor 2 Stats"""
"""Converts extra-infos data to #ips/country/day info"""
import datetime
import os
import sys
import pickle
# Per-day totals: day (datetime parsed from the "published" line) ->
# {country: summed count from the "dirreq-v3-ips" lines}.
days = {}
for root, dirs, files in os.walk(sys.argv[1]):
    for f in files:
        fullpath = os.path.join(root, f)
        date = None
        ips = {}
        # Close each descriptor file promptly instead of leaking the handle.
        with open(fullpath) as descriptor:
            for line in descriptor:
                if line.startswith("published"):
                    date = datetime.datetime.strptime(line.split()[1], "%Y-%m-%d")
                elif line.startswith("dirreq-v3-ips"):
                    try:
                        itms = line.split()[1].split(",")
                        ips = {c.split("=")[0]: int(c.split("=")[1]) for c in itms}
                    except (IndexError, ValueError):
                        # Missing or malformed value list: stop reading this
                        # file and keep whatever was parsed so far.
                        break
        if date is None:
            # No "published" line was seen. The old code filed such data
            # under the key 0, which cannot be compared with the datetime
            # keys when the stats are consumed.
            continue
        totals = days.setdefault(date, {})
        for country, count in ips.items():
            totals[country] = totals.get(country, 0) + count
# Write the collected per-day map to the output path (second command-line
# argument); the ``with`` block makes sure the file is flushed and closed.
with open(sys.argv[2], "wb") as out:
    pickle.dump(days, out)
#!/bin/bash
# For every archive URL listed in torurls.txt: download it, unpack it,
# convert each extracted folder into a stats pickle under stats/ and then
# remove the downloaded and extracted data again.
mkdir -p stats
# IFS= and -r keep each URL intact; the extra test also processes a final
# line that lacks a trailing newline.
while IFS= read -r line || [ -n "$line" ]
do
    # Skip blank lines instead of calling wget without a URL.
    [ -n "$line" ] || continue
    if ! wget "$line"; then
        echo "download failed: $line" >&2
        continue
    fi
    # tar takes one archive per invocation, so unpack them one at a time.
    for archive in *.bz2
    do
        [ -e "$archive" ] || continue
        tar xjf "$archive"
        rm -- "$archive"
    done
    # Convert and clean up every extracted directory whose name contains
    # "extra"; nothing runs when no such directory exists.
    for folder in *extra*
    do
        [ -d "$folder" ] || continue
        python tor2stat.py "$folder" "stats/$folder"
        rm -r -- "$folder"
        echo "$folder"
    done
done < "torurls.txt"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment