Skip to content

Instantly share code, notes, and snippets.

@aflaxman
Created February 11, 2010 16:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aflaxman/301647 to your computer and use it in GitHub Desktop.
Save aflaxman/301647 to your computer and use it in GitHub Desktop.
import csv
from networkx import Graph
from pylab import *
class MsgData:
    """Categorised message reports loaded from a CSV file into a graph.

    Each CSV row becomes a graph node keyed by the integer in its ``#``
    column, with the remaining columns stored as node attributes.  Each
    category code becomes a node as well, joined by an edge to every report
    listed under it.  ``categories`` maps category code -> description.
    """

    def __init__(self, fname='haiti_sms.csv',
                 start_date='JAN 12',
                 end_date='FEB 11'):
        """Read ``fname`` and build the report/category graph.

        Parameters
        ----------
        fname : str
            Path of the CSV file.  Its first line is discarded and the
            following line is used as the header row; the ``#`` and
            ``CATEGORY`` columns are required.
        start_date, end_date : str
            Day strings without a year; ``streams`` appends ``' 2010'`` to
            them to build the histogram range.

        Raises
        ------
        ValueError
            If a non-blank category entry contains no ``'.'`` separating
            the code from the description, or ``#`` is not an integer.
        """
        self.start_date = start_date
        self.end_date = end_date
        self.categories = {}
        self.data = Graph()

        # The context manager guarantees the file is closed even when a
        # malformed row raises part-way through.
        with open(fname) as f:
            f.readline()  # discard the line preceding the header row
            for d in csv.DictReader(f):
                d_id = int(d.pop('#'))
                for c in d['CATEGORY'].split(','):
                    # Skip blank fragments (e.g. produced by a trailing comma).
                    if not c.strip():
                        continue
                    # Split on the first dot only, so a description that
                    # itself contains a '.' does not break the unpacking.
                    c_code, c_str = c.split('.', 1)
                    c_code = c_code.strip()
                    c_str = c_str.strip()
                    # NOTE(review): passing the attribute dict positionally
                    # looks like the networkx 1.x calling convention -
                    # confirm against the installed networkx version.
                    self.data.add_node(d_id, d)
                    self.data.add_edge(c_code, d_id)
                    self.categories[c_code] = c_str

    def streams(self):
        """Histogram the report times of every category.

        Returns
        -------
        (bins, streams)
            ``bins`` is the array of bin edges, spaced 0.01 apart in the
            units returned by ``datestr2num``, running from ``start_date``
            to ``end_date`` of 2010.  ``streams`` holds one count array per
            category, ordered by sorted category code; it is empty when no
            categories were loaded.
        """
        # The bin edges are the same for every category: build them once.
        bins = arange(datestr2num('%s 2010' % self.start_date),
                      datestr2num('%s 2010' % self.end_date), .01)
        streams = []
        for c in sorted(self.categories):
            # Neighbours of a category node are the reports filed under it.
            # NOTE(review): `Graph.node` is the older networkx accessor -
            # confirm it exists in the installed version.
            times_c = [datestr2num(self.data.node[n]['INCIDENT DATE'])
                       for n in self.data[c]]
            stream_c, _ = histogram(times_c, bins)
            streams.append(stream_c)
        return bins, streams
def txt_col(col):
    """Return the first three components of ``col``, each scaled by one half.

    Any further components (such as an alpha channel) are dropped.
    """
    first, second, third = col[0], col[1], col[2]
    return (.5 * first, .5 * second, .5 * third)
def plot_report_streams(D, cmap=cm.spectral):
    """Plot one smoothed report-frequency curve per category, stacked.

    Parameters
    ----------
    D : object providing ``streams()``, ``categories``, ``start_date`` and
        ``end_date`` (e.g. ``MsgData``).
    cmap : callable mapping a value in [0, 1) to a colour tuple; curve ``i``
        of ``n`` is drawn with ``cmap(i / n)``.

    Curve ``i`` is raised by ``i`` on the y axis and each successive curve
    is shifted 0.1 further left on the x axis.  Each curve is labelled at
    both ends with its category description.
    """
    t, streams = D.streams()
    n = float(len(streams))
    x = t.copy()  # copied because x is shifted in place inside the loop

    # Smooth every histogram by convolving with a normal-pdf kernel.
    k = normpdf(arange(-10, 10, .1), 0, 4)
    streams = [np.convolve(s_c, k) for s_c in streams]

    # Descriptions in sorted-code order, matching the order of `streams`.
    # Sorted once here instead of twice per loop iteration.
    descriptions = [desc for _, desc in sorted(D.categories.items())]

    for i, smoothed in enumerate(streams):
        color = cmap(i / n)
        # Trim the convolution tails so the curve has the same length as x.
        curve = i + smoothed[99:-99]

        plot_date(x, curve, '-',
                  linewidth=3,
                  alpha=1, color=color, zorder=.5 - i)
        # Closed polygon: along the curve, then back along the baseline y=i.
        fill(hstack((x, x[::-1])),
             hstack((curve, (i + zeros(len(x)))[::-1])),
             linewidth=2,
             color=color, alpha=.25, zorder=-i)

        # Show the text after the '| ' separator; fall back to the whole
        # description when the separator is missing rather than crashing.
        parts = descriptions[i].split('| ')
        label = parts[1] if len(parts) > 1 else descriptions[i]

        text(x[0], i + .4, label,
             color=txt_col(color), va='top', ha='right',
             fontsize=8, rotation=30)
        text(x[-1] + .01, i, label,
             color=txt_col(color), va='bottom', ha='left',
             fontsize=8, rotation=30)

        x -= .1

    yticks([])

    # Build the tick range with the same explicit 2010 year that the data
    # bins use; a year-less date string would otherwise be resolved
    # against a different year and put the ticks far away from the data.
    day_starts = arange(datestr2num('%s 2010' % D.start_date),
                        datestr2num('%s 2010' % D.end_date))
    xticks(.5 + day_starts,
           ['%d-%d' % (num2date(d).month, num2date(d).day)
            for d in day_starts],
           fontsize=8, rotation=75, ha='center')

    # Pad the view so the rotated end-of-curve labels have room.
    left, right, bottom, top = axis()
    axis([left - 4.5, right + 4.5, bottom - 4, top])
def hist_reports(D):
    """Draw a bar chart of how many reports fall under each category.

    Bars are ordered from the most to the least populated category and
    labelled with the last ``'| '``-separated part of each description.
    """
    # (count, code) pairs, largest count first.
    ranked = sorted(
        ([len(D.data[code]), code] for code in D.categories),
        reverse=True)
    n_bars = len(ranked)
    positions = arange(n_bars)
    heights = [count for count, _code in ranked]
    labels = [D.categories[code].split('| ')[-1] for _count, code in ranked]

    bar(positions, heights)
    xticks(positions + .5, labels,
           fontsize=8, rotation=45, ha='right')
    ylabel('Number of Reports')
    title('Number of Reports by Type')

    # Widen the x range by half a unit on each side of the bars.
    x_min, _x_max, y_min, y_max = axis()
    axis([x_min - .5, n_bars + .5, y_min, y_max])
# Script entry point: load the default data file, draw the per-category
# streams and save the figure.  The module name of a directly executed
# script is '__main__' (with underscores), never 'main'.
if __name__ == '__main__':
    D = MsgData()
    plot_report_streams(D)
    savefig('reports.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment