Skip to content

Instantly share code, notes, and snippets.

Created January 23, 2012 20:41
Show Gist options
  • Save anonymous/1665438 to your computer and use it in GitHub Desktop.
Save anonymous/1665438 to your computer and use it in GitHub Desktop.
import numpy as np
import matplotlib.pyplot as plt
from pylab import *
from collections import defaultdict
def confusion_matrix(results, filename='confmat.png'):
    """Plot the row-normalised confusion matrix of actual vs. chosen events.

    Every record in ``results`` adds one count at
    ``(row=record['event'], column=record['user_event'])``.  A record that
    lacks either key is counted in the extra last class, labelled ``skip``.
    Cell colours show each count divided by its row total; the raw counts
    are written on top of the non-empty cells.

    Args:
        results: Mapping of record id -> record dict.  Event labels are
            parsed as ``int(label[1:])`` and used directly as matrix
            indices, so they must stay below ``N``.
            NOTE(review): presumably labels run from 'E000' to 'E015';
            an id of 16 would be merged with 'skip' -- confirm.
        filename: Path of the PNG file to write.
    """
    N = 17  # 16 event classes plus one trailing 'skip' class
    skip = N - 1

    def class_index(record, key):
        # 'E012' -> 12; a missing key means the record goes to 'skip'.
        return int(record[key][1:]) if key in record else skip

    # calculate the confusion matrix
    conf_mat = np.zeros((N, N))
    for record in results.values():
        row = class_index(record, 'event')
        col = class_index(record, 'user_event')
        conf_mat[row, col] += 1

    # normalize the confusion matrix row by row.  Rows without any sample
    # are left as NaN (the value the former 0/0 division produced) instead
    # of dividing by zero.
    row_totals = conf_mat.sum(axis=1)
    seen = row_totals > 0
    norm_conf = np.full(conf_mat.shape, np.nan)
    norm_conf[seen] = conf_mat[seen] / row_totals[seen][:, np.newaxis]

    # draw the confusion matrix
    labels = ['E0%02d' % i for i in range(N - 1)] + ['skip']
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ind = np.arange(N)
    ax.set_yticks(ind)
    ax.set_yticklabels(labels)
    ax.set_xticks(ind)
    ax.set_xticklabels(labels, rotation='vertical')
    res = ax.imshow(norm_conf, interpolation='nearest')
    fig.colorbar(res)

    # number annotation: raw count on every non-empty cell
    for i, counts in enumerate(conf_mat):
        for j, c in enumerate(counts):
            if c > 0:
                ax.text(j - .2, i + .2, str(int(c)), fontsize=12)

    fig.savefig(filename, format="png")
def time_attempt(results, filename='time_attempt.png'):
    """Plot the average time taken against the attempt index.

    Completed records (those carrying an 'end_time') are grouped by
    'user_id' and ordered by ``(start_time, end_time)``.  The duration of
    each user's i-th completed record is then averaged over all users that
    have at least ``i + 1`` completed records.

    Args:
        results: Mapping of record id -> record dict with 'user_id',
            'start_time' and, for completed records, 'end_time'.
        filename: Path of the PNG file to write.
    """
    spans_by_user = defaultdict(list)
    for record in results.values():
        if 'end_time' in record:
            spans_by_user[record['user_id']].append(
                (record['start_time'], record['end_time']))

    # sum_time[i] / count[i]: total duration and number of users for the
    # i-th attempt.  Both lists grow on demand to the longest history.
    sum_time = []
    count = []
    for spans in spans_by_user.values():
        for i, (start, end) in enumerate(sorted(spans)):
            if i == len(sum_time):
                sum_time.append(0)
                count.append(0)
            sum_time[i] += end - start
            count[i] += 1
    avg_time = [total * 1. / n for total, n in zip(sum_time, count)]

    # Draw on a dedicated figure rather than clearing whichever pyplot
    # figure happens to be current.
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(avg_time)
    ax.set_xlabel('number of attempts')
    ax.set_ylabel('time taken (s)')
    fig.savefig(filename, format='png')
def acc_user(results, filename='acc_user.png'):
    """Plot a bar chart of the accuracy achieved by each user.

    Only completed records (those carrying an 'end_time') are counted.  A
    completed record is correct when its 'user_event' equals its 'event';
    a completed record without a 'user_event' counts as incorrect instead
    of raising ``KeyError``.  Users without any completed record are left
    out, which avoids a division by zero.

    Args:
        results: Mapping of record id -> record dict with 'user_id' and,
            for completed records, 'end_time', 'event' and 'user_event'.
        filename: Path of the PNG file to write.
    """
    tally = defaultdict(lambda: [0, 0])  # user_id -> [correct, completed]
    for record in results.values():
        if 'end_time' not in record:
            continue
        counts = tally[record['user_id']]
        counts[1] += 1
        if 'user_event' in record and record.get('event') == record['user_event']:
            counts[0] += 1

    # Labels and heights come from the same sorted key list so they cannot
    # get out of step with each other.
    users = sorted(tally)
    accuracy = [float(tally[user][0]) / tally[user][1] for user in users]

    # draw bar chart
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    width = .5
    ind = np.arange(len(accuracy))
    # align='edge' is spelled out because the tick positions below assume
    # bars that start at ``ind``; newer matplotlib centres bars by default.
    ax.bar(ind, accuracy, width, align='edge')
    ax.set_xticks(ind + width * .5)
    ax.set_xticklabels(users, rotation='vertical')
    fig.savefig(filename, format='png')
def time_class(results, attr='user_id', filename=None):
    """Plot the average time taken per distinct value of ``attr``.

    Completed records (those carrying an 'end_time') are grouped by
    ``record[attr]``; each bar shows the mean of
    ``end_time - start_time`` within one group.  Bars are ordered by the
    sorted group keys.  When no record is completed an empty chart is
    written.

    Args:
        results: Mapping of record id -> record dict.
        attr: Record key to group by, e.g. 'user_id' or 'event'.  Also
            used as the x-axis label.
        filename: Path of the PNG file to write.  Defaults to
            ``'time_<attr>.png'``.  Previously any value passed here was
            silently replaced by that derived name.

    Raises:
        KeyError: If a completed record has no ``attr`` key.
    """
    if filename is None:
        filename = 'time_%s.png' % attr

    durations = defaultdict(list)
    for record in results.values():
        if 'end_time' in record:
            durations[record[attr]].append(
                record['end_time'] - record['start_time'])

    # Every stored list has at least one entry, so the division is safe.
    keys = sorted(durations)
    avg_time = [float(sum(durations[key])) / len(durations[key])
                for key in keys]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ind = np.arange(len(avg_time))
    width = 0.5
    # Edge-aligned bars with ticks at the bar centres, like the other
    # bar charts in this file.
    ax.bar(ind, avg_time, width, align='edge')
    ax.set_ylabel('average time taken (s)')
    ax.set_xlabel(attr)
    ax.set_xticks(ind + width * .5)
    ax.set_xticklabels(keys, rotation='vertical')
    fig.savefig(filename, format="png")
def acc_time(results, filename='acc_time.png'):
    """Plot accuracy as a function of the time taken, in 5-unit bins.

    Completed records (those carrying an 'end_time') are binned by
    ``int((end_time - start_time) / 5)``; everything that would land past
    the last bin is collected in the last bin.  Each bar shows the share
    of correct records ('user_event' equal to 'event') in its bin and is
    annotated with ``correct / total``.  Empty bins are drawn with
    accuracy 0.

    Args:
        results: Mapping of record id -> record dict with 'user_id',
            'start_time' and, for completed records, 'end_time', 'event'
            and 'user_event'.
        filename: Path of the PNG file to write.
    """
    M = 15    # number of bins, the last one being open-ended
    step = 5  # bin width, in the unit of the timestamps

    frac = [[0, 0] for _ in range(M)]  # per bin: [correct, total]
    for record in results.values():
        if 'end_time' not in record:
            continue
        if record['end_time'] == record['start_time']:
            # Zero-length attempts are reported on stdout, as before.
            print('%s %s %s' % (record['start_time'], record['end_time'],
                                record['user_id']))
        index = int((record['end_time'] - record['start_time']) / step)
        if index > M - 1:
            index = M - 1
        frac[index][1] += 1
        # A record without 'user_event' counts as incorrect rather than
        # raising KeyError.
        if 'user_event' in record and record.get('event') == record['user_event']:
            frac[index][0] += 1

    keys = ['%d-%d' % (i * step, (i + 1) * step) for i in range(M - 1)]
    # The open-ended bin starts where the previous one stops: (M-1)*step.
    keys.append('>= %d' % ((M - 1) * step))

    acc = [float(c) / cnt if cnt else 0.0 for c, cnt in frac]

    fig = plt.figure(figsize=(10, 9))
    ax = fig.add_subplot(1, 1, 1)
    ind = np.arange(len(acc))
    width = 1
    # align='edge' is spelled out because the tick positions below assume
    # bars that start at ``ind``; newer matplotlib centres bars by default.
    rects = ax.bar(ind, acc, width, align='edge')
    ax.set_ylabel('average accuracy')
    ax.set_xlabel('time taken (s)')
    ax.set_xticks(ind + width * .5)
    ax.set_xticklabels(keys, rotation='vertical')

    # Write "correct / total" just above each bar.
    for rect, (c, cnt) in zip(rects, frac):
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * (height + 0.01),
                '%d / %d' % (c, cnt), ha='center',
                va='bottom', rotation='vertical')

    ax.set_ylim([0, 1])
    fig.savefig(filename, format="png")
def calculate_statistics(results):
    """Render every chart for ``results`` using each chart's default filename.

    Args:
        results: Mapping of record id -> record dict, passed unchanged to
            each plotting function.
    """
    # Same call order as before.
    charts = (acc_time, confusion_matrix, time_class, time_attempt, acc_user)
    for make_chart in charts:
        make_chart(results)
if __name__ == "__main__":
    import json

    # Load the recorded results from disk and render all charts.
    with open('results.js', 'r') as results_file:
        results = json.load(results_file)
    calculate_statistics(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment