Skip to content

Instantly share code, notes, and snippets.

@srubin
Created December 14, 2013 20:56
Show Gist options
  • Save srubin/7964799 to your computer and use it in GitHub Desktop.
Save srubin/7964799 to your computer and use it in GitHub Desktop.
Simple labeling and analysis code for my cs294-10 Visualization project, Fall 2013.
import sys
import csv
import os
import numpy as N
import vincent
import simplejson as json
# voiceboxmongo provides a connection to my database,
# which stores data from Mechanical Turk
import voiceboxmongo
def load_statements(vis, strategy=None):
    """Fetch the stored result documents for one visualization.

    Looks in the ``visstratresults`` collection of the ``da_comparisons``
    database for documents whose ``vis`` field equals *vis*.  When
    *strategy* is not ``None`` the lookup is additionally restricted to
    documents whose ``strategy`` field equals it.

    Returns the matching documents as a list.
    """
    criteria = {"vis": vis}
    if strategy is not None:
        criteria["strategy"] = strategy

    with voiceboxmongo.VoiceboxMongo() as client:
        collection = client.da_comparisons.visstratresults
        # Materialize the cursor before leaving the ``with`` block.
        return list(collection.find(criteria))
def assign_nuggets(vis):
    """Interactively label every strategy "E" statement with its nuggets.

    For each statement loaded for *vis* the terminal is cleared, the
    numbered nugget list is printed (the header row of
    ``data/vis/<vis>/nug_scores.csv`` plus a trailing ``WRONG`` entry),
    and the user is asked for the space-separated indices of the nuggets
    the statement contains.

    * An empty answer means "no nuggets" and leaves the row all-zero.
    * An answer containing anything other than indices shown in the list
      is rejected and the prompt is repeated, so a typo cannot silently
      discard (or mis-assign) a statement's labels.

    The resulting 0/1 matrix -- one row per statement, one column per
    listed entry, ``WRONG`` last -- is saved to
    ``data/vis/<vis>/nug_assignments.csv`` and ``build_nugget_result`` is
    then run on it with ``wrong_col=True``.

    Raises:
        ValueError: if ``nug_scores.csv`` has no header row.
    """
    statements = [s["response"][0] for s in load_statements(vis, strategy="E")]

    scores_path = "data/vis/%s/nug_scores.csv" % vis
    with open(scores_path, 'rb') as f:
        # Only the first row (the nugget names) is needed here.
        header = next(csv.reader(f), None)
    if header is None:
        raise ValueError("%s has no header row" % scores_path)
    header.append('WRONG')

    results = N.zeros((len(statements), len(header)))
    menu = "\n".join("%d\t%s" % (i, h) for i, h in enumerate(header))

    for si, statement in enumerate(statements):
        os.system('clear')
        print(menu)
        print("")
        print("%d. %s" % (si, statement))

        # Keep asking until the answer is a (possibly empty) list of
        # indices that actually appear in the menu.
        while True:
            answer = raw_input("Contained nuggets (space separated): ")
            try:
                # split() with no argument tolerates repeated, leading
                # and trailing whitespace.
                picked = [int(token) for token in answer.split()]
            except ValueError:
                print("Please enter whole numbers only.")
                continue
            if any(not 0 <= p < len(header) for p in picked):
                print("Indices must be between 0 and %d." % (len(header) - 1))
                continue
            break

        if picked:
            results[si, picked] = 1
        print("")
        print(results[si])

    N.savetxt("data/vis/%s/nug_assignments.csv" % vis, results, delimiter=',')
    build_nugget_result(vis, statements, wrong_col=True)
def build_nugget_result(vis, statements, wrong_col=False):
    """Score the labelled statements for *vis* and write a JSON summary.

    Inputs read from ``data/vis/<vis>/``:

    * ``nug_scores.csv`` -- first row holds the nugget names; the data
      after it is loaded as the nugget weights.
    * ``nug_assignments.csv`` -- statement-by-nugget matrix whose
      non-zero entries mark the nuggets assigned to a statement.

    A statement's score is the dot product of its assignment row with
    the weights divided by the row sum, i.e. the mean weight of its
    assigned nuggets; statements with no nuggets score 0.

    The summary (per-statement nuggets and score, per-nugget weight and
    statements, and a 10-bin score histogram over [0, 1] with its vincent
    grammar) is dumped as JSON to ``data/vis/<vis>/result.csv``.  Note
    that, despite the extension, the file content is JSON.

    Args:
        vis: name of the visualization; selects the data directory.
        statements: statement texts, in the same order as the rows of
            the assignment matrix.
        wrong_col: when true, the last column of the assignment matrix
            is dropped before scoring.
    """
    scores_path = "data/vis/%s/nug_scores.csv" % vis

    # ndmin keeps the expected rank even when there is only a single
    # nugget (weights) or a single statement (assignments); without it
    # loadtxt collapses the array and the 2-D indexing below fails.
    nug_weights = N.loadtxt(scores_path, delimiter=',', skiprows=1, ndmin=1)
    nugs = N.loadtxt("data/vis/%s/nug_assignments.csv" % vis,
                     delimiter=',', ndmin=2)

    # remove the "wrong" column for now
    if wrong_col:
        nugs = nugs[:, :-1]

    print("%s response scores" % vis)

    # Rows without any nugget produce 0/0; that is expected and mapped to
    # a score of 0 by nan_to_num, so silence the floating-point warning.
    with N.errstate(divide='ignore', invalid='ignore'):
        scores = N.nan_to_num(N.dot(nugs, nug_weights) / N.sum(nugs, axis=1))

    counts, edges = N.histogram(scores, bins=10, range=(0, 1))
    print("median score %s" % N.median(scores))

    centers = (edges[1:] + edges[:-1]) / 2.0
    df = {
        # tolist() yields plain Python ints, which every JSON encoder accepts.
        "v": counts.tolist(),
        # Round-trip through text to strip float noise from the bin centers.
        "i": [float("%.2f" % c) for c in centers],
    }
    score_histogram = vincent.Bar(df, iter_idx="i", key_on="v", height=200, width=400)
    # score_histogram.display()

    # Nugget usage counts, most used first (only needed for the optional
    # display below).
    nug_distribution = -N.sort(-N.sum(nugs, axis=0))
    dist_bar = vincent.Bar(list(nug_distribution), height=200, width=400)
    dist_bar.scales[0].range_max = len(nug_distribution) * 25
    # dist_bar.display()

    with open(scores_path, 'rb') as f:
        nuggets = next(csv.reader(f), None)
    if nuggets is None:
        raise ValueError("%s has no header row" % scores_path)

    n_nuggets = len(nuggets)
    n_statements = len(statements)
    info = {}

    info["statements"] = []
    for i, statement in enumerate(statements):
        info["statements"].append({
            "statement": statement,
            # Indices of the nuggets assigned to this statement.
            "nuggets": N.flatnonzero(nugs[i, :n_nuggets]).tolist(),
            "score": scores[i]
        })

    info["nuggets"] = []
    for j, nugget in enumerate(nuggets):
        info["nuggets"].append({
            "score": nug_weights[j],
            "nugget": nugget,
            # Indices of the statements this nugget was assigned to.
            "statements": N.flatnonzero(nugs[:n_statements, j]).tolist()
        })

    info["score_histogram"] = {
        "bin": df["i"],
        "count": df["v"],
        "spec": score_histogram.grammar()
    }

    # Use a context manager so the output is flushed and closed promptly.
    with open("data/vis/%s/result.csv" % vis, 'w') as out:
        json.dump(info, out)
if __name__ == '__main__':
    # Command line:
    #   <script> VIS           label statements interactively, then build
    #                          the result file
    #   <script> VIS skip      skip labelling; rebuild the result file from
    #                          the existing nug_assignments.csv
    #   <script> VIS skip w    as above, and drop the last column of the
    #                          assignments before scoring
    if len(sys.argv) < 2:
        sys.exit("usage: %s VIS [skip [w]]" % sys.argv[0])

    vis = sys.argv[1]
    if len(sys.argv) > 2 and sys.argv[2] == 'skip':
        # Always bound, whatever the third argument is.
        wrong_col = len(sys.argv) > 3 and sys.argv[3] == 'w'
        statements = [s["response"][0] for s in load_statements(vis, strategy="E")]
        build_nugget_result(vis, statements, wrong_col=wrong_col)
    else:
        assign_nuggets(vis)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment