Skip to content

Instantly share code, notes, and snippets.

@brendano
Created August 10, 2012 05:34
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save brendano/3311340 to your computer and use it in GitHub Desktop.
Save brendano/3311340 to your computer and use it in GitHub Desktop.
http://opinionator.blogs.nytimes.com/2012/08/08/hear-all-ye-people-hearken-o-earth/
http://news.ycombinator.com/item?id=4362277
http://news.ycombinator.com/item?id=4365086
font stronga moda slighta strongd modd slightd
baskerville 1669 2490 544 985 1460 388
comicsans 1567 2418 538 1045 1547 392
compmod 1566 2590 524 1007 1460 330
georgia 1580 2500 574 1101 1520 374
helv 1618 2520 577 1066 1537 381
trebuchet 1578 2527 588 1062 1541 360
from __future__ import division
import sys
import random, bisect
# is this from http://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/ maybe?
class WeightedRandomGenerator(object):
def __init__(self, weights):
self.totals = []
running_total = 0
for w in weights:
running_total += w
self.totals.append(running_total)
def next(self):
rnd = random.random() * self.totals[-1]
return bisect.bisect_right(self.totals, rnd)
def __call__(self):
return self.next()
if __name__=='__main__':
n = int(float(sys.argv[1]))
weights = [float(x) for x in sys.argv[2:]]
gen = WeightedRandomGenerator(weights)
for i in xrange(n):
print gen()
d = read.table("data2.txt",header=T)
scores = c(5,3,2, -5,-3,-1)
n = rowSums(d[,2:ncol(d)])
weighted = t(d[,2:ncol(d)]) * scores
weighted_totals = colSums(weighted)
averages = weighted_totals / n
ex_sq = averages**2
xsq_sums = t(d[,2:ncol(d)]) * (scores**2)
e_xsq = colSums(xsq_sums)/n
stdevs = sqrt(e_xsq - ex_sq)
stderrs = stdevs / sqrt(n)
grand_e_xsq = sum(xsq_sums)/sum(n)
grand_ex = sum(weighted_totals)/sum(n)
grand_ex_sq = grand_ex**2
grand_stdev = sqrt(grand_e_xsq - grand_ex_sq)
# [1] 3.616294
# Baskverville vs Comic Sans
averages[1]-averages[2]
# [1] 0.1698754
(averages[1]-averages[2]) / grand_stdev
# [1] 0.04697499
plot(averages, ylim=c(0.5,1.5),axes=F,xlab=''); segments(1:6, y0=averages-1.96*stderrs, y1=averages+1.96*stderrs); axis(2); axis(1,labels=d$font,at=1:6,las=2); title(main="Weighted averages\nand 95% intervals for these means\nscale is -5 to 5")
# Quantiles via normal approximation -- lame
# > pnorm(averages[2], mean=grand_ex, sd=grand_stdev)
# [1] 0.4923823
# > pnorm(averages[1], mean=grand_ex, sd=grand_stdev)
# [1] 0.5111207
## I tried using
## draws1=replicate(1000000,sample(1:6,1,prob=d[1,2:ncol(d)]))
## but WAY too slow. silly R. Calling Python is much, much faster.
mult_draws = function(n, weights) {
weight_str = sprintf("%s", weights)
cmd = sprintf("python gen.py %s %s", n, str_c(weight_str, collapse=' '))
print(cmd)
scan(pipe(cmd))
}
# > draws1=mult_draws(1000000, d[1,2:ncol(d)])
# > draws2=mult_draws(1000000, d[2,2:ncol(d)])
#
# > scores1=scores[draws1+1]
# > scores2=scores[draws2+1]
#
# > mean(scores1==scores2)
# [1] 0.218856
# > mean(scores1<scores2)
# [1] 0.377949
# > mean(scores1>scores2)
# [1] 0.403195
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment