Created
August 10, 2012 05:34
-
-
Save brendano/3311340 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
http://opinionator.blogs.nytimes.com/2012/08/08/hear-all-ye-people-hearken-o-earth/ | |
http://news.ycombinator.com/item?id=4362277 | |
http://news.ycombinator.com/item?id=4365086 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
font stronga moda slighta strongd modd slightd | |
baskerville 1669 2490 544 985 1460 388 | |
comicsans 1567 2418 538 1045 1547 392 | |
compmod 1566 2590 524 1007 1460 330 | |
georgia 1580 2500 574 1101 1520 374 | |
helv 1618 2520 577 1066 1537 381 | |
trebuchet 1578 2527 588 1062 1541 360 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import sys | |
import random, bisect | |
# is this from http://eli.thegreenplace.net/2010/01/22/weighted-random-generation-in-python/ maybe? | |
class WeightedRandomGenerator(object): | |
def __init__(self, weights): | |
self.totals = [] | |
running_total = 0 | |
for w in weights: | |
running_total += w | |
self.totals.append(running_total) | |
def next(self): | |
rnd = random.random() * self.totals[-1] | |
return bisect.bisect_right(self.totals, rnd) | |
def __call__(self): | |
return self.next() | |
if __name__=='__main__': | |
n = int(float(sys.argv[1])) | |
weights = [float(x) for x in sys.argv[2:]] | |
gen = WeightedRandomGenerator(weights) | |
for i in xrange(n): | |
print gen() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
d = read.table("data2.txt",header=T) | |
scores = c(5,3,2, -5,-3,-1) | |
n = rowSums(d[,2:ncol(d)]) | |
weighted = t(d[,2:ncol(d)]) * scores | |
weighted_totals = colSums(weighted) | |
averages = weighted_totals / n | |
ex_sq = averages**2 | |
xsq_sums = t(d[,2:ncol(d)]) * (scores**2) | |
e_xsq = colSums(xsq_sums)/n | |
stdevs = sqrt(e_xsq - ex_sq) | |
stderrs = stdevs / sqrt(n) | |
grand_e_xsq = sum(xsq_sums)/sum(n) | |
grand_ex = sum(weighted_totals)/sum(n) | |
grand_ex_sq = grand_ex**2 | |
grand_stdev = sqrt(grand_e_xsq - grand_ex_sq) | |
# [1] 3.616294 | |
# Baskverville vs Comic Sans | |
averages[1]-averages[2] | |
# [1] 0.1698754 | |
(averages[1]-averages[2]) / grand_stdev | |
# [1] 0.04697499 | |
plot(averages, ylim=c(0.5,1.5),axes=F,xlab=''); segments(1:6, y0=averages-1.96*stderrs, y1=averages+1.96*stderrs); axis(2); axis(1,labels=d$font,at=1:6,las=2); title(main="Weighted averages\nand 95% intervals for these means\nscale is -5 to 5") | |
# Quantiles via normal approximation -- lame | |
# > pnorm(averages[2], mean=grand_ex, sd=grand_stdev) | |
# [1] 0.4923823 | |
# > pnorm(averages[1], mean=grand_ex, sd=grand_stdev) | |
# [1] 0.5111207 | |
## I tried using | |
## draws1=replicate(1000000,sample(1:6,1,prob=d[1,2:ncol(d)])) | |
## but WAY too slow. silly R. Calling Python is much, much faster. | |
mult_draws = function(n, weights) { | |
weight_str = sprintf("%s", weights) | |
cmd = sprintf("python gen.py %s %s", n, str_c(weight_str, collapse=' ')) | |
print(cmd) | |
scan(pipe(cmd)) | |
} | |
# > draws1=mult_draws(1000000, d[1,2:ncol(d)]) | |
# > draws2=mult_draws(1000000, d[2,2:ncol(d)]) | |
# | |
# > scores1=scores[draws1+1] | |
# > scores2=scores[draws2+1] | |
# | |
# > mean(scores1==scores2) | |
# [1] 0.218856 | |
# > mean(scores1<scores2) | |
# [1] 0.377949 | |
# > mean(scores1>scores2) | |
# [1] 0.403195 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment