Skip to content

Instantly share code, notes, and snippets.

@ajschumacher
Created August 17, 2014 17:46
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ajschumacher/b9645724d9d842810613 to your computer and use it in GitHub Desktop.
Save ajschumacher/b9645724d9d842810613 to your computer and use it in GitHub Desktop.
code for a blog post on Wilson scoring vs. Laplace smoothing
par(pty='s')
par(mfrow=c(1, 2))
ci_lower_bound <- function(pos, n, confidence) {
if (n == 0) return(0)
z = qnorm(1 - (1 - confidence) / 2)
p = pos / n
(p + z^2 / (2*n) - z * sqrt((p * (1-p) + z^2 / (4*n)) / n )) /
(1 + z^2 / n)
}
bayes_estimate <- function(pos, n, alpha, beta) {
(pos + alpha) / (n + beta)
}
confidence <- 0.95
alpha <- 1
beta <- 2
colors <- 200
make_plot <- function(range) { # function wrapping start
ci = expand.grid(range, range)
names(ci) <- c("pos", "neg")
ci$score <- apply(ci, 1, function(v) {
return(ci_lower_bound(v[1], sum(v), confidence))
})
ci_matrix <- matrix(ci$score, ncol=length(range))
image(range, range, t(ci_matrix),
breaks=0:colors/colors,
col=colorRampPalette(c("blue", "white", "red"))(colors),
xlab="downvotes",
ylab="upvotes",
main=expression("Wilson 95% interval lower bound"))
bayes = expand.grid(range, range)
names(bayes) <- c("pos", "neg")
bayes$score <- apply(bayes, 1, function(v) {
return(bayes_estimate(v[1], sum(v), alpha, beta))
})
bayes_matrix <- matrix(bayes$score, ncol=length(range))
image(range, range, t(bayes_matrix),
breaks=0:colors/colors,
col=colorRampPalette(c("blue", "white", "red"))(colors),
xlab="downvotes",
ylab="upvotes",
main=expression("Laplace Smoothing," ~ alpha==1 * ", " ~ beta==2))
} # end make_plot function wrapping
# plots to by 800x460
make_plot(0:200) # plot1
make_plot(0:10) # plot2
make_plot_2 <- function(down, uprange) { # function wrapping start
plot(sapply(uprange, function(x) ci_lower_bound(x, x+down, confidence)) ~ uprange, ylim=c(0,1), ylab="Wilson 95% interval lower bound", xlab=paste("upvotes (against", down, "downvotes)"))
plot(sapply(uprange, function(x) bayes_estimate(x, x+down, alpha, beta)) ~ uprange, ylim=c(0,1), ylab=expression("Laplace Smoothing," ~ alpha==1 * ", " ~ beta==2), xlab=paste("upvotes (against", down, "downvotes)"))
} # end make_plot_2 function wrapping
make_plot_2(10, 0:100) # plot3
# numbers for post
209 - 50
118 - 25
209 / (209+50)
118 / (118+25)
ci_lower_bound(209, 209+50, 0.95)
ci_lower_bound(118, 118+25, 0.95)
bayes_estimate(209, 209+50, 1, 2)
bayes_estimate(118, 118+25, 1, 2)
1 - 0
534 - 46
1 / 1
534 / 580
ci_lower_bound(1, 1, 0.95)
ci_lower_bound(534, 580, 0.95)
bayes_estimate(1, 1, 1, 2)
bayes_estimate(534, 580, 1, 2)
range <- 0:10
data <- expand.grid(range, range, range, range)
names(data) <- c("first_up", "first_down", "second_up", "second_down")
compare_ci <- function(v) {
first <- ci_lower_bound(v[1], v[1]+v[2], confidence)
second <- ci_lower_bound(v[3], v[3]+v[4], confidence)
if (first > second) return('first')
if (first < second) return('second')
return('tie')
}
compare_bayes <- function(v) {
first <- bayes_estimate(v[1], v[1]+v[2], alpha, beta)
second <- bayes_estimate(v[3], v[3]+v[4], alpha, beta)
if (first > second) return('first')
if (first < second) return('second')
return('tie')
}
data$ci <- apply(data[, 1:4], 1, compare_ci)
data$bayes <- apply(data[, 1:4], 1, compare_bayes)
disagreement <- data[data$ci != data$bayes, ]
# You can find problems like this either way...
7 - 6
10 - 10
7 / (7+6)
10 / 20
ci_lower_bound(7, (7+6), 0.95)
ci_lower_bound(10, 20, 0.95)
bayes_estimate(7, (7+6), 1, 2)
bayes_estimate(10, 20, 1, 2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment