ajschumacher/scoring.R

## scoring.R

par(pty='s')
par(mfrow=c(1, 2))

ci_lower_bound <- function(pos, n, confidence) {
  if (n == 0) return(0)
  z = qnorm(1 - (1 - confidence) / 2)
  p = pos / n
  (p + z^2 / (2*n) - z * sqrt((p * (1-p) + z^2 / (4*n)) / n )) /
    (1 + z^2 / n)
}

bayes_estimate <- function(pos, n, alpha, beta) {
  (pos + alpha) / (n + beta)
}


confidence <- 0.95
alpha <- 1
beta <- 2
colors <- 200


make_plot <- function(range) { # function wrapping start

ci = expand.grid(range, range)
names(ci) <- c("pos", "neg")
ci$score <- apply(ci, 1, function(v) {
  return(ci_lower_bound(v[1], sum(v), confidence))
})
ci_matrix <- matrix(ci$score, ncol=length(range))
image(range, range, t(ci_matrix),
      breaks=0:colors/colors,
      col=colorRampPalette(c("blue", "white", "red"))(colors),
      xlab="downvotes",
      ylab="upvotes",
      main=expression("Wilson 95% interval lower bound"))

bayes = expand.grid(range, range)
names(bayes) <- c("pos", "neg")
bayes$score <- apply(bayes, 1, function(v) {
  return(bayes_estimate(v[1], sum(v), alpha, beta))
})
bayes_matrix <- matrix(bayes$score, ncol=length(range))
image(range, range, t(bayes_matrix),
      breaks=0:colors/colors,
      col=colorRampPalette(c("blue", "white", "red"))(colors),
      xlab="downvotes",
      ylab="upvotes",
      main=expression("Laplace Smoothing," ~ alpha==1 * ", " ~ beta==2))

} # end make_plot function wrapping

# plots to by 800x460
make_plot(0:200) # plot1
make_plot(0:10) # plot2


make_plot_2 <- function(down, uprange) { # function wrapping start

plot(sapply(uprange, function(x) ci_lower_bound(x, x+down, confidence)) ~ uprange, ylim=c(0,1), ylab="Wilson 95% interval lower bound", xlab=paste("upvotes (against", down, "downvotes)"))
plot(sapply(uprange, function(x) bayes_estimate(x, x+down, alpha, beta)) ~ uprange, ylim=c(0,1), ylab=expression("Laplace Smoothing," ~ alpha==1 * ", " ~ beta==2), xlab=paste("upvotes (against", down, "downvotes)"))

} # end make_plot_2 function wrapping

make_plot_2(10, 0:100) # plot3

# numbers for post
209 - 50
118 - 25
209 / (209+50)
118 / (118+25)
ci_lower_bound(209, 209+50, 0.95)
ci_lower_bound(118, 118+25, 0.95)
bayes_estimate(209, 209+50, 1, 2)
bayes_estimate(118, 118+25, 1, 2)
1 - 0
534 - 46
1 / 1
534 / 580
ci_lower_bound(1, 1, 0.95)
ci_lower_bound(534, 580, 0.95)
bayes_estimate(1, 1, 1, 2)
bayes_estimate(534, 580, 1, 2)

range <- 0:10
data <- expand.grid(range, range, range, range)
names(data) <- c("first_up", "first_down", "second_up", "second_down")
compare_ci <- function(v) {
  first <- ci_lower_bound(v[1], v[1]+v[2], confidence)
  second <- ci_lower_bound(v[3], v[3]+v[4], confidence)
  if (first > second) return('first')
  if (first < second) return('second')
  return('tie')
}
compare_bayes <- function(v) {
  first <- bayes_estimate(v[1], v[1]+v[2], alpha, beta)
  second <- bayes_estimate(v[3], v[3]+v[4], alpha, beta)
  if (first > second) return('first')
  if (first < second) return('second')
  return('tie')
}
data$ci <- apply(data[, 1:4], 1, compare_ci)
data$bayes <- apply(data[, 1:4], 1, compare_bayes)
disagreement <- data[data$ci != data$bayes, ]

# You can find problems like this either way...
7 - 6
10 - 10
7 / (7+6)
10 / 20
ci_lower_bound(7, (7+6), 0.95)
ci_lower_bound(10, 20, 0.95)
bayes_estimate(7, (7+6), 1, 2)
bayes_estimate(10, 20, 1, 2)

	par(pty='s')
	par(mfrow=c(1, 2))

	ci_lower_bound <- function(pos, n, confidence) {
	if (n == 0) return(0)
	z = qnorm(1 - (1 - confidence) / 2)
	p = pos / n
	(p + z^2 / (2n) - z sqrt((p * (1-p) + z^2 / (4*n)) / n )) /
	(1 + z^2 / n)
	}

	bayes_estimate <- function(pos, n, alpha, beta) {
	(pos + alpha) / (n + beta)
	}


	confidence <- 0.95
	alpha <- 1
	beta <- 2
	colors <- 200


	make_plot <- function(range) { # function wrapping start

	ci = expand.grid(range, range)
	names(ci) <- c("pos", "neg")
	ci$score <- apply(ci, 1, function(v) {
	return(ci_lower_bound(v[1], sum(v), confidence))
	})
	ci_matrix <- matrix(ci$score, ncol=length(range))
	image(range, range, t(ci_matrix),
	breaks=0:colors/colors,
	col=colorRampPalette(c("blue", "white", "red"))(colors),
	xlab="downvotes",
	ylab="upvotes",
	main=expression("Wilson 95% interval lower bound"))

	bayes = expand.grid(range, range)
	names(bayes) <- c("pos", "neg")
	bayes$score <- apply(bayes, 1, function(v) {
	return(bayes_estimate(v[1], sum(v), alpha, beta))
	})
	bayes_matrix <- matrix(bayes$score, ncol=length(range))
	image(range, range, t(bayes_matrix),
	breaks=0:colors/colors,
	col=colorRampPalette(c("blue", "white", "red"))(colors),
	xlab="downvotes",
	ylab="upvotes",
	main=expression("Laplace Smoothing," ~ alpha==1 * ", " ~ beta==2))

	} # end make_plot function wrapping

	# plots to by 800x460
	make_plot(0:200) # plot1
	make_plot(0:10) # plot2


	make_plot_2 <- function(down, uprange) { # function wrapping start

	plot(sapply(uprange, function(x) ci_lower_bound(x, x+down, confidence)) ~ uprange, ylim=c(0,1), ylab="Wilson 95% interval lower bound", xlab=paste("upvotes (against", down, "downvotes)"))
	plot(sapply(uprange, function(x) bayes_estimate(x, x+down, alpha, beta)) ~ uprange, ylim=c(0,1), ylab=expression("Laplace Smoothing," ~ alpha==1 * ", " ~ beta==2), xlab=paste("upvotes (against", down, "downvotes)"))

	} # end make_plot_2 function wrapping

	make_plot_2(10, 0:100) # plot3

	# numbers for post
	209 - 50
	118 - 25
	209 / (209+50)
	118 / (118+25)
	ci_lower_bound(209, 209+50, 0.95)
	ci_lower_bound(118, 118+25, 0.95)
	bayes_estimate(209, 209+50, 1, 2)
	bayes_estimate(118, 118+25, 1, 2)
	1 - 0
	534 - 46
	1 / 1
	534 / 580
	ci_lower_bound(1, 1, 0.95)
	ci_lower_bound(534, 580, 0.95)
	bayes_estimate(1, 1, 1, 2)
	bayes_estimate(534, 580, 1, 2)

	range <- 0:10
	data <- expand.grid(range, range, range, range)
	names(data) <- c("first_up", "first_down", "second_up", "second_down")
	compare_ci <- function(v) {
	first <- ci_lower_bound(v[1], v[1]+v[2], confidence)
	second <- ci_lower_bound(v[3], v[3]+v[4], confidence)
	if (first > second) return('first')
	if (first < second) return('second')
	return('tie')
	}
	compare_bayes <- function(v) {
	first <- bayes_estimate(v[1], v[1]+v[2], alpha, beta)
	second <- bayes_estimate(v[3], v[3]+v[4], alpha, beta)
	if (first > second) return('first')
	if (first < second) return('second')
	return('tie')
	}
	data$ci <- apply(data[, 1:4], 1, compare_ci)
	data$bayes <- apply(data[, 1:4], 1, compare_bayes)
	disagreement <- data[data$ci != data$bayes, ]

	# You can find problems like this either way...
	7 - 6
	10 - 10
	7 / (7+6)
	10 / 20
	ci_lower_bound(7, (7+6), 0.95)
	ci_lower_bound(10, 20, 0.95)
	bayes_estimate(7, (7+6), 1, 2)
	bayes_estimate(10, 20, 1, 2)