ramhiser/schools.r

## schools.r
# SAT scores data from Table 5.2 on page 120 of Gelman's BDA3 text
y <- c(28, 8, -3, 7, -1, 1, 18, 12)
sigma <- c(15, 10, 16, 11, 9, 11, 10, 18)

# Goal: Replicate calculations in Section 5.5
# Instructions for posterior simulation given on page 118
library(itertools2)

# Equation 5.21 on page 117
tau_posterior <- function(tau, y, sigma) {
  # Note: Using p(tau) proportional to 1
  denom <- sigma^2 + tau^2
  precision <- sum(1 / denom)
  normal_mean <- sum(y / denom) / precision
  normal_var <- 1 / precision

  sqrt(normal_var) *
    prod(denom^(-1/2) * exp(-(y - normal_mean)^2 / (2 * denom)))
}
tau_posterior <- Vectorize(tau_posterior, vectorize.args="tau")

# Equation 5.20 on page 117
mu_posterior <- function(y, sigma, tau) {
  denom <- sigma^2 + tau^2
  precision <- sum(1 / denom)
  normal_mean <- sum(y / denom) / precision
  normal_sd <- precision^(-1/2)

  rnorm(n=1, mean=normal_mean, sd=normal_sd)
}
mu_posterior <- Vectorize(mu_posterior, vectorize.args="tau")

# Equation 5.17 on page 116
theta_posterior <- function(y, sigma, mu, tau) {
  denom <- sigma^-2 + tau^-2

  normal_mean <- (y / sigma^2 + mu / tau^2) / denom
  normal_sd <- denom^(-1/2)

  rnorm(n=1, mean=normal_mean, sd=normal_sd)
}
theta_posterior <- Vectorize(theta_posterior, vectorize.args=c("mu", "tau"))

# Inverse CDF method (Section 1.9) to sample tau values
# Code based on: http://stackoverflow.com/a/15332467/234233

# Interpolate the density
pdf <- approxfun(tau_grid, tau_post_grid, yleft=0, yright=0)

# Get the cdf by numeric integration
cdf <- function(x) {
  # Because the pdf (i.e., posterior for tau) is unnormalized,
  # divide the CDF by the max value
  max_cdf <- integrate(pdf, 0, max(tau_grid))$value
  integrate(pdf, 0, x)$value / max_cdf

}
# Use a root finding function to invert the cdf
inv_cdf <- function(q) {
  f_root <- function(x) {
    cdf(x) - q
  }
  uniroot(f=f_root, interval=range(tau_grid))$root
}
inv_cdf <- Vectorize(inv_cdf)

# Posterior simulation
tau_grid <- seq(0, 100, length=1000)
tau_post_grid <- tau_posterior(tau_grid, y, sigma)
plot(tau_grid, tau_post_grid, type="l")

set.seed(42)
num_draws <- 10000
tau <- inv_cdf(runif(n=num_draws))
mu <- mu_posterior(y, sigma, tau)
theta <- theta_posterior(y, sigma, mu, tau)

theta_posterior(y, sigma, mu, tau)
theta <- mapply(theta_posterior, y, sigma,
                MoreArgs=list(mu=mu, tau=tau),
                SIMPLIFY=FALSE)


# For each simulation, determine which school's program is the best along
# with the probability of each.
best_school <- apply(do.call(cbind, theta), 1, which.max)
prob_best <- table(best_school) / num_draws
prob_best

# Calculate the probability that school j's program is better than school k's
num_schools <- length(y)
prob_better <- matrix(0, nrow=num_schools, ncol=num_schools)
iter_comb <- itertools2::icombinations(seq_len(num_schools), 2)

dev_null <- lapply(iter_comb, function(comb) {
  j <- comb[1]
  k <- comb[2]

  prob_better[j, k] <<- mean(theta[[j]] > theta[[k]])
  prob_better[k, j] <<- 1 - prob_better[j, k]
})
prob_better
	# SAT scores data from Table 5.2 on page 120 of Gelman's BDA3 text
	y <- c(28, 8, -3, 7, -1, 1, 18, 12)
	sigma <- c(15, 10, 16, 11, 9, 11, 10, 18)

	# Goal: Replicate calculations in Section 5.5
	# Instructions for posterior simulation given on page 118
	library(itertools2)

	# Equation 5.21 on page 117
	tau_posterior <- function(tau, y, sigma) {
	# Note: Using p(tau) proportional to 1
	denom <- sigma^2 + tau^2
	precision <- sum(1 / denom)
	normal_mean <- sum(y / denom) / precision
	normal_var <- 1 / precision

	sqrt(normal_var) *
	prod(denom^(-1/2) * exp(-(y - normal_mean)^2 / (2 * denom)))
	}
	tau_posterior <- Vectorize(tau_posterior, vectorize.args="tau")

	# Equation 5.20 on page 117
	mu_posterior <- function(y, sigma, tau) {
	denom <- sigma^2 + tau^2
	precision <- sum(1 / denom)
	normal_mean <- sum(y / denom) / precision
	normal_sd <- precision^(-1/2)

	rnorm(n=1, mean=normal_mean, sd=normal_sd)
	}
	mu_posterior <- Vectorize(mu_posterior, vectorize.args="tau")

	# Equation 5.17 on page 116
	theta_posterior <- function(y, sigma, mu, tau) {
	denom <- sigma^-2 + tau^-2

	normal_mean <- (y / sigma^2 + mu / tau^2) / denom
	normal_sd <- denom^(-1/2)

	rnorm(n=1, mean=normal_mean, sd=normal_sd)
	}
	theta_posterior <- Vectorize(theta_posterior, vectorize.args=c("mu", "tau"))

	# Inverse CDF method (Section 1.9) to sample tau values
	# Code based on: http://stackoverflow.com/a/15332467/234233

	# Interpolate the density
	pdf <- approxfun(tau_grid, tau_post_grid, yleft=0, yright=0)

	# Get the cdf by numeric integration
	cdf <- function(x) {
	# Because the pdf (i.e., posterior for tau) is unnormalized,
	# divide the CDF by the max value
	max_cdf <- integrate(pdf, 0, max(tau_grid))$value
	integrate(pdf, 0, x)$value / max_cdf

	}
	# Use a root finding function to invert the cdf
	inv_cdf <- function(q) {
	f_root <- function(x) {
	cdf(x) - q
	}
	uniroot(f=f_root, interval=range(tau_grid))$root
	}
	inv_cdf <- Vectorize(inv_cdf)

	# Posterior simulation
	tau_grid <- seq(0, 100, length=1000)
	tau_post_grid <- tau_posterior(tau_grid, y, sigma)
	plot(tau_grid, tau_post_grid, type="l")

	set.seed(42)
	num_draws <- 10000
	tau <- inv_cdf(runif(n=num_draws))
	mu <- mu_posterior(y, sigma, tau)
	theta <- theta_posterior(y, sigma, mu, tau)

	theta_posterior(y, sigma, mu, tau)
	theta <- mapply(theta_posterior, y, sigma,
	MoreArgs=list(mu=mu, tau=tau),
	SIMPLIFY=FALSE)


	# For each simulation, determine which school's program is the best along
	# with the probability of each.
	best_school <- apply(do.call(cbind, theta), 1, which.max)
	prob_best <- table(best_school) / num_draws
	prob_best

	# Calculate the probability that school j's program is better than school k's
	num_schools <- length(y)
	prob_better <- matrix(0, nrow=num_schools, ncol=num_schools)
	iter_comb <- itertools2::icombinations(seq_len(num_schools), 2)

	dev_null <- lapply(iter_comb, function(comb) {
	j <- comb[1]
	k <- comb[2]

	prob_better[j, k] <<- mean(theta[[j]] > theta[[k]])
	prob_better[k, j] <<- 1 - prob_better[j, k]
	})
	prob_better