vankesteren/proportion_intervals.R

## proportion_intervals.R
# Different 95% uncertainty intervals for a proportion
dat <- c(rep(0, 38), rep(1, 2))

# normal approximation on probability scale
ci_normal <- function(dat) {
  mu <- mean(dat)
  se <- sqrt(mu * (1 - mu) / length(dat))
  return(c(
    "2.5 %"  = mu + qnorm(0.025)*s_normal,
    "97.5 %" = mu + qnorm(0.975)*se
  ))
}

# normal approx. on log-odds
ci_logodds <- function(dat) {
  p  <- mean(dat)
  mu <- log(p/(1-p))
  se <- sqrt(1 / (length(dat)*p*(1-p))) # https://stats.stackexchange.com/a/266116/116878
  ci <- c(
    "2.5 %"  = mu + qnorm(0.025)*se,
    "97.5 %" = mu + qnorm(0.975)*se
  )
  return(1/(1 + exp(-ci)))
}

# logistic regression intercept with profile likelihood
ci_profile <- function(dat) {
  fit <- glm(dat~1, "binomial")
  return(1/(1 + exp(-confint(fit))))
}


# jeffrey's prior bayesian method
ci_bayes <- function(dat) qbeta(
  p = c(0.025, 0.975),
  shape1 = sum(dat) + 0.5,
  shape2 = sum(1 - dat) + 0.5
)

# by simulation
ci_empirical <- function(dat, nsim = 10000) {
  edist <- replicate(
    n = nsim,
    expr = mean(rbinom(length(dat), 1, prob = mean(dat)))
  )
  return(quantile(edist, c(0.025, 0.975)))
}

# let's compare
rbind(
  normal    = ci_normal(dat),
  logodds   = ci_logodds(dat),
  profile   = ci_profile(dat),
  bayes     = ci_bayes(dat),
  empirical = ci_empirical(dat)
)

# and just to check that they converge:
# now the same thing "asymptotically"
dat <- c(rep(0, 38000), rep(1, 2000))

rbind(
  normal    = ci_normal(dat),
  logodds   = ci_logodds(dat),
  profile   = ci_profile(dat),
  bayes     = ci_bayes(dat),
  empirical = ci_empirical(dat)
)
	# Different 95% uncertainty intervals for a proportion
	dat <- c(rep(0, 38), rep(1, 2))

	# normal approximation on probability scale
	ci_normal <- function(dat) {
	mu <- mean(dat)
	se <- sqrt(mu * (1 - mu) / length(dat))
	return(c(
	"2.5 %" = mu + qnorm(0.025)*s_normal,
	"97.5 %" = mu + qnorm(0.975)*se
	))
	}

	# normal approx. on log-odds
	ci_logodds <- function(dat) {
	p <- mean(dat)
	mu <- log(p/(1-p))
	se <- sqrt(1 / (length(dat)p(1-p))) # https://stats.stackexchange.com/a/266116/116878
	ci <- c(
	"2.5 %" = mu + qnorm(0.025)*se,
	"97.5 %" = mu + qnorm(0.975)*se
	)
	return(1/(1 + exp(-ci)))
	}

	# logistic regression intercept with profile likelihood
	ci_profile <- function(dat) {
	fit <- glm(dat~1, "binomial")
	return(1/(1 + exp(-confint(fit))))
	}


	# jeffrey's prior bayesian method
	ci_bayes <- function(dat) qbeta(
	p = c(0.025, 0.975),
	shape1 = sum(dat) + 0.5,
	shape2 = sum(1 - dat) + 0.5
	)

	# by simulation
	ci_empirical <- function(dat, nsim = 10000) {
	edist <- replicate(
	n = nsim,
	expr = mean(rbinom(length(dat), 1, prob = mean(dat)))
	)
	return(quantile(edist, c(0.025, 0.975)))
	}

	# let's compare
	rbind(
	normal = ci_normal(dat),
	logodds = ci_logodds(dat),
	profile = ci_profile(dat),
	bayes = ci_bayes(dat),
	empirical = ci_empirical(dat)
	)

	# and just to check that they converge:
	# now the same thing "asymptotically"
	dat <- c(rep(0, 38000), rep(1, 2000))

	rbind(
	normal = ci_normal(dat),
	logodds = ci_logodds(dat),
	profile = ci_profile(dat),
	bayes = ci_bayes(dat),
	empirical = ci_empirical(dat)
	)