djhurio/Optimal Sample Allocation

## Optimal Sample Allocation
# Optimal Sample Allocation in case of non-response

require(ggplot2)

# 3D Scatterplot
require(rgl)

require(foreach)
require(doMC)
registerDoMC(cores = detectCores())


# Expected variance
exp.var <- function(n, N, S = rep(1, length(N)), R = rep(1, length(N))) {
  m <- n * R
  sum(N^2 * (1 - m / N) * S^2 / m)
}

# Optimal allocation with linear equation system
ROSA <- function(n, N, S = rep(1, length(N)), R = rep(1, length(N))) {
  H <- length(N)

  NS <- N * S

  m <- rep(1, H) %o% sqrt(R)
  diag(m) <- diag(m) * (1 - sum(NS) / NS)
  m[H, ] <- 1

  b <- c(rep(0, H-1), n)

  return(solve(m, b))
}

n <- 30

N <- c(100, 200)
S <- c(.2, .8)
R <- c(.2, .8)

# Neyman allocation
n_NA <- n * N * S / sum(N * S)
n_NA

n_opt <- n * N * S / sqrt(R) / sum(N * S / sqrt(R))
n_opt

ROSA(n = n, N = N, S = S, R = R)

exp.var(n_NA, N, S, R)
exp.var(n_opt, N, S, R)


# 2 strata ####

test.alloc <- function(n, N, S = rep(1, length(N)), R = rep(1, length(N))) {
  n_NA <- n * N * S / sum(N * S)
  n_o1 <- n * N * S / R / sum(N * S / R)
  n_o2 <- n * N * S / sqrt(R) / sum(N * S / sqrt(R))

  n_NA <- round(n_NA)
  n_o1 <- round(n_o1)
  n_o2 <- round(n_o2)

  n1 <- 1:(n-1)
  v <- sapply(n1, function(x) exp.var(n = c(x, n-x), N = N, S = S, R = R))

  cat("Neym Allocation:", n_NA,
      "variance:", exp.var(n_NA, N, S, R), "\n")
  cat("Opt1 Allocation:", n_o1,
      "variance:", exp.var(n_o1, N, S, R), "\n")
  cat("Opt2 Allocation:", n_o2,
      "variance:", exp.var(n_o2, N, S, R), "\n")

  n1_oX <- n1[v == min(v)]
  n_oX <- c(n1_oX, n - n1_oX)

  cat("OptX Allocation:", n_oX,
      "variance:", exp.var(n_oX, N, S, R), "\n")

  qplot(n1, v, geom = "line") +
    geom_point(colour = 1 + as.integer(v == min(v))) +
    geom_vline(xintercept = n_NA[1], colour = "green") +
    geom_vline(xintercept = n_o1[1], colour = "blue") +
    geom_vline(xintercept = n_o2[1], colour = "red") +
    geom_vline(xintercept = n_oX[1], colour = "red", linetype = "dashed") +
    theme_bw()
}

test.alloc(n = 50, N = c(100, 200))
test.alloc(n = 50, N = c(100, 200), S = c(.1, .2))

test.alloc(n = 50, N = c(100, 200), R = c(.2, .8))
test.alloc(n = 50, N = c(100, 200), R = c(.4, .6))
test.alloc(n = 50, N = c(100, 200), R = c(.5, 1))

test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .2), R = c(.1, .9))
test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .5), R = c(.1, .9))
test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .9), R = c(.1, .9))

test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .2), R = c(.9, .1))
test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .5), R = c(.9, .1))


# 3 strata ####

test.alloc.3 <- function(n, N, S = rep(1, length(N)), R = rep(1, length(N))) {
  n_NA <- round(n * N * S / sum(N * S))
  n_o1 <- round(n * N * S / R / sum(N * S / R))
  n_o2 <- round(n * N * S / sqrt(R) / sum(N * S / sqrt(R)))

  df <- foreach(n1 = 1:(n-1), .combine = "rbind") %:%
    foreach(n2 = 1:(n-1), .combine = "rbind") %:% when(n1 + n2 < n) %dopar% {
      n3 <- n - n1 - n2
      v <- exp.var(n = c(n1, n2, n3), N = N, S = S, R = R)
      data.frame(n1 = n1, n2 = n2, n3 = n3, v = v)
    }

  cat("Neym Allocation:", n_NA,
      "variance:", exp.var(n_NA, N, S, R), "\n")
  cat("Opt1 Allocation:", n_o1,
      "variance:", exp.var(n_o1, N, S, R), "\n")
  cat("Opt2 Allocation:", n_o2,
      "variance:", exp.var(n_o2, N, S, R), "\n")

  n_oX <- as.numeric(df[df$v == min(df$v), 1:3])

  cat("OptX Allocation:", n_oX,
      "variance:", exp.var(n_oX, N, S, R), "\n")

  # scatterplot3d(df$n1, df$n2, df$v)
  plot3d(df$n1, df$n2, df$v, col="red", size=3)
}

test.alloc.3(n = 50, N = c(100, 200, 300))
test.alloc.3(n = 50, N = c(100, 200, 300), S = c(.1, .2, .3))

test.alloc.3(n = 50, N = c(100, 200, 300), R = c(.2, .8, .5))
test.alloc.3(n = 50, N = c(100, 200, 300), R = c(.4, .6, .9))
test.alloc.3(n = 50, N = c(100, 200, 300), R = c(.5, 1, .2))

test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .2, .3),
             R = c(.1, .9, .5))
test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .5, .9),
             R = c(.1, .9, .5))
test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .9, .9),
             R = c(.1, .9, .5))

test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .2, .3),
             R = c(.9, .5, .1))
test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .5, .9),
             R = c(.9, .5, .1))

test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.7, .8, .9),
             R = c(.9, .5, .1))
test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.9, .9, .9),
             R = c(.6, .5, .4))
	# Optimal Sample Allocation in case of non-response

	require(ggplot2)

	# 3D Scatterplot
	require(rgl)

	require(foreach)
	require(doMC)
	registerDoMC(cores = detectCores())


	# Expected variance
	exp.var <- function(n, N, S = rep(1, length(N)), R = rep(1, length(N))) {
	m <- n * R
	sum(N^2 * (1 - m / N) * S^2 / m)
	}

	# Optimal allocation with linear equation system
	ROSA <- function(n, N, S = rep(1, length(N)), R = rep(1, length(N))) {
	H <- length(N)

	NS <- N * S

	m <- rep(1, H) %o% sqrt(R)
	diag(m) <- diag(m) * (1 - sum(NS) / NS)
	m[H, ] <- 1

	b <- c(rep(0, H-1), n)

	return(solve(m, b))
	}

	n <- 30

	N <- c(100, 200)
	S <- c(.2, .8)
	R <- c(.2, .8)

	# Neyman allocation
	n_NA <- n * N * S / sum(N * S)
	n_NA

	n_opt <- n * N * S / sqrt(R) / sum(N * S / sqrt(R))
	n_opt

	ROSA(n = n, N = N, S = S, R = R)

	exp.var(n_NA, N, S, R)
	exp.var(n_opt, N, S, R)


	# 2 strata ####

	test.alloc <- function(n, N, S = rep(1, length(N)), R = rep(1, length(N))) {
	n_NA <- n * N * S / sum(N * S)
	n_o1 <- n * N * S / R / sum(N * S / R)
	n_o2 <- n * N * S / sqrt(R) / sum(N * S / sqrt(R))

	n_NA <- round(n_NA)
	n_o1 <- round(n_o1)
	n_o2 <- round(n_o2)

	n1 <- 1:(n-1)
	v <- sapply(n1, function(x) exp.var(n = c(x, n-x), N = N, S = S, R = R))

	cat("Neym Allocation:", n_NA,
	"variance:", exp.var(n_NA, N, S, R), "\n")
	cat("Opt1 Allocation:", n_o1,
	"variance:", exp.var(n_o1, N, S, R), "\n")
	cat("Opt2 Allocation:", n_o2,
	"variance:", exp.var(n_o2, N, S, R), "\n")

	n1_oX <- n1[v == min(v)]
	n_oX <- c(n1_oX, n - n1_oX)

	cat("OptX Allocation:", n_oX,
	"variance:", exp.var(n_oX, N, S, R), "\n")

	qplot(n1, v, geom = "line") +
	geom_point(colour = 1 + as.integer(v == min(v))) +
	geom_vline(xintercept = n_NA[1], colour = "green") +
	geom_vline(xintercept = n_o1[1], colour = "blue") +
	geom_vline(xintercept = n_o2[1], colour = "red") +
	geom_vline(xintercept = n_oX[1], colour = "red", linetype = "dashed") +
	theme_bw()
	}

	test.alloc(n = 50, N = c(100, 200))
	test.alloc(n = 50, N = c(100, 200), S = c(.1, .2))

	test.alloc(n = 50, N = c(100, 200), R = c(.2, .8))
	test.alloc(n = 50, N = c(100, 200), R = c(.4, .6))
	test.alloc(n = 50, N = c(100, 200), R = c(.5, 1))

	test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .2), R = c(.1, .9))
	test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .5), R = c(.1, .9))
	test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .9), R = c(.1, .9))

	test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .2), R = c(.9, .1))
	test.alloc(n = 100, N = c(1000, 2000), S = c(.1, .5), R = c(.9, .1))


	# 3 strata ####

	test.alloc.3 <- function(n, N, S = rep(1, length(N)), R = rep(1, length(N))) {
	n_NA <- round(n * N * S / sum(N * S))
	n_o1 <- round(n * N * S / R / sum(N * S / R))
	n_o2 <- round(n * N * S / sqrt(R) / sum(N * S / sqrt(R)))

	df <- foreach(n1 = 1:(n-1), .combine = "rbind") %:%
	foreach(n2 = 1:(n-1), .combine = "rbind") %:% when(n1 + n2 < n) %dopar% {
	n3 <- n - n1 - n2
	v <- exp.var(n = c(n1, n2, n3), N = N, S = S, R = R)
	data.frame(n1 = n1, n2 = n2, n3 = n3, v = v)
	}

	cat("Neym Allocation:", n_NA,
	"variance:", exp.var(n_NA, N, S, R), "\n")
	cat("Opt1 Allocation:", n_o1,
	"variance:", exp.var(n_o1, N, S, R), "\n")
	cat("Opt2 Allocation:", n_o2,
	"variance:", exp.var(n_o2, N, S, R), "\n")

	n_oX <- as.numeric(df[df$v == min(df$v), 1:3])

	cat("OptX Allocation:", n_oX,
	"variance:", exp.var(n_oX, N, S, R), "\n")

	# scatterplot3d(df$n1, df$n2, df$v)
	plot3d(df$n1, df$n2, df$v, col="red", size=3)
	}

	test.alloc.3(n = 50, N = c(100, 200, 300))
	test.alloc.3(n = 50, N = c(100, 200, 300), S = c(.1, .2, .3))

	test.alloc.3(n = 50, N = c(100, 200, 300), R = c(.2, .8, .5))
	test.alloc.3(n = 50, N = c(100, 200, 300), R = c(.4, .6, .9))
	test.alloc.3(n = 50, N = c(100, 200, 300), R = c(.5, 1, .2))

	test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .2, .3),
	R = c(.1, .9, .5))
	test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .5, .9),
	R = c(.1, .9, .5))
	test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .9, .9),
	R = c(.1, .9, .5))

	test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .2, .3),
	R = c(.9, .5, .1))
	test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.1, .5, .9),
	R = c(.9, .5, .1))

	test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.7, .8, .9),
	R = c(.9, .5, .1))
	test.alloc.3(n = 100, N = c(100, 200, 300), S = c(.9, .9, .9),
	R = c(.6, .5, .4))