# arraytools/delta_correlated.R

## delta_correlated.R
# Generate {(x1,y1), ..., (xn,yn)} where different pairs are independent
# (e.g. x1 is independent of (y2, ..., yn)) but xi and yi within a pair are correlated.
# Output var(log(xbar/ybar)): 1) sample variance over 'nsim' simulations, 2) delta method approximation

library(MASS)
# Simulation settings: observations per sample and number of Monte Carlo
# replicates
n <- 100
nsim <- 1000

# Set the population means, standard deviations, and correlation
mu_x <- 5
mu_y <- 3
sigma_x <- 2
sigma_y <- 1
rho <- 0.5

# The mean vector and covariance matrix of (X, Y) are identical for every
# replicate, so build them once instead of on each loop iteration.
cov_xy <- rho * sigma_x * sigma_y
mean_vec <- c(mu_x, mu_y)
cov_mat <- matrix(c(sigma_x^2, cov_xy, cov_xy, sigma_y^2), nrow = 2)

# Preallocate a vector to store the simulated values of log(X̄/Ȳ)
log_ratio <- numeric(nsim)

# Run the simulations (the fixed seed makes the run repeatable)
set.seed(123)
for (i in seq_len(nsim)) {  # seq_len() gives an empty loop when nsim is 0
  # Draw n correlated (x, y) pairs; column 1 is X, column 2 is Y
  xy <- mvrnorm(n, mu = mean_vec, Sigma = cov_mat)
  x <- xy[, 1]
  y <- xy[, 2]

  # Compute the sample means
  xbar <- mean(x)
  ybar <- mean(y)

  # Compute log(X̄/Ȳ)
  log_ratio[i] <- log(xbar / ybar)
}

# Compute the sample variance of log(X̄/Ȳ) across the replicates
var_log_ratio <- var(log_ratio)

# Delta-method approximation of var(log(X̄/Ȳ)):
#   sigma_x^2 / (n mu_x^2) + sigma_y^2 / (n mu_y^2)
#     - 2 rho sigma_x sigma_y / (n mu_x mu_y)
approx_var_log_ratio <- sigma_x^2 / (n * mu_x^2) + sigma_y^2 / (n * mu_y^2) -
  2 * rho * sigma_x * sigma_y / (n * mu_x * mu_y)

# Compare the sample variance and approximate variance
cat("Sample variance of log(X̄/Ȳ):", var_log_ratio, "\n")
# Sample variance of log(X̄/Ȳ): 0.001346076
cat("Approximate variance of log(X̄/Ȳ) using the formula:", approx_var_log_ratio, "\n")
# Approximate variance of log(X̄/Ȳ) using the formula: 0.001377778

# Use the last simulated data (x and y left over from the final replicate).
# var(x/xbar - y/ybar)/n expands to the delta-method formula with sample
# moments plugged in, including the covariance term.
var(x / mean(x) - y / mean(y)) / n
# [1] 0.001416225
var(x / mean(x)) / n + var(y / mean(y)) / n # if we ignore cov, result will be incorrect
# [1] 0.002863794

## delta_uncorrelated.R
# Generate {(x1,y1), ..., (xn,yn)} where xi is independent of yj for all i, j (including i = j),
# and the pairs (xi, yi) and (xj, yj) are independent for i != j.
# Output var(log(xbar/ybar)): 1) sample variance over 'nsim' simulations, 2) delta method approximation

# Simulation settings: observations per sample and number of Monte Carlo
# replicates
n <- 100
nsim <- 1000

# Set the population means and standard deviations
mu_x <- 5
mu_y <- 3
sigma_x <- 2
sigma_y <- 1

# Preallocate a vector to store the simulated values of log(X̄/Ȳ)
log_ratio <- numeric(nsim)

# Run the simulations (the fixed seed makes the run repeatable)
set.seed(123)
for (i in seq_len(nsim)) {  # seq_len() gives an empty loop when nsim is 0
  # Draw X and Y independently of each other
  x <- rnorm(n, mean = mu_x, sd = sigma_x)
  y <- rnorm(n, mean = mu_y, sd = sigma_y)

  # Compute the sample means
  xbar <- mean(x)
  ybar <- mean(y)

  # Compute log(X̄/Ȳ)
  log_ratio[i] <- log(xbar / ybar)
}

# Compute the sample variance of log(X̄/Ȳ) across the replicates
var_log_ratio <- var(log_ratio)

# Delta-method approximation of var(log(X̄/Ȳ)); there is no covariance term
# because X and Y are generated independently:
#   sigma_x^2 / (n mu_x^2) + sigma_y^2 / (n mu_y^2)
approx_var_log_ratio <- sigma_x^2 / (n * mu_x^2) + sigma_y^2 / (n * mu_y^2)

# Compare the sample variance and approximate variance
cat("Sample variance of log(X̄/Ȳ):", var_log_ratio, "\n")
# Sample variance of log(X̄/Ȳ): 0.002478037
cat("Approximate variance of log(X̄/Ȳ) using the formula:", approx_var_log_ratio, "\n")
# Approximate variance of log(X̄/Ȳ) using the formula: 0.002711111

# Use the last simulated data (x and y left over from the final replicate):
# plug the sample variances and sample means into the delta-method formula.
sigma2_xhat <- var(x)
sigma2_yhat <- var(y)
muhat_x <- mean(x)
muhat_y <- mean(y)
sigma2_xhat / (n * muhat_x^2) + sigma2_yhat / (n * muhat_y^2)
# [1] 0.002194416
	# Generate {(x1,y1), ..., (xn,yn)} where different pairs are independent
	# (e.g. x1 is independent of (y2, ..., yn)) but xi and yi within a pair are correlated.
	# Output var(log(xbar/ybar)): 1) sample variance over 'nsim' simulations, 2) delta method approximation

	library(MASS)
	# Simulation settings: observations per sample and number of Monte Carlo
	# replicates
	n <- 100
	nsim <- 1000

	# Set the population means, standard deviations, and correlation
	mu_x <- 5
	mu_y <- 3
	sigma_x <- 2
	sigma_y <- 1
	rho <- 0.5

	# The mean vector and covariance matrix of (X, Y) are identical for every
	# replicate, so build them once instead of on each loop iteration.
	# cov(X, Y) = rho * sigma_x * sigma_y
	cov_xy <- rho * sigma_x * sigma_y
	mean_vec <- c(mu_x, mu_y)
	cov_mat <- matrix(c(sigma_x^2, cov_xy, cov_xy, sigma_y^2), nrow = 2)

	# Preallocate a vector to store the simulated values of log(X̄/Ȳ)
	log_ratio <- numeric(nsim)

	# Run the simulations (the fixed seed makes the run repeatable)
	set.seed(123)
	for (i in seq_len(nsim)) {  # seq_len() gives an empty loop when nsim is 0
		# Draw n correlated (x, y) pairs; column 1 is X, column 2 is Y
		xy <- mvrnorm(n, mu = mean_vec, Sigma = cov_mat)
		x <- xy[, 1]
		y <- xy[, 2]

		# Compute the sample means
		xbar <- mean(x)
		ybar <- mean(y)

		# Compute log(X̄/Ȳ)
		log_ratio[i] <- log(xbar / ybar)
	}

	# Compute the sample variance of log(X̄/Ȳ) across the replicates
	var_log_ratio <- var(log_ratio)

	# Delta-method approximation of var(log(X̄/Ȳ)):
	#   sigma_x^2 / (n mu_x^2) + sigma_y^2 / (n mu_y^2)
	#     - 2 rho sigma_x sigma_y / (n mu_x mu_y)
	approx_var_log_ratio <- sigma_x^2 / (n * mu_x^2) + sigma_y^2 / (n * mu_y^2) -
		2 * rho * sigma_x * sigma_y / (n * mu_x * mu_y)

	# Compare the sample variance and approximate variance
	cat("Sample variance of log(X̄/Ȳ):", var_log_ratio, "\n")
	# Sample variance of log(X̄/Ȳ): 0.001346076
	cat("Approximate variance of log(X̄/Ȳ) using the formula:", approx_var_log_ratio, "\n")
	# Approximate variance of log(X̄/Ȳ) using the formula: 0.001377778

	# Use the last simulated data (x and y left over from the final replicate).
	# var(x/xbar - y/ybar)/n expands to the delta-method formula with sample
	# moments plugged in, including the covariance term.
	var(x / mean(x) - y / mean(y)) / n
	# [1] 0.001416225
	var(x / mean(x)) / n + var(y / mean(y)) / n # if we ignore cov, result will be incorrect
	# [1] 0.002863794