olooney/sim.R

## sim.R
library(MASS)
library(ggplot2)

results <- NULL

# Generate simulated data
for ( seed in 1:30 ) {
set.seed(seed+42)
mu = c(0,-1+seed/10)
Sigma = matrix(c(1.5,-.5,-.5,.7),2,2)
#Sigma = matrix(c(0.5,0.2,0.2,1),2,2)
X = mvrnorm(10000000,mu,Sigma)

# Calculate pairwise minimums
mins = pmin(X[,1],X[,2])

# Set minumum
#m = 0.5
m = 1

# Create subsets for conditional statements
epsilon = 0.0001

# approximate the conditions with a narrow strip
dex = mins < m + epsilon & mins > m - epsilon # min(x,y) = m
dexx = X[,1] < m + epsilon & X[,1] > m - epsilon  # x = m
dexy = X[,2] < m + epsilon & X[,2] > m - epsilon  # y = m

# conditional means
muxy = mu[1]+Sigma[1,2]*(m-mu[2])/Sigma[2,2]
muyx = mu[2]+Sigma[1,2]*(m-mu[1])/Sigma[1,1]

# conditional variances
s2xy = Sigma[1,1] - Sigma[1,2]^2/Sigma[2,2]
s2yx = Sigma[2,2] - Sigma[1,2]^2/Sigma[1,1]

# conditional z-scores
zxy = (muxy-m)/sqrt(s2xy)
zyx = (muyx-m)/sqrt(s2yx)

# Check (1)
#mean(X[dex,1]>X[dex,2])
practice <- mean(X[dexx,2]>m)/(mean(X[dexx,2]>m)+mean(X[dexy,1]>m))
# Check (7)
#theory <- (1-pnorm(zxy))/(2-pnorm(zxy)-pnorm(zyx))
theory <- 1- (pnorm(zxy))/(pnorm(zxy)+pnorm(zyx))

if ( seed == 1 ) {
  n = 1000 # only show n points on the charts

  # scatter plot
  ggplot( data.frame(x=X[1:n,1], y=X[1:n,2]), aes(x,y)) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5)) +
    ggtitle("P(X<Y|min(X,Y)=m)") +
    coord_fixed() +
    ylim(-2, 6) +
    xlim(-2, 6) +
    geom_hline(yintercept=0) +
    geom_vline(xintercept=0) +
    geom_point(alpha=0.3) +
    geom_segment(x=m, y=m, xend=7, yend=m, color='red') +
    geom_segment(x=m, y=m, xend=m, yend=7, color='red')

  # density estimates for both conditionals (*not* truncated)
  ggplot( data.frame(y_given_xm=X[dexx,2])) +
    geom_density(aes(y_given_xm), color='red') +
    geom_density(
      data=data.frame(x_given_ym=X[dexy, 1]),
      aes(x_given_ym), color='blue') +
    theme_bw()
}

results <- rbind(results, data.frame(theory=theory, practice=practice))
}
print(results)
plot(results, ylim=c(0,1), xlim=c(0,1))
abline(a=0, b=1)
title("Theory vs. Practice")
	library(MASS)
	library(ggplot2)

	results <- NULL

	# Generate simulated data
	for ( seed in 1:30 ) {
	set.seed(seed+42)
	mu = c(0,-1+seed/10)
	Sigma = matrix(c(1.5,-.5,-.5,.7),2,2)
	#Sigma = matrix(c(0.5,0.2,0.2,1),2,2)
	X = mvrnorm(10000000,mu,Sigma)

	# Calculate pairwise minimums
	mins = pmin(X[,1],X[,2])

	# Set minumum
	#m = 0.5
	m = 1

	# Create subsets for conditional statements
	epsilon = 0.0001

	# approximate the conditions with a narrow strip
	dex = mins < m + epsilon & mins > m - epsilon # min(x,y) = m
	dexx = X[,1] < m + epsilon & X[,1] > m - epsilon # x = m
	dexy = X[,2] < m + epsilon & X[,2] > m - epsilon # y = m

	# conditional means
	muxy = mu[1]+Sigma[1,2]*(m-mu[2])/Sigma[2,2]
	muyx = mu[2]+Sigma[1,2]*(m-mu[1])/Sigma[1,1]

	# conditional variances
	s2xy = Sigma[1,1] - Sigma[1,2]^2/Sigma[2,2]
	s2yx = Sigma[2,2] - Sigma[1,2]^2/Sigma[1,1]

	# conditional z-scores
	zxy = (muxy-m)/sqrt(s2xy)
	zyx = (muyx-m)/sqrt(s2yx)

	# Check (1)
	#mean(X[dex,1]>X[dex,2])
	practice <- mean(X[dexx,2]>m)/(mean(X[dexx,2]>m)+mean(X[dexy,1]>m))
	# Check (7)
	#theory <- (1-pnorm(zxy))/(2-pnorm(zxy)-pnorm(zyx))
	theory <- 1- (pnorm(zxy))/(pnorm(zxy)+pnorm(zyx))

	if ( seed == 1 ) {
	n = 1000 # only show n points on the charts

	# scatter plot
	ggplot( data.frame(x=X[1:n,1], y=X[1:n,2]), aes(x,y)) +
	theme_bw() +
	theme(plot.title = element_text(hjust = 0.5)) +
	ggtitle("P(X<Y\|min(X,Y)=m)") +
	coord_fixed() +
	ylim(-2, 6) +
	xlim(-2, 6) +
	geom_hline(yintercept=0) +
	geom_vline(xintercept=0) +
	geom_point(alpha=0.3) +
	geom_segment(x=m, y=m, xend=7, yend=m, color='red') +
	geom_segment(x=m, y=m, xend=m, yend=7, color='red')

	# density estimates for both conditionals (not truncated)
	ggplot( data.frame(y_given_xm=X[dexx,2])) +
	geom_density(aes(y_given_xm), color='red') +
	geom_density(
	data=data.frame(x_given_ym=X[dexy, 1]),
	aes(x_given_ym), color='blue') +
	theme_bw()
	}

	results <- rbind(results, data.frame(theory=theory, practice=practice))
	}
	print(results)
	plot(results, ylim=c(0,1), xlim=c(0,1))
	abline(a=0, b=1)
	title("Theory vs. Practice")