Skip to content

Instantly share code, notes, and snippets.

@olooney
Forked from mikeguggis/sim.R
Created July 9, 2018 18:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save olooney/e88a66d2d2fa7f2f0cd0d0dd6b708739 to your computer and use it in GitHub Desktop.
Save olooney/e88a66d2d2fa7f2f0cd0d0dd6b708739 to your computer and use it in GitHub Desktop.
Stack Exchange
library(MASS)
library(ggplot2)
results <- NULL
# Generate simulated data
for ( seed in 1:30 ) {
set.seed(seed+42)
mu = c(0,-1+seed/10)
Sigma = matrix(c(1.5,-.5,-.5,.7),2,2)
#Sigma = matrix(c(0.5,0.2,0.2,1),2,2)
X = mvrnorm(10000000,mu,Sigma)
# Calculate pairwise minimums
mins = pmin(X[,1],X[,2])
# Set minumum
#m = 0.5
m = 1
# Create subsets for conditional statements
epsilon = 0.0001
# approximate the conditions with a narrow strip
dex = mins < m + epsilon & mins > m - epsilon # min(x,y) = m
dexx = X[,1] < m + epsilon & X[,1] > m - epsilon # x = m
dexy = X[,2] < m + epsilon & X[,2] > m - epsilon # y = m
# conditional means
muxy = mu[1]+Sigma[1,2]*(m-mu[2])/Sigma[2,2]
muyx = mu[2]+Sigma[1,2]*(m-mu[1])/Sigma[1,1]
# conditional variances
s2xy = Sigma[1,1] - Sigma[1,2]^2/Sigma[2,2]
s2yx = Sigma[2,2] - Sigma[1,2]^2/Sigma[1,1]
# conditional z-scores
zxy = (muxy-m)/sqrt(s2xy)
zyx = (muyx-m)/sqrt(s2yx)
# Check (1)
#mean(X[dex,1]>X[dex,2])
practice <- mean(X[dexx,2]>m)/(mean(X[dexx,2]>m)+mean(X[dexy,1]>m))
# Check (7)
#theory <- (1-pnorm(zxy))/(2-pnorm(zxy)-pnorm(zyx))
theory <- 1- (pnorm(zxy))/(pnorm(zxy)+pnorm(zyx))
if ( seed == 1 ) {
n = 1000 # only show n points on the charts
# scatter plot
ggplot( data.frame(x=X[1:n,1], y=X[1:n,2]), aes(x,y)) +
theme_bw() +
theme(plot.title = element_text(hjust = 0.5)) +
ggtitle("P(X<Y|min(X,Y)=m)") +
coord_fixed() +
ylim(-2, 6) +
xlim(-2, 6) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0) +
geom_point(alpha=0.3) +
geom_segment(x=m, y=m, xend=7, yend=m, color='red') +
geom_segment(x=m, y=m, xend=m, yend=7, color='red')
# density estimates for both conditionals (*not* truncated)
ggplot( data.frame(y_given_xm=X[dexx,2])) +
geom_density(aes(y_given_xm), color='red') +
geom_density(
data=data.frame(x_given_ym=X[dexy, 1]),
aes(x_given_ym), color='blue') +
theme_bw()
}
results <- rbind(results, data.frame(theory=theory, practice=practice))
}
print(results)
plot(results, ylim=c(0,1), xlim=c(0,1))
abline(a=0, b=1)
title("Theory vs. Practice")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment