turgeonmaxime/t-test_outliers.md

## t-test_outliers.md

      
    Raw
  

              t-test_outliers.md
            
          
    B <- 1000
n <- 20
sigma <- 10
p <- 0.9
alpha <- 0.05

results <- replicate(B, {
    norm_vars1 <- rnorm(n)
    # Contaminated normal
    sigmas <- sample(c(1, sigma), n, replace = TRUE,
                     prob = c(p, 1 - p))
    norm_vars2 <- rnorm(n, sd = sigmas)
    
    output <- t.test(norm_vars1, norm_vars2)
    
    return(c(output$statistic,
             output$parameter, 
             "p.value" = output$p.value))
})

# Type I error rate
mean(results["p.value",] < alpha)
#> [1] 0.019

# Sampling distribution under H0
hist(results["t",], breaks = 30, freq = FALSE)

# Theoretical density
# Note: the DFs depend on variance,
#       so we take the mean as an approximation
xvars <- seq(-3, 3, length.out = 100)
lines(xvars, dt(xvars, df = mean(results["df",])), 
      col = "blue")

# Or as a QQ-plot
qqplot(results["t",], 
       qt(ppoints(B), df = mean(results["df",])),
       xlab = "Samples", ylab = "Theory")
abline(a = 0, b = 1)

^{Created on 2021-10-28 by the reprex package (v2.0.0)}