Created
January 8, 2018 17:34
-
-
Save adambear91/c8481df6bd9106a6b497ca9c26a7fd3f to your computer and use it in GitHub Desktop.
Calculates p-values in censored data sets using OLS and Tobit regression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require(ggplot2) | |
require(GGally) | |
require(VGAM) | |
# Function to create a simulated data set and run models on it | |
oneRun <- function(number_of_obs,eff_IV1,eff_IV2,baseline,sd,lowerCutoff,upperCutoff,roundOn){ | |
# partition data into 4 conditions, specified by IV1 and IV2 | |
partition <- number_of_obs /4 | |
df <- expand.grid(IV1 = c(T,F), IV2 = c(T,F)) | |
df <- df[rep(row.names(df), partition), 1:2] | |
# sample uncensored values from normal distribution | |
df$DV_UC <- rnorm(number_of_obs, mean = baseline, sd = sd) | |
df$DV_UC <- eff_IV1*df$IV1 + eff_IV2*df$IV2 + df$DV_UC | |
# create censored variable with upper and lower cutoffs | |
df$DV_C <-df$DV_UC | |
df$DV_C[df$DV_C>upperCutoff] <- upperCutoff | |
df$DV_C[df$DV_C<lowerCutoff] <- lowerCutoff | |
# if rounding is on, round values to nearest integer | |
if (roundOn) { | |
df$DV_C <- round(df$DV_C) | |
} | |
# Run OLS model and store interaction p-value | |
fit1 <- lm(DV_C ~ IV1 * IV2, data=df) | |
pOLS <- summary(fit1)$coefficients[[16]] | |
# Run Tobit model and store interaction p-value | |
fit2 <- vglm(DV_C ~ IV1 * IV2, data=df,family=tobit(Upper = upperCutoff, Lower = lowerCutoff)) | |
ctable <- coef(summary(fit2)) | |
pTobit <- ctable[20] | |
return (c(pOLS,pTobit)) | |
} | |
# Parameters for simulations | |
number_of_obs <- 800 # number of observations per data set | |
eff_IV1 <- 1.5 # main effect 1 | |
eff_IV2 <- 1.5 # main effect 2 | |
baseline <- 1.5 # baseline mean | |
sd <- 2 # standard deviation of all conditions | |
lowerCutoff <- 1 # lower bound on measure | |
upperCutoff <- 7 # upper bound on measure | |
roundOn <- TRUE # rounds values to nearest integer | |
numSims <- 5000 # number of simulated data sets | |
# data frame to store simulated p-values | |
df_out <- data.frame(pOLS=numeric(), | |
pTobit=numeric()) | |
# Loop through all simulations and store resulting p-values | |
for ( i in 1:numSims) { | |
if (i%%100==0) { print(i) } # print every 100 simulations to console | |
ps <- oneRun(number_of_obs,eff_IV1,eff_IV2,baseline,sd,lowerCutoff,upperCutoff,roundOn) | |
newdf <- data.frame(pOLS=ps[[1]], pTobit=ps[[2]]) | |
names(newdf)<-c("pOLS","pTobit") | |
df_out <- rbind(df_out, newdf) | |
} | |
# Create overlaid histogram of p-values | |
OLS <- data.frame(ps = df_out$pOLS) | |
Tobit <- data.frame(ps = df_out$pTobit) | |
OLS$label <- 'OLS' | |
Tobit$label <- 'Tobit' | |
pDists <- rbind(OLS,Tobit) | |
ggplot(pDists, aes(ps, fill = label)) + geom_histogram(alpha = 0.5, aes(y = ..count..), position = 'identity', binwidth=.02) | |
# Thanks to Mohsen Mosleh for help writing this code. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment