Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Calculates p-values in censored data sets using OLS and Tobit regression
require(ggplot2)
require(GGally)
require(VGAM)
# Function to create a simulated data set and run models on it
oneRun <- function(number_of_obs,eff_IV1,eff_IV2,baseline,sd,lowerCutoff,upperCutoff,roundOn){
# partition data into 4 conditions, specified by IV1 and IV2
partition <- number_of_obs /4
df <- expand.grid(IV1 = c(T,F), IV2 = c(T,F))
df <- df[rep(row.names(df), partition), 1:2]
# sample uncensored values from normal distribution
df$DV_UC <- rnorm(number_of_obs, mean = baseline, sd = sd)
df$DV_UC <- eff_IV1*df$IV1 + eff_IV2*df$IV2 + df$DV_UC
# create censored variable with upper and lower cutoffs
df$DV_C <-df$DV_UC
df$DV_C[df$DV_C>upperCutoff] <- upperCutoff
df$DV_C[df$DV_C<lowerCutoff] <- lowerCutoff
# if rounding is on, round values to nearest integer
if (roundOn) {
df$DV_C <- round(df$DV_C)
}
# Run OLS model and store interaction p-value
fit1 <- lm(DV_C ~ IV1 * IV2, data=df)
pOLS <- summary(fit1)$coefficients[[16]]
# Run Tobit model and store interaction p-value
fit2 <- vglm(DV_C ~ IV1 * IV2, data=df,family=tobit(Upper = upperCutoff, Lower = lowerCutoff))
ctable <- coef(summary(fit2))
pTobit <- ctable[20]
return (c(pOLS,pTobit))
}
# Parameters for simulations
number_of_obs <- 800 # number of observations per data set
eff_IV1 <- 1.5 # main effect 1
eff_IV2 <- 1.5 # main effect 2
baseline <- 1.5 # baseline mean
sd <- 2 # standard deviation of all conditions
lowerCutoff <- 1 # lower bound on measure
upperCutoff <- 7 # upper bound on measure
roundOn <- TRUE # rounds values to nearest integer
numSims <- 5000 # number of simulated data sets
# data frame to store simulated p-values
df_out <- data.frame(pOLS=numeric(),
pTobit=numeric())
# Loop through all simulations and store resulting p-values
for ( i in 1:numSims) {
if (i%%100==0) { print(i) } # print every 100 simulations to console
ps <- oneRun(number_of_obs,eff_IV1,eff_IV2,baseline,sd,lowerCutoff,upperCutoff,roundOn)
newdf <- data.frame(pOLS=ps[[1]], pTobit=ps[[2]])
names(newdf)<-c("pOLS","pTobit")
df_out <- rbind(df_out, newdf)
}
# Create overlaid histogram of p-values
OLS <- data.frame(ps = df_out$pOLS)
Tobit <- data.frame(ps = df_out$pTobit)
OLS$label <- 'OLS'
Tobit$label <- 'Tobit'
pDists <- rbind(OLS,Tobit)
ggplot(pDists, aes(ps, fill = label)) + geom_histogram(alpha = 0.5, aes(y = ..count..), position = 'identity', binwidth=.02)
# Thanks to Mohsen Mosleh for help writing this code.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment