Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Calculates p-values in censored data sets using OLS and Tobit regression
require(ggplot2)
require(GGally)
require(VGAM)
# Function to create a simulated data set and run models on it
oneRun <- function(number_of_obs,eff_IV1,eff_IV2,baseline,sd,lowerCutoff,upperCutoff,roundOn){
# partition data into 4 conditions, specified by IV1 and IV2
partition <- number_of_obs /4
df <- expand.grid(IV1 = c(T,F), IV2 = c(T,F))
df <- df[rep(row.names(df), partition), 1:2]
# sample uncensored values from normal distribution
df$DV_UC <- rnorm(number_of_obs, mean = baseline, sd = sd)
df$DV_UC <- eff_IV1*df$IV1 + eff_IV2*df$IV2 + df$DV_UC
# create censored variable with upper and lower cutoffs
df$DV_C <-df$DV_UC
df$DV_C[df$DV_C>upperCutoff] <- upperCutoff
df$DV_C[df$DV_C<lowerCutoff] <- lowerCutoff
# if rounding is on, round values to nearest integer
if (roundOn) {
df$DV_C <- round(df$DV_C)
}
# Run OLS model and store interaction p-value
fit1 <- lm(DV_C ~ IV1 * IV2, data=df)
pOLS <- summary(fit1)$coefficients[[16]]
# Run Tobit model and store interaction p-value
fit2 <- vglm(DV_C ~ IV1 * IV2, data=df,family=tobit(Upper = upperCutoff, Lower = lowerCutoff))
ctable <- coef(summary(fit2))
pTobit <- ctable[20]
return (c(pOLS,pTobit))
}
# Parameters for simulations
number_of_obs <- 800 # number of observations per data set
eff_IV1 <- 1.5 # main effect 1
eff_IV2 <- 1.5 # main effect 2
baseline <- 1.5 # baseline mean
sd <- 2 # standard deviation of all conditions
lowerCutoff <- 1 # lower bound on measure
upperCutoff <- 7 # upper bound on measure
roundOn <- TRUE # rounds values to nearest integer
numSims <- 5000 # number of simulated data sets
# data frame to store simulated p-values
df_out <- data.frame(pOLS=numeric(),
pTobit=numeric())
# Loop through all simulations and store resulting p-values
for ( i in 1:numSims) {
if (i%%100==0) { print(i) } # print every 100 simulations to console
ps <- oneRun(number_of_obs,eff_IV1,eff_IV2,baseline,sd,lowerCutoff,upperCutoff,roundOn)
newdf <- data.frame(pOLS=ps[[1]], pTobit=ps[[2]])
names(newdf)<-c("pOLS","pTobit")
df_out <- rbind(df_out, newdf)
}
# Create overlaid histogram of p-values
OLS <- data.frame(ps = df_out$pOLS)
Tobit <- data.frame(ps = df_out$pTobit)
OLS$label <- 'OLS'
Tobit$label <- 'Tobit'
pDists <- rbind(OLS,Tobit)
ggplot(pDists, aes(ps, fill = label)) + geom_histogram(alpha = 0.5, aes(y = ..count..), position = 'identity', binwidth=.02)
# Thanks to Mohsen Mosleh for help writing this code.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.