Skip to content

Instantly share code, notes, and snippets.

@ericpgreen
Last active October 30, 2020 18:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericpgreen/52f854996f24faf17374a23e5dfd8da9 to your computer and use it in GitHub Desktop.
Save ericpgreen/52f854996f24faf17374a23e5dfd8da9 to your computer and use it in GitHub Desktop.
prediction.R
library(simstudy)
k = 0
ppv <- data.frame(sim=NULL, r=NULL, outcome=NULL, ppv=NULL)
corr <- c(0.1, 0.4)
x <- seq(0.1, 0.5, by=0.1)
y <- 1-x
for (s in 1:1000) {
for (r in 1:length(corr)) {
corr.mat <- matrix(c(1, corr[r],
corr[r], 1), nrow = 2)
for (p in 1:length(x)) {
res <- tryCatch(
simstudy::genCorGen(1000, nvars = 2,
params1 = c(x[p], y[p]),
corMatrix = corr.mat,
dist = "binary",
method = "ep", wide = TRUE),
error=function(e) NA)
if(is.na(res)) {
simPPV <- NA
} else {
df <- as.data.frame(res[,2:3])
names(df) <- c("outcome", "predictor")
simPredicts <- df %>%
group_by(outcome, predictor) %>%
count()
simTP <- pull(simPredicts %>% filter(outcome==1 & predictor==1), n)
simPredPos <- sum(pull(simPredicts %>% filter(predictor==1), n))
#simCondPos <- sum(pull(simPredicts %>% filter(outcome==1), n))
simPPV <- (simTP/(simPredPos))*100
#simSen <- (simTP/(simCondPos))*100
}
k = k+1
ppv[k, 1] <- s
ppv[k, 2] <- corr[r]
ppv[k, 3] <- x[p]
ppv[k, 4] <- simPPV
}
}
}
names(ppv) <- c("sim", "r", "outcome", "ppv")
ppv_avg <- ppv %>%
group_by(r, outcome) %>%
summarize(ppv_mean = mean(ppv, na.rm=TRUE),
ppv_sd = sd(ppv, na.rm = TRUE),
ppv_n = n()) %>%
mutate(ppv_se = ppv_sd / sqrt(ppv_n),
ppv_lower.ci = ppv_mean - qt(1 - (0.05 / 2), ppv_n - 1) * ppv_se,
ppv_upper.ci = ppv_mean + qt(1 - (0.05 / 2), ppv_n - 1) * ppv_se) %>%
mutate(r = ifelse(r==0.1, "Correlation 0.1", "Correlation 0.4"))
ggplot(ppv_avg, aes(x=factor(outcome), y=ppv_mean)) +
geom_point(size=4) +
#geom_errorbar(aes(ymin=ppv_lower.ci, ymax=ppv_upper.ci), width=.1) +
ylim(0, 75) +
facet_wrap(~r) +
theme_bw() +
theme(plot.title.position = "plot",
strip.text = element_text(size = 15),
plot.title = element_text(face="bold")) +
labs(title="Simulated positive predictive value for classifying outcome based predictor with specified correlation to outcome,\nby level of class imbalance",
subtitle="Average of 1000 simulations of correlated datasets (N=1000)",
x = "Proportion with positive outcome (0.5 = balanced classes)",
y = "Positive predictive value",
caption = "Note. Missing points represent impossible combinations of binary correlations and class imbalance.")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment