Skip to content

Instantly share code, notes, and snippets.

@JoFrhwld
Created May 16, 2012 14:47
Show Gist options
  • Save JoFrhwld/2710902 to your computer and use it in GitHub Desktop.
Save JoFrhwld/2710902 to your computer and use it in GitHub Desktop.
Blog post on the decline effect
library(ggplot2)
library(plyr)
## Set your effect size, and desired p-value threshold here
effect = 0.1
thresh = 0.05
## Sets up the simulation parameters
pars = list(mean1 = 1, mean2 = 1+effect, sd1 = 1, sd2 = 1)
nsim = 1000
nsamp = c(10, 20, 50, 100, 200,500, 1000)
## A function to do the simulation
sim_signif <- function (x, nsamps, pars, test = t.test, ...) {
rep_sim <- function(nsamp, pars, test){
samp1 <- rnorm(nsamp, pars$mean1, pars$sd2)
samp2 <- rnorm(nsamp, pars$mean2, pars$sd2)
mod <- test(samp1, samp2, ...)
if(length(mod$estimate)==2){
estimate <- diff(mod$estimate)
}else{
estimate <- mod$estimate
}
p.value <- mod$p.value
df <- data.frame(est = estimate, p.value = p.value)
df$nsamp <- nsamp
return(df)
}
out <- ldply(nsamps, rep_sim, pars = pars, test = test)
return(out)
}
## The actual simulation
sims <- ldply(1:nsim, sim_signif,
nsamp = nsamp,
pars = pars,
test = t.test,
conf.int = T,
.progress = "text")
## Calculate the estimated effect sizes based on
### 1. The sample size
### 2. Whether or not the p-value met the threshold
### 3. The sign of the estimated effect size
effect_sizes <- ddply(sims, .(nsamp, thresh = p.value<thresh, sign=sign(est)),
summarise,
est = median(est),
hi = quantile(est, probs = 0.975),
lo = quantile(est, probs = 0.025),
N = length(est))
## prob = the proportion of simulatioms wich fell into a
## particular cell in the following 2x2 table:
### significant | not-significant
### positive |
### ------------------------------------------------------
### negative |
effect_sizes <- ddply(effect_sizes, .(nsamp),
transform,
prob = N/sum(N))
## Basic plot of just the tests which found a significant difference
ggplot(subset(effect_sizes, thresh), aes(nsamp, est/effect, color = sign == -1))+
geom_point(aes(size = logit(prob)))+
geom_segment(aes(xend = nsamp, y = hi/effect, yend = lo/effect))+
geom_hline(y = 1)+
geom_hline(y = -1, color = "red")+
scale_area(li)+
scale_color_manual(values = c("black","red"), guide = F)+
scale_x_log10(breaks = nsamp)+
ylab("Estimate is y times the true effect")+
opts(title = paste("Effect = ",effect, sep = ""))
## What if we had used different thresholds?
thresholds <- c(0.05, 0.01, 0.001)
multiple_thresh <- ldply(thresholds, function(thresh, df){
effect_sizes <- ddply(df, .(nsamp, thresh = p.value<thresh, sign=sign(est)),
summarise,
est = median(est),
hi = quantile(est, probs = 0.975),
lo = quantile(est, probs = 0.025),
N = length(est))
effect_sizes <- ddply(effect_sizes, .(nsamp),
transform,
prob = N/sum(N))
effect_sizes[effect_sizes$thresh,]
effect_sizes$thresh2 <- thresh
return(effect_sizes)
}, df = sims)
multiple_thresh <- multiple_thresh[multiple_thresh$thresh, ]
ggplot(multiple_thresh, aes(nsamp, est/effect, color = sign == -1))+
geom_point(aes(size = prob))+
geom_segment(aes(xend = nsamp, y = hi/effect, yend = lo/effect))+
geom_hline(y = 1)+
geom_hline(y = -1, color = "red")+
scale_area()+
scale_color_manual(values = c("black","red"), guide = F)+
scale_x_log10(breaks = nsamp)+
facet_wrap(~thresh2)+
ylab("Estimate is y times the true effect")+
opts(title = paste("Effect = ",effect, sep = ""))
df <- data.frame(
study = c("guy", "santa anata", "bayley", "t&t", "s&d&f"),
year = c(1991, 1992, 1996, 2005,2009),
p.ret = c(0.84, 0.743, 1-0.24, 1-0.19, 1-0.18),
p.n = c(181, 836, 685, 388, 176),
s.ret = c(0.661, 0.593, 1-0.34, 1-0.21, 1-0.12),
s.n = c(56, 297, 243, 128, 78),
m.ret = c(0.619, 0.421, 1-0.56, 1-0.26, 1-0.37),
m.n = c(658, 3724, 2348, 716, 441)
)
## Fruehwald (2012) calculated confidence intervals differently
## see http://repository.upenn.edu/pwpl/vol18/iss1/10/
low <- (1-(0.95^2))/2
hi <- 1-((1-(0.95^2))/2)
df$p.min <- qbeta(low, round(df$p.n * df$p.ret), df$p.n - round(df$p.n * df$p.ret))
df$p.max <- qbeta(hi, round(df$p.n * df$p.ret), df$p.n - round(df$p.n * df$p.ret))
df$s.min <- qbeta(low, round(df$s.n * df$s.ret), df$s.n - round(df$s.n * df$s.ret))
df$s.max <- qbeta(hi, round(df$s.n * df$s.ret), df$s.n - round(df$s.n * df$s.ret))
df$m.min <- qbeta(low, round(df$m.n * df$m.ret), df$m.n - round(df$m.n * df$m.ret))
df$m.max <- qbeta(hi, round(df$m.n * df$m.ret), df$m.n - round(df$m.n * df$m.ret))
df$j.min <- log(df$s.max, base = df$p.min)
df$j <- log(df$s.ret, base = df$p.ret)
df$j.max <- log(df$s.min, base = df$p.max)
df$k.min <- log(df$m.max, base = df$p.min)
df$k <- log(df$m.ret, base = df$p.ret)
df$k.max <- log(df$m.min, base = df$p.max)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment