# I try to build a prior distribution for effect sizes that closely resembles meta-analytical summaries of effect sizes. Based on nearly 34,000 effect sizes, Richard, Bond, and Stokes-Zoota (2003) conclude that about 30% of published effect sizes are smaller than d=0.20, about 76% are smaller than 0.63, and 95% are smaller than 1.15.
#
# There is a normal distribution that has a near-perfect match to these percentages: mu=0, sigma=0.55. This leads to theoretical percentages of 28%, 75%, and 96%.
# 1-pnorm(0.20, 0, 0.55, lower.tail=FALSE)*2 # 28% are smaller than d = 0.20
# 1-pnorm(0.63, 0, 0.55, lower.tail=FALSE)*2 # 75% are smaller than d = 0.63
# 1-pnorm(1.15, 0, 0.55, lower.tail=FALSE)*2 # 96% are smaller than d = 1.15
# However, we know that published effect sizes are overestimated due to publication bias (in the RP:P, and also in Franco, Malhotra, & Simonovits, 2015, the reported ES was twice as large as the replication ES / the unreported ES).
# Therefore, we apply more shrinkage, using N(0, SD=0.3). With these settings, the shrunken original RP:P ES are exactly unbiased if the replication ES are taken as unbiased estimates of the true ES.
# Franco, A., Malhotra, N., & Simonovits, G. (2015). Underreporting in Psychology Experiments: Evidence From a Study Registry. Social Psychological and Personality Science, 7(1), 8–12. http://doi.org/10.1177/1948550615598377
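# Illustration: compare the prior matched to published effect sizes, N(0, 0.55), with the
# stronger shrinkage prior N(0, 0.3) that is used below:
# curve(dnorm(x, 0, 0.55), from=-2, to=2, xlab="Effect size (Cohen's d)", ylab="Density")
# curve(dnorm(x, 0, 0.30), from=-2, to=2, add=TRUE, lty="dashed")
# legend("topright", lty=c("solid", "dashed"), legend=c("N(0, 0.55)", "N(0, 0.30)"))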
# ---------------------------------------------------------------------
# Shrinkage of effect size
# 1. Likelihood of ES
# 2. Weight it with a N(0, prior.sd) shrinkage prior (default SD = 0.3)
# --> posterior effect size
#' Provide exactly one of d, r, t, or (as an approximation) the p-value
#' Provide either df or both n1 and n2
# TODO: implement flexible prior
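# Minimal numeric illustration of the two steps (hypothetical numbers, and a normal
# approximation to the likelihood; the full t-based implementation follows below):
# delta <- seq(-2, 2, by=.01)
# lik   <- dnorm(delta, mean=0.6, sd=0.25)   # hypothetical likelihood centred on d = 0.6
# post  <- lik * dnorm(delta, sd=0.3)        # step 2: weight by the N(0, 0.3) shrinkage prior
# sum(delta * post) / sum(post)              # shrunken (posterior mean) ES, roughly 0.35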
shrink <- function(d=NULL, r=NULL, t=NULL, p.value=NULL, df=NULL, n1=NULL, n2=NULL, prior.sd=0.3, plot=FALSE, ...) {
  if (sum(c(!is.null(d), !is.null(t), !is.null(p.value), !is.null(r))) > 1) stop("Please provide only one of d, r, t, or p.value!")
  if (sum(c(!is.null(d), !is.null(t), !is.null(p.value), !is.null(r))) == 0) stop("Please provide either d, r, t, or p.value!")
  if (is.null(df) && (is.null(n1) || is.null(n2))) stop("Please provide either df or both n1 and n2!")

  library(dplyr)

  if (is.null(df)) df <- (n1 + n2) - 2

  # convert whichever input was given into both d and t (two-group, equal-n approximation)
  if (!is.null(d)) {
    t <- (d*sqrt(df)) / 2
  }
  if (!is.null(t)) {
    d <- (2*t)/sqrt(df)
  }
  if (!is.null(p.value)) {
    # conversion formula for Hedges' g, see: https://books.google.de/books?id=GC42CwAAQBAJ&pg=PA100&lpg=PA100&dq=meta-analysis+convert+chi2+to+d+degrees+of+freedom&source=bl&ots=_c4EHEyRis&sig=yIaUDAbQ3RfPvLTfEE7-thpEXys&hl=de&sa=X&ved=0ahUKEwiyp-XntdnLAhWipnIKHab5CaA4ChDoAQhfMAg#v=onepage&q=meta-analysis%20convert%20chi2%20to%20d%20degrees%20of%20freedom&f=false
    d <- (qnorm(1-(p.value/2))*2)/sqrt(df)
    t <- (d*sqrt(df)) / 2
    warning("If effect sizes are derived from p-values, the results are only approximate.")
  }
  if (!is.null(r)) {
    d <- (2 * r)/sqrt(1 - r^2)
    t <- (d*sqrt(df)) / 2
  }
  # likelihood function of delta for these specific t and df
  # (ncp = delta*sqrt(df+2)/2 corresponds to a two-sample t-test with equal group sizes n = (df+2)/2)
  LHf0 <- function(delta, t, df) exp(sum(log(
    suppressWarnings({dt(t, df, ncp=(delta*sqrt(df+2))/2)})
  )))
  LHf1 <- Vectorize(LHf0, vectorize.args = "delta")

  # normalize the likelihood so that it integrates to 1
  K <- 1/integrate(LHf1, -Inf, Inf, t=t, df=df)$value
  LHf <- function(delta, t, df) {K*LHf1(delta, t, df)}
  stepsize <- .01
  LH <- data.frame(delta = seq(-5, 5, by=stepsize))  # which deltas should be probed?
  LH <- LH %>% mutate(
    LH = LHf(delta, t=t, df=df),                # the likelihood function for delta
    LH.dens = LH/(sum(LH)*stepsize),            # normalize area to 1 to have a real density

    # the shrinkage weighting function
    shrink.weight = dnorm(delta, sd=prior.sd),  # must be a real prior which integrates to 1
    shrunken.LH = LH*shrink.weight,             # apply the shrinkage to the likelihood
    shrunken.LH.dens = shrunken.LH/(sum(shrunken.LH)*stepsize),  # normalize area to 1 to have a real density

    # BF computation
    BF.prior = dcauchy(delta, scale=sqrt(2)/2), # must be a real prior which integrates to 1
    #BF.prior = dnorm(delta, 0, 1),             # must be a real prior which integrates to 1

    # compute naive (i.e., face-value) posterior with standard LH
    posterior.naive = LH*BF.prior,
    posterior.naive.dens = posterior.naive/(sum(posterior.naive)*stepsize),  # normalize area to 1 to have a real density

    # compute shrunken posterior with shrunken LH
    posterior = shrunken.LH*BF.prior,
    posterior.dens = posterior/(sum(posterior)*stepsize)  # normalize area to 1 to have a real density
  )
  if (plot==TRUE) {
    plot(LH$delta, LH$LH.dens, type="l", xlab="Effect size (Cohen's d)", ylab="Density", ylim=c(0, max(LH[, c(3, 4, 6)])))
    lines(LH$delta, LH$shrink.weight, lty="dotted", col="red")
    lines(LH$delta, LH$shrunken.LH.dens, lty="dashed", col="red")
    legend("topright", lty=c("solid", "dotted", "dashed"), col=c("black", "red", "red"), legend=c("Likelihood", "shrinkage prior", "posterior after shrinkage"))
  }
  # posterior summaries of the shrunken likelihood
  MAP <- LH$delta[which.max(LH$shrunken.LH.dens)]      # maximum a posteriori estimate
  MEAN <- weighted.mean(LH$delta, LH$shrunken.LH.dens) # posterior mean

  # Bayes factors BF_10 via the Savage-Dickey density ratio: prior density at 0 / posterior density at 0
  i0 <- which.min(abs(LH$delta))  # grid point closest to delta = 0 (avoids an exact floating-point comparison)
  return(list(
    d.naive = d,
    d.shrunk = MEAN,
    BF.naive = LH$BF.prior[i0] / LH$posterior.naive.dens[i0],
    BF.shrunk = LH$BF.prior[i0] / LH$posterior.dens[i0]
  ))
}
shrink(d=0.772, df=41)
shrink(r=0.36, df=41)
shrink(p.value=0.0177, df=41)
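# Optional cross-check (a sketch, assuming the BayesFactor package is installed): BF.naive uses a
# Cauchy(0, sqrt(2)/2) prior and a noncentrality of delta*sqrt(df+2)/2, i.e. two equal groups of
# size n = (df+2)/2, so it should approximately match the default two-sample JZS Bayes factor:
# library(BayesFactor)
# shrink(t=2.5, df=40)$BF.naive              # grid-based Savage-Dickey BF_10
# exp(ttest.tstat(t=2.5, n1=21, n2=21)$bf)   # JZS BF_10 (ttest.tstat returns the log BF in $bf)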
# ---------------------------------------------------------------------
# RV formula
RV9 <- function(CitationCount, lagYear, altmetric.percentile,
                d=NULL, r=NULL, t=NULL, p.value=NULL, df=NULL, n1=NULL, n2=NULL, sd=0.3, plot=FALSE) {

  # estimated academic-impact percentile from citation count and publication lag (years)
  academic.percentile <- 1-exp(-1.86 - 2*log(CitationCount) + 3.24*log(lagYear))

  # combine academic and altmetric impact (academic impact gets twice the weight)
  IMPACT <- weighted.mean(c(academic.percentile, altmetric.percentile), w=c(2, 1), na.rm=TRUE)

  EVIDENCE <- shrink(d=d, r=r, p.value=p.value, df=df, n1=n1, n2=n2, t=t, prior.sd=sd, plot=plot)

  # scale factor: a top-1% paper (impact percentile = .99) with an evidence of BF = 10 gets RV = 1
  K <- 10/0.99
  RV <- K*(IMPACT/EVIDENCE$BF.shrunk)

  cat(paste0(
    "Impact = ", round(IMPACT*100, 2), "%\n",
    "Empirical effect size (Cohen's d) = ", round(EVIDENCE$d.naive, 2), "\n",
    "Shrunken effect size (Cohen's d) = ", round(EVIDENCE$d.shrunk, 2), "\n",
    "Evidence = ", round(EVIDENCE$BF.shrunk, 2), " (shrunken BF_10)\n",
    "Replication Value = ", round(RV, 2)
  ))
  invisible(RV)
}
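# Sanity check of the scaling, using the formula above: a hypothetical paper at the 99th
# impact percentile with a shrunken BF_10 of 10 gets RV = (10/0.99) * (0.99/10) = 1.
# (10/0.99) * (0.99/10)   # = 1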
# ---------------------------------------------------------------------
# EXAMPLES for using the RV formula
# ---------------------------------------------------------------------
# You can either provide a t value, a d value, an r value, or (as an approximation) a p-value.
# F-values with 1 numerator df can be converted to t-values (all focused tests should have a single numerator df anyway): t.value <- sqrt(F)
# r values can be converted to d values: d <- (2*r) / sqrt(1-r^2)
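# For example (hypothetical values for illustration):
# F <- 5.29; t.value <- sqrt(F)         # t = 2.3 for a focused test with 1 numerator df
# r <- 0.30; d <- (2*r) / sqrt(1-r^2)   # d is approximately 0.63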
# Alter & Oppenheimer
RV9(CitationCount=190, lagYear=9, altmetric.percentile=0.987, t=2.01, df=39)
# Stroop
RV9(CitationCount=7510, lagYear=80, altmetric.percentile=NA, t=15.33, df=98)
# Bargh, J. A., Chen, M., & Burrows, L. (1996). Automaticity of social behavior: Direct effects of trait construct and stereotype activation on action. Journal of Personality and Social Psychology, 71(2), 230–244. http://doi.org/10.1037//0022-3514.71.2.230
RV9(CitationCount=1339, lagYear=19, altmetric.percentile=0.98, t=2.86, df=28, sd=0.3)
RV9(CitationCount=1339, lagYear=19, altmetric.percentile=0.98, t=2.16, df=28, sd=0.3)