# I try to build a prior distribution for effect sizes that closely resembles meta-analytical summaries of effect sizes. Based on nearly 34,000 effect sizes, Richard, Bond, and Stokes-Zoota (2003) conclude that about 30% of published effect sizes are smaller than d=0.20, about 76% are smaller than 0.63, and 95% are smaller than 1.15.
#
# There is a normal distribution that has a near-perfect match to these percentages: mu=0, sigma=0.55. This leads to theoretical percentages of 28%, 75%, and 96%.
# 1-pnorm(0.20, 0, 0.55, lower.tail=FALSE)*2 # 28% are smaller than d = 0.20
# 1-pnorm(0.63, 0, 0.55, lower.tail=FALSE)*2 # 75% are smaller than d = 0.63
# 1-pnorm(1.15, 0, 0.55, lower.tail=FALSE)*2 # 96% are smaller than d = 1.15
# However, we know that published effect sizes are overestimated due to publication bias (in the RP:P, and also in Franco, Malhotra, & Simonovits, 2015, the reported ES was twice as large as the replication ES / the unreported ES).
# Therefore, we apply more shrinkage, using N(0, SD=0.3). With these settings, the shrunken original RP:P ES are exactly unbiased if the replication ES are taken as unbiased estimates of the true ES.
# Franco, A., Malhotra, N., & Simonovits, G. (2015). Underreporting in Psychology Experiments: Evidence From a Study Registry. Social Psychological and Personality Science, 7(1), 8–12. http://doi.org/10.1177/1948550615598377
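# Illustration: compare the prior matched to published effect sizes, N(0, 0.55), with the
# stronger shrinkage prior N(0, 0.3) that is used below:
# curve(dnorm(x, 0, 0.55), from=-2, to=2, xlab="Effect size (Cohen's d)", ylab="Density")
# curve(dnorm(x, 0, 0.30), from=-2, to=2, add=TRUE, lty="dashed")
# legend("topright", lty=c("solid", "dashed"), legend=c("N(0, 0.55)", "N(0, 0.30)"))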
# ---------------------------------------------------------------------
# Shrinkage of effect size
# 1. Likelihood of ES
# 2. Weight it with a N(0, prior.sd) shrinkage prior (default SD = 0.3)
# --> posterior effect size
#' Provide exactly one of d, r, t, or (as an approximation) the p-value
#' Provide either df or both n1 and n2
# TODO: implement flexible prior
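# Minimal numeric illustration of the two steps (hypothetical numbers, and a normal
# approximation to the likelihood; the full t-based implementation follows below):
# delta <- seq(-2, 2, by=.01)
# lik   <- dnorm(delta, mean=0.6, sd=0.25)   # hypothetical likelihood centred on d = 0.6
# post  <- lik * dnorm(delta, sd=0.3)        # step 2: weight by the N(0, 0.3) shrinkage prior
# sum(delta * post) / sum(post)              # shrunken (posterior mean) ES, roughly 0.35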
shrink <- function(d=NULL, r=NULL, t=NULL, p.value=NULL, df=NULL, n1=NULL, n2=NULL, prior.sd=0.3, plot=FALSE, ...) {
  if (sum(c(!is.null(d), !is.null(t), !is.null(p.value), !is.null(r))) > 1) stop("Please provide only one of d, r, t, or p.value!")
  if (sum(c(!is.null(d), !is.null(t), !is.null(p.value), !is.null(r))) == 0) stop("Please provide either d, r, t, or p.value!")
  if (is.null(df) && (is.null(n1) || is.null(n2))) stop("Please provide either df or both n1 and n2!")

  library(dplyr)

  if (is.null(df)) df <- (n1 + n2) - 2

  # convert whichever input was given into both d and t (two-group, equal-n approximation)
  if (!is.null(d)) {
    t <- (d*sqrt(df)) / 2
  }
  if (!is.null(t)) {
    d <- (2*t)/sqrt(df)
  }
  if (!is.null(p.value)) {
    # conversion formula for Hedges' g, see: https://books.google.de/books?id=GC42CwAAQBAJ&pg=PA100&lpg=PA100&dq=meta-analysis+convert+chi2+to+d+degrees+of+freedom&source=bl&ots=_c4EHEyRis&sig=yIaUDAbQ3RfPvLTfEE7-thpEXys&hl=de&sa=X&ved=0ahUKEwiyp-XntdnLAhWipnIKHab5CaA4ChDoAQhfMAg#v=onepage&q=meta-analysis%20convert%20chi2%20to%20d%20degrees%20of%20freedom&f=false
    d <- (qnorm(1-(p.value/2))*2)/sqrt(df)
    t <- (d*sqrt(df)) / 2
    warning("If effect sizes are derived from p-values, the results are only approximate.")
  }
  if (!is.null(r)) {
    d <- (2 * r)/sqrt(1 - r^2)
    t <- (d*sqrt(df)) / 2
  }
  # likelihood function of delta for these specific t and df
  # (ncp = delta*sqrt(df+2)/2 corresponds to a two-sample t-test with equal group sizes n = (df+2)/2)
  LHf0 <- function(delta, t, df) exp(sum(log(
    suppressWarnings({dt(t, df, ncp=(delta*sqrt(df+2))/2)})
  )))
  LHf1 <- Vectorize(LHf0, vectorize.args = "delta")

  # normalize the likelihood so that it integrates to 1
  K <- 1/integrate(LHf1, -Inf, Inf, t=t, df=df)$value
  LHf <- function(delta, t, df) {K*LHf1(delta, t, df)}
  stepsize <- .01
  LH <- data.frame(delta = seq(-5, 5, by=stepsize))  # which deltas should be probed?
  LH <- LH %>% mutate(
    LH = LHf(delta, t=t, df=df),                # the likelihood function for delta
    LH.dens = LH/(sum(LH)*stepsize),            # normalize area to 1 to have a real density

    # the shrinkage weighting function
    shrink.weight = dnorm(delta, sd=prior.sd),  # must be a real prior which integrates to 1
    shrunken.LH = LH*shrink.weight,             # apply the shrinkage to the likelihood
    shrunken.LH.dens = shrunken.LH/(sum(shrunken.LH)*stepsize),  # normalize area to 1 to have a real density

    # BF computation
    BF.prior = dcauchy(delta, scale=sqrt(2)/2), # must be a real prior which integrates to 1
    #BF.prior = dnorm(delta, 0, 1),             # must be a real prior which integrates to 1

    # compute naive (i.e., face-value) posterior with standard LH
    posterior.naive = LH*BF.prior,
    posterior.naive.dens = posterior.naive/(sum(posterior.naive)*stepsize),  # normalize area to 1 to have a real density

    # compute shrunken posterior with shrunken LH
    posterior = shrunken.LH*BF.prior,
    posterior.dens = posterior/(sum(posterior)*stepsize)  # normalize area to 1 to have a real density
  )
  if (plot==TRUE) {
    plot(LH$delta, LH$LH.dens, type="l", xlab="Effect size (Cohen's d)", ylab="Density", ylim=c(0, max(LH[, c(3, 4, 6)])))
    lines(LH$delta, LH$shrink.weight, lty="dotted", col="red")
    lines(LH$delta, LH$shrunken.LH.dens, lty="dashed", col="red")
    legend("topright", lty=c("solid", "dotted", "dashed"), col=c("black", "red", "red"), legend=c("Likelihood", "shrinkage prior", "posterior after shrinkage"))
  }
  # posterior summaries of the shrunken likelihood
  MAP <- LH$delta[which.max(LH$shrunken.LH.dens)]      # maximum a posteriori estimate
  MEAN <- weighted.mean(LH$delta, LH$shrunken.LH.dens) # posterior mean

  # Bayes factors BF_10 via the Savage-Dickey density ratio: prior density at 0 / posterior density at 0
  i0 <- which.min(abs(LH$delta))  # grid point closest to delta = 0 (avoids an exact floating-point comparison)
  return(list(
    d.naive = d,
    d.shrunk = MEAN,
    BF.naive = LH$BF.prior[i0] / LH$posterior.naive.dens[i0],
    BF.shrunk = LH$BF.prior[i0] / LH$posterior.dens[i0]
  ))
}
shrink(d=0.772, df=41)
shrink(r=0.36, df=41)
shrink(p.value=0.0177, df=41)
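# Optional cross-check (a sketch, assuming the BayesFactor package is installed): BF.naive uses a
# Cauchy(0, sqrt(2)/2) prior and a noncentrality of delta*sqrt(df+2)/2, i.e. two equal groups of
# size n = (df+2)/2, so it should approximately match the default two-sample JZS Bayes factor:
# library(BayesFactor)
# shrink(t=2.5, df=40)$BF.naive              # grid-based Savage-Dickey BF_10
# exp(ttest.tstat(t=2.5, n1=21, n2=21)$bf)   # JZS BF_10 (ttest.tstat returns the log BF in $bf)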
# ---------------------------------------------------------------------
# RV formula
RV9 <- function(CitationCount, lagYear, altmetric.percentile,
                d=NULL, r=NULL, t=NULL, p.value=NULL, df=NULL, n1=NULL, n2=NULL, sd=0.3, plot=FALSE) {

  # estimated academic-impact percentile from citation count and publication lag (years)
  academic.percentile <- 1-exp(-1.86 - 2*log(CitationCount) + 3.24*log(lagYear))

  # combine academic and altmetric impact (academic impact gets twice the weight)
  IMPACT <- weighted.mean(c(academic.percentile, altmetric.percentile), w=c(2, 1), na.rm=TRUE)

  EVIDENCE <- shrink(d=d, r=r, p.value=p.value, df=df, n1=n1, n2=n2, t=t, prior.sd=sd, plot=plot)

  # scale factor: a top-1% paper (impact percentile = .99) with an evidence of BF = 10 gets RV = 1
  K <- 10/0.99
  RV <- K*(IMPACT/EVIDENCE$BF.shrunk)

  cat(paste0(
    "Impact = ", round(IMPACT*100, 2), "%\n",
    "Empirical effect size (Cohen's d) = ", round(EVIDENCE$d.naive, 2), "\n",
    "Shrunken effect size (Cohen's d) = ", round(EVIDENCE$d.shrunk, 2), "\n",
    "Evidence = ", round(EVIDENCE$BF.shrunk, 2), " (shrunken BF_10)\n",
    "Replication Value = ", round(RV, 2)
  ))
  invisible(RV)
}
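# Sanity check of the scaling, using the formula above: a hypothetical paper at the 99th
# impact percentile with a shrunken BF_10 of 10 gets RV = (10/0.99) * (0.99/10) = 1.
# (10/0.99) * (0.99/10)   # = 1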
# ---------------------------------------------------------------------
# EXAMPLES for using the RV formula
# ---------------------------------------------------------------------
# You can either provide a t value, a d value, an r value, or (as an approximation) a p-value.
# F-values with 1 numerator df can be converted to t-values (all focused tests should have a single numerator df anyway): t.value <- sqrt(F)
# r values can be converted to d values: d <- (2*r) / sqrt(1-r^2)
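# For example (hypothetical values for illustration):
# F <- 5.29; t.value <- sqrt(F)         # t = 2.3 for a focused test with 1 numerator df
# r <- 0.30; d <- (2*r) / sqrt(1-r^2)   # d is approximately 0.63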
# Alter & Oppenheimer
RV9(CitationCount=190, lagYear=9, altmetric.percentile=0.987, t=2.01, df=39)
# Stroop
RV9(CitationCount=7510, lagYear=80, altmetric.percentile=NA, t=15.33, df=98)
# Bargh, J. A., Chen, M., & Burrows, L. (1996). Automaticity of social behavior: Direct effects of trait construct and stereotype activation on action. Journal of Personality and Social Psychology, 71(2), 230–244. http://doi.org/10.1037//0022-3514.71.2.230
RV9(CitationCount=1339, lagYear=19, altmetric.percentile=0.98, t=2.86, df=28, sd=0.3)
RV9(CitationCount=1339, lagYear=19, altmetric.percentile=0.98, t=2.16, df=28, sd=0.3)