Skip to content

Instantly share code, notes, and snippets.

@fdabl
Last active August 29, 2015 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fdabl/16cbe18e2d0a79b7baf9 to your computer and use it in GitHub Desktop.
Save fdabl/16cbe18e2d0a79b7baf9 to your computer and use it in GitHub Desktop.
library('pwr') # for power calculations
library('foreign') # for dreadful spss .sav files
library('psych') # required by phack
source('http://rynesherman.com/phack.r')
SIM <- 10000
set.seed(42)
options(warn=-1) # avoid warning by read.spss
############################################
# Simulating p-hacking
############################################
run_phack <- function() {
# call it later
res <- phack(initialN=30, hackrate=3, grp1M=0, grp2M=0, grp1SD=1, grp2SD=1, maxN=120,
alpha=0.05, alternative='two.sided', graph=TRUE, sims=3000)
par(mfrow=c(1,1)) # recover from phack plots
}
############################################
# Distribution of p-values
############################################
testit <- function(d) {
x <- rnorm(mean=0, sd=1, 100)
y <- rnorm(mean=d, sd=1, 100)
t.test(x, y)$p.value
}
simplot <- function(pvalues, effect=TRUE) {
h <- hist(pvalues)
h$counts <- h$counts / sum(h$counts)
title <- sprintf('Distribution of p-values given %s effect', ifelse(effect, 'an', 'no'))
plot(h, freq=FALSE, col='blue', main=title, xlab='p-values', ylab='percentage')
}
############################################
# Relation between power and size of p-values
############################################
power <- seq(0.1, 0.9, 0.1)
test <- function(power, d=0) {
n <- pwr.t.test(d=0.43, power=power, sig.level=0.05)$n
x <- rnorm(mean=0, sd=1, n)
y <- rnorm(mean=d, sd=1, n)
t.test(x, y)$p.value
}
pplot <- function(percent, power) {
barplot(percent, col='blue', xlab='power', ylab='p-values <= 0.02', names.arg=power,
main='Frequency of p-values <= 0.02 in p-values < 0.05 given H1')
}
##########################################
# P - Curve
# Logic of p-curve:
# Computes the p value of the p value, i.e. what is the probability of obtaining a p value
# as extreme or more as the one observed, given that the p values are uniformly distributed (H0).
# e.g. p = 0.01 => ppvalue of 0.2
# generally: ppvalue = p/0.05
# We use Fisher's method to get a chisq distribution for skew.
#
# Practical Example:
# p-curve analysis on a recent PLoS One paper demonstrating publication bias http://goo.gl/lxAkKx
#################################
# TODO: test against 33% Power [relies on non-central distributions]
fisher <- function(pvalues, right=TRUE) {
# test for right-skew per default
# -2 times the sum of the natural log of each of k
# uniform distributions is distributed X² with df = 2k
ppvalues <- sapply(pvalues, function(p) p/0.05)
if (!right)
ppvalues <- 1 - ppvalues
df <- 2 * length(ppvalues)
x <- -2 * sum(sapply(ppvalues, log))
p <- 1 - pchisq(x, df)
return(list(p=p, df=df, value=x))
}
curveplot <- function(pvalues) {
h <- hist(pvalues)
h$counts <- h$counts / sum(h$counts) # use percentages
title <- sprintf('Distribution of %i p-values', length(pvalues))
plot(h, col='blue', xlab='p-values', ylab='Percentage', main=title)
}
############################################
# #################### Main
############################################
wait <- function(info) {
info <- sprintf('Press [enter] to %s \n', info)
readline(prompt=info)
}
# phacking
wait('simulate phacking')
cat('running simulations ... \n')
run_phack()
cat('\n')
# distribution
cat('running simulations (like, all the time) ... \n')
effect <- replicate(SIM, testit(0.4))
noeffect <- replicate(SIM, testit(0))
wait('see the distribution of p-values given no effect')
simplot(noeffect, effect=FALSE)
wait('see the distribution of p-values given an effect')
simplot(effect[effect <= 0.05])
# relation power and p-values
wait('see the relation of power and p-values')
cat('needs more simulations (you know the drill) ... \n\n')
effect <- sapply(power, function(p) replicate(SIM / length(power), test(p, d=0.43)))
signp <- effect[effect <= 0.05]
smallp <- function(col) sum(col <= 0.02) / length(smallp)
percent <- apply(effect, 2, smallp)
pplot(percent, power)
# p-curve analysis
wait('see a p-curve analysis of 347 p-values')
data <- read.spss('plos.sav')
pvalues <- data$p
pvalues <- pvalues[!is.na(pvalues)] # remove NA
pvalues <- pvalues[pvalues <= 0.05]
f <- fisher(pvalues)
info <- sprintf('Heavily right-skewed, with Chi^2(%s) = %.2f, p = %.2f\n', f$df, f$value, f$p)
cat(info)
curveplot(pvalues)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment