Skip to content

Instantly share code, notes, and snippets.

@devster31
Last active May 5, 2017 21:20
Show Gist options
  • Save devster31/c48191d5bd2271e8029da39cbe26adc2 to your computer and use it in GitHub Desktop.
Save devster31/c48191d5bd2271e8029da39cbe26adc2 to your computer and use it in GitHub Desktop.
Bayesian analysis of OPTIMISE trial (OpenBUGS and JAGS versions)
source("~/Doing_Bayesian_Analysis/openGraphSaveGraph.R")
source("~/Doing_Bayesian_Analysis/plotPost.R")
require(BRugs)
modelstring = "
# BUGS model specification begins here...
model {
# Likelihood. Each complication/death is Bernoulli.
for ( i in 1 : N1 ) { y1[i] ~ dbern( theta1 ) }
for ( i in 1 : N2 ) { y2[i] ~ dbern( theta2 ) }
# Prior. Independent beta distributions.
theta1 ~ dbeta( 1 , 1 )
theta2 ~ dbeta( 1 , 1 )
}
# ... end BUGS model specification
" # close quote for modelstring
# Write model to a file:
.temp = file("model.txt","w") ; writeLines(modelstring,con=.temp) ; close(.temp)
# Load model file into BRugs and check its syntax:
modelCheck( "model.txt" )
# Specify the data in a form that is compatible with BRugs model, as a list:
dataList = list(
N1 = N1 ,
y1 = c(rep(1, y1), rep(0, N1-y1)),
N2 = N2 ,
y2 = c(rep(1, y2), rep(0, N2-y2))
)
# Get the data into BRugs:
modelData( bugsData( datalist ) )
modelCompile()
modelGenInits()
samplesSet( c( "theta1" , "theta2" ) ) # Keep a record of sampled "theta" values
chainlength = 10000 # Arbitrary length of chain to generate.
modelUpdate( chainlength ) # Actually generate the chain.
theta1Sample = samplesSample( "theta1" ) # Put sampled values in a vector.
theta2Sample = samplesSample( "theta2" ) # Put sampled values in a vector.
# Plot the chains (trajectory of the last 500 sampled values).
par( pty="s" )
chainlength = NROW( mcmcChain )
plot( theta1Sample[(chainlength-500):chainlength],
theta2Sample[(chainlength-500):chainlength], type = "o",
xlim = c(0,1), xlab = bquote(theta[1]), ylim = c(0,1),
ylab = bquote(theta[2]), main="BUGS Result", col="skyblue" )
# Display means in plot.
theta1mean = mean(theta1Sample)
theta2mean = mean(theta2Sample)
if (theta1mean > .5) { xpos = 0.0 ; xadj = 0.0 }
else { xpos = 1.0; xadj = 1.0 }
if (theta2mean > .5) { ypos = 0.0 ; yadj = 0.0 }
else { ypos = 1.0; yadj = 1.0 }
text( xpos, ypos,
bquote(
"M=" * .(signif(theta1mean,3)) * "," * .(signif(theta2mean,3))
), adj=c(xadj,yadj), cex=1.5 )
# Plot a histogram of the posterior differences of theta values.
thetaRR = theta1Sample / theta2Sample # Relative risk
thetaDiff = theta1Sample - theta2Sample # Absolute risk difference
par( mar = c( 5.1, 4.1, 4.1, 2.1 ) )
# only with scripts at the beginning
plotPost( thetaRR , xlab= expression(paste("Relative risk (", theta[1]/theta[2], ")")) ,
compVal=1.0, ROPE=c(0.9, 1.1),
main="OPTIMSE Composite Primary OutcomenPosterior distribution of relative risk")
plotPost( thetaDiff , xlab=expression(paste("Absolute risk difference (", theta[1]-theta[2], ")")) ,
compVal=0.0, ROPE=c(-0.05, 0.05),
main="OPTIMSE Composite Primary OutcomenPosterior distribution of absolute risk difference")
#-----------------------------------------------------------------------------
# Use posterior prediction to determine proportion of cases in which
# using the intervention would result in no complication/death
# while not using the intervention would result in complication death
chainLength = length( theta1Sample )
# Create matrix to hold results of simulated patients:
yPred = matrix( NA , nrow=2 , ncol=chainLength )
# For each step in chain, use posterior prediction to determine outcome
for ( stepIdx in 1:chainLength ) { # step through the chain
# Probability for complication/death for each "patient" in intervention group:
pDeath1 = theta1Sample[stepIdx]
# Simulated outcome for each intervention "patient"
yPred[1,stepIdx] = sample( x=c(0,1), prob=c(1-pDeath1,pDeath1), size=1 )
# Probability for complication/death for each "patient" in control group:
pDeath2 = theta2Sample[stepIdx]
# Simulated outcome for each control "patient"
yPred[2,stepIdx] = sample( x=c(0,1), prob=c(1-pDeath2,pDeath2), size=1 )
}
# Now determine the proportion of times that the intervention group has no complication/death
# (y1 == 0) and the control group does have a complication or death (y2 == 1))
(pY1eq0andY2eq1 = sum( yPred[1,]==0 & yPred[2,]==1 ) / chainLength)
(pY1eq1andY2eq0 = sum( yPred[1,]==1 & yPred[2,]==0 ) / chainLength)
(pY1eq0andY2eq0 = sum( yPred[1,]==0 & yPred[2,]==0 ) / chainLength)
(pY10eq1andY2eq1 = sum( yPred[1,]==1 & yPred[2,]==1 ) / chainLength)
# Conclusion: in 27% of cases based on these probabilities,
# a patient in the intervention group would not have a complication,
# when a patient in control group did.
# OPTIMISE trial in a Bayesian framework
# JAMA. 2014;311(21):2181-2190. doi:10.1001/jama.2014.5305
# Ewen Harrison
# 15/02/2015
# http://www.datasurg.net/2015/02/16/bayesian-statistics-and-clinical-trial-conclusions-why-the-optimse-study-should-be-considered-positive/
# Primary outcome: composite of 30-day moderate or major complications and mortality
N1 <- 366
y1 <- 134
N2 <- 364
y2 <- 158
# N1 is total number in the Cardiac Output–Guided Hemodynamic Therapy Algorithm (intervention) group
# y1 is number with the outcome in the Cardiac Output–Guided Hemodynamic Therapy Algorithm (intervention) group
# N2 is total number in usual care (control) group
# y2 is number with the outcome in usual care (control) group
# Risk ratio
(y1/N1)/(y2/N2)
library(epitools)
riskratio(c(N1-y1, y1, N2-y2, y2), rev="rows", method="boot", replicates=100000)
# Using standard frequentist approach
# Risk ratio (bootstrapped 95% confidence intervals) = 0.84 (0.70-1.01)
# p=0.07 (Fisher exact p-value)
# Reasonably reported as no difference between groups.
# But there is a difference, it just not judged significant using conventional
# (and much criticised) wisdom.
# Bayesian analysis of same ratio
# Base script from John Krushcke, Doing Bayesian Analysis
#------------------------------------------------------------------------------
source("~/Doing_Bayesian_Analysis/openGraphSaveGraph.R")
source("~/Doing_Bayesian_Analysis/plotPost.R")
require(rjags) # Kruschke, J. K. (2011). Doing Bayesian Data Analysis, Academic Press / Elsevier.
#------------------------------------------------------------------------------
# Important
# The model will be specified with completely uninformative prior distributions (beta(1,1,).
# This presupposes that no pre-exisiting knowledge exists as to whehther a difference
# may of may not exist between these two intervention.
# Plot Beta(1,1)
# 3x1 plots
par(mfrow=c(3,1))
# Adjust size of prior plot
par(mar=c(5.1,7,4.1,7))
plot(seq(0, 1, length.out=100), dbeta(seq(0, 1, length.out=100), 1, 1),
type="l", xlab="Proportion",
ylab="Probability",
main="OPTIMSE Composite Primary OutcomenPrior distribution",
frame=FALSE, col="red", oma=c(6,6,6,6))
legend("topright", legend="beta(1,1)", lty=1, col="red", inset=0.05)
# THE MODEL.
modelString = "
# JAGS model specification begins here...
model {
# Likelihood. Each complication/death is Bernoulli.
for ( i in 1 : N1 ) { y1[i] ~ dbern( theta1 ) }
for ( i in 1 : N2 ) { y2[i] ~ dbern( theta2 ) }
# Prior. Independent beta distributions.
theta1 ~ dbeta( 1 , 1 )
theta2 ~ dbeta( 1 , 1 )
}
# ... end JAGS model specification
" # close quote for modelstring
# Write the modelString to a file, using R commands:
writeLines(modelString,con="model.txt")
#------------------------------------------------------------------------------
# THE DATA.
# Specify the data in a form that is compatible with JAGS model, as a list:
dataList = list(
N1 = N1 ,
y1 = c(rep(1, y1), rep(0, N1-y1)),
N2 = N2 ,
y2 = c(rep(1, y2), rep(0, N2-y2))
)
#------------------------------------------------------------------------------
# INTIALIZE THE CHAIN.
# Can be done automatically in jags.model() by commenting out inits argument.
# Otherwise could be established as:
# initsList = list( theta1 = sum(dataList$y1)/length(dataList$y1) ,
# theta2 = sum(dataList$y2)/length(dataList$y2) )
#------------------------------------------------------------------------------
# RUN THE CHAINS.
parameters = c( "theta1" , "theta2" ) # The parameter(s) to be monitored.
adaptSteps = 500 # Number of steps to "tune" the samplers.
burnInSteps = 1000 # Number of steps to "burn-in" the samplers.
nChains = 3 # Number of chains to run.
numSavedSteps=200000 # Total number of steps in chains to save.
thinSteps=1 # Number of steps to "thin" (1=keep every step).
nIter = ceiling( ( numSavedSteps * thinSteps ) / nChains ) # Steps per chain.
# Create, initialize, and adapt the model:
jagsModel = jags.model( "model.txt" , data=dataList , # inits=initsList ,
n.chains=nChains , n.adapt=adaptSteps )
# Burn-in:
cat( "Burning in the MCMC chain...n" )
update( jagsModel , n.iter=burnInSteps )
# The saved MCMC chain:
cat( "Sampling final MCMC chain...n" )
codaSamples = coda.samples( jagsModel , variable.names=parameters ,
n.iter=nIter , thin=thinSteps )
# resulting codaSamples object has these indices:
# codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
#------------------------------------------------------------------------------
# EXAMINE THE RESULTS.
# Convert coda-object codaSamples to matrix object for easier handling.
# But note that this concatenates the different chains into one long chain.
# Result is mcmcChain[ stepIdx , paramIdx ]
mcmcChain = as.matrix( codaSamples )
theta1Sample = mcmcChain[,"theta1"] # Put sampled values in a vector.
theta2Sample = mcmcChain[,"theta2"] # Put sampled values in a vector.
# Plot the chains (trajectory of the last 500 sampled values).
par( pty="s" )
chainlength=NROW(mcmcChain)
plot( theta1Sample[(chainlength-500):chainlength] ,
theta2Sample[(chainlength-500):chainlength] , type = "o" ,
xlim = c(0,1) , xlab = bquote(theta[1]) , ylim = c(0,1) ,
ylab = bquote(theta[2]) , main="JAGS Result" , col="skyblue" )
# Display means in plot.
theta1mean = mean(theta1Sample)
theta2mean = mean(theta2Sample)
if (theta1mean > .5) { xpos = 0.0 ; xadj = 0.0
} else { xpos = 1.0 ; xadj = 1.0 }
if (theta2mean > .5) { ypos = 0.0 ; yadj = 0.0
} else { ypos = 1.0 ; yadj = 1.0 }
text( xpos , ypos ,
bquote(
"M=" * .(signif(theta1mean,3)) * "," * .(signif(theta2mean,3))
) ,adj=c(xadj,yadj) ,cex=1.5 )
# Plot a histogram of the posterior differences of theta values.
thetaRR = theta1Sample / theta2Sample # Relative risk
thetaDiff = theta1Sample - theta2Sample # Absolute risk difference
par(mar=c(5.1, 4.1, 4.1, 2.1))
plotPost( thetaRR , xlab= expression(paste("Relative risk (", theta[1]/theta[2], ")")) ,
compVal=1.0, ROPE=c(0.9, 1.1),
main="OPTIMSE Composite Primary OutcomenPosterior distribution of relative risk")
plotPost( thetaDiff , xlab=expression(paste("Absolute risk difference (", theta[1]-theta[2], ")")) ,
compVal=0.0, ROPE=c(-0.05, 0.05),
main="OPTIMSE Composite Primary OutcomenPosterior distribution of absolute risk difference")
#-----------------------------------------------------------------------------
# Use posterior prediction to determine proportion of cases in which
# using the intervention would result in no complication/death
# while not using the intervention would result in complication death
chainLength = length( theta1Sample )
# Create matrix to hold results of simulated patients:
yPred = matrix( NA , nrow=2 , ncol=chainLength )
# For each step in chain, use posterior prediction to determine outcome
for ( stepIdx in 1:chainLength ) { # step through the chain
# Probability for complication/death for each "patient" in intervention group:
pDeath1 = theta1Sample[stepIdx]
# Simulated outcome for each intervention "patient"
yPred[1,stepIdx] = sample( x=c(0,1), prob=c(1-pDeath1,pDeath1), size=1 )
# Probability for complication/death for each "patient" in control group:
pDeath2 = theta2Sample[stepIdx]
# Simulated outcome for each control "patient"
yPred[2,stepIdx] = sample( x=c(0,1), prob=c(1-pDeath2,pDeath2), size=1 )
}
# Now determine the proportion of times that the intervention group has no complication/death
# (y1 == 0) and the control group does have a complication or death (y2 == 1))
(pY1eq0andY2eq1 = sum( yPred[1,]==0 & yPred[2,]==1 ) / chainLength)
(pY1eq1andY2eq0 = sum( yPred[1,]==1 & yPred[2,]==0 ) / chainLength)
(pY1eq0andY2eq0 = sum( yPred[1,]==0 & yPred[2,]==0 ) / chainLength)
(pY10eq1andY2eq1 = sum( yPred[1,]==1 & yPred[2,]==1 ) / chainLength)
# Conclusion: in 27% of cases based on these probabilities,
# a patient in the intervention group would not have a complication,
# when a patient in control group did.
# openGraphSaveGraph.R
# John K. Kruschke, 2013
openGraph = function( width=7 , height=7 , mag=1.0 , ... ) {
if ( .Platform$OS.type != "windows" ) { # Mac OS, Linux
X11( width=width*mag , height=height*mag , type="cairo" , ... )
} else { # Windows OS
windows( width=width*mag , height=height*mag , ... )
}
}
saveGraph = function( file="saveGraphOutput" , type="pdf" , ... ) {
if ( .Platform$OS.type != "windows" ) { # Mac OS, Linux
if ( any( type == c("png","jpeg","jpg","tiff","bmp")) ) {
sptype = type
if ( type == "jpg" ) { sptype = "jpeg" }
savePlot( file=paste(file,".",type,sep="") , type=sptype , ... )
}
if ( type == "pdf" ) {
dev.copy2pdf(file=paste(file,".",type,sep="") , ... )
}
if ( type == "eps" ) {
dev.copy2eps(file=paste(file,".",type,sep="") , ... )
}
} else { # Windows OS
savePlot( file=file , type=type , ... )
}
}
# plotPost.R
# John K. Kruschke, 2013
plotPost = function( paramSampleVec , credMass=0.95 , compVal=NULL ,
HDItextPlace=0.7 , ROPE=NULL , yaxt=NULL , ylab=NULL ,
xlab=NULL , cex.lab=NULL , cex=NULL , xlim=NULL , main=NULL ,
col=NULL , border=NULL , showMode=F , showCurve=F , breaks=NULL ,
... ) {
# Override defaults of hist function, if not specified by user:
# (additional arguments "..." are passed to the hist function)
if ( is.null(xlab) ) xlab="Parameter"
if ( is.null(cex.lab) ) cex.lab=1.5
if ( is.null(cex) ) cex=1.4
if ( is.null(xlim) ) xlim=range( c( compVal , paramSampleVec ) )
if ( is.null(main) ) main=""
if ( is.null(yaxt) ) yaxt="n"
if ( is.null(ylab) ) ylab=""
if ( is.null(col) ) col="skyblue"
if ( is.null(border) ) border="white"
postSummary = matrix( NA , nrow=1 , ncol=11 ,
dimnames=list( c( xlab ) ,
c("mean","median","mode",
"hdiMass","hdiLow","hdiHigh",
"compVal","pcGTcompVal",
"ROPElow","ROPEhigh","pcInROPE")))
postSummary[,"mean"] = mean(paramSampleVec)
postSummary[,"median"] = median(paramSampleVec)
mcmcDensity = density(paramSampleVec)
postSummary[,"mode"] = mcmcDensity$x[which.max(mcmcDensity$y)]
source("HDIofMCMC.R")
HDI = HDIofMCMC( paramSampleVec , credMass )
postSummary[,"hdiMass"]=credMass
postSummary[,"hdiLow"]=HDI[1]
postSummary[,"hdiHigh"]=HDI[2]
# Plot histogram.
if ( is.null(breaks) ) {
breaks = c( seq( from=min(paramSampleVec) , to=max(paramSampleVec) ,
by=(HDI[2]-HDI[1])/18 ) , max(paramSampleVec) )
}
if ( !showCurve ) {
par(xpd=NA)
histinfo = hist( paramSampleVec , xlab=xlab , yaxt=yaxt , ylab=ylab ,
freq=F , border=border , col=col ,
xlim=xlim , main=main , cex=cex , cex.lab=cex.lab ,
breaks=breaks , ... )
}
if ( showCurve ) {
par(xpd=NA)
histinfo = hist( paramSampleVec , plot=F )
densCurve = density( paramSampleVec , adjust=2 )
plot( densCurve$x , densCurve$y , type="l" , lwd=5 , col=col , bty="n" ,
xlim=xlim , xlab=xlab , yaxt=yaxt , ylab=ylab ,
main=main , cex=cex , cex.lab=cex.lab , ... )
}
cenTendHt = 0.9*max(histinfo$density)
cvHt = 0.7*max(histinfo$density)
ROPEtextHt = 0.55*max(histinfo$density)
# Display mean or mode:
if ( showMode==F ) {
meanParam = mean( paramSampleVec )
text( meanParam , cenTendHt ,
bquote(mean==.(signif(meanParam,3))) , adj=c(.5,0) , cex=cex )
} else {
dres = density( paramSampleVec )
modeParam = dres$x[which.max(dres$y)]
text( modeParam , cenTendHt ,
bquote(mode==.(signif(modeParam,3))) , adj=c(.5,0) , cex=cex )
}
# Display the comparison value.
if ( !is.null( compVal ) ) {
cvCol = "darkgreen"
pcgtCompVal = round( 100 * sum( paramSampleVec > compVal )
/ length( paramSampleVec ) , 1 )
pcltCompVal = 100 - pcgtCompVal
lines( c(compVal,compVal) , c(0.96*cvHt,0) ,
lty="dotted" , lwd=1 , col=cvCol )
text( compVal , cvHt ,
bquote( .(pcltCompVal)*"% < " *
.(signif(compVal,3)) * " < "*.(pcgtCompVal)*"%" ) ,
adj=c(pcltCompVal/100,0) , cex=0.8*cex , col=cvCol )
postSummary[,"compVal"] = compVal
postSummary[,"pcGTcompVal"] = ( sum( paramSampleVec > compVal )
/ length( paramSampleVec ) )
}
# Display the ROPE.
if ( !is.null( ROPE ) ) {
ropeCol = "darkred"
pcInROPE = ( sum( paramSampleVec > ROPE[1] & paramSampleVec < ROPE[2] )
/ length( paramSampleVec ) )
lines( c(ROPE[1],ROPE[1]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 ,
col=ropeCol )
lines( c(ROPE[2],ROPE[2]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 ,
col=ropeCol)
text( mean(ROPE) , ROPEtextHt ,
bquote( .(round(100*pcInROPE))*"% in ROPE" ) ,
adj=c(.5,0) , cex=1 , col=ropeCol )
postSummary[,"ROPElow"]=ROPE[1]
postSummary[,"ROPEhigh"]=ROPE[2]
postSummary[,"pcInROPE"]=pcInROPE
}
# Display the HDI.
lines( HDI , c(0,0) , lwd=4 )
text( mean(HDI) , 0 , bquote(.(100*credMass) * "% HDI" ) ,
adj=c(.5,-1.7) , cex=cex )
text( HDI[1] , 0 , bquote(.(signif(HDI[1],3))) ,
adj=c(HDItextPlace,-0.5) , cex=cex )
text( HDI[2] , 0 , bquote(.(signif(HDI[2],3))) ,
adj=c(1.0-HDItextPlace,-0.5) , cex=cex )
par(xpd=F)
#
return( postSummary )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment