Created
November 26, 2019 14:26
-
-
Save cdriveraus/37125c2ff1447b5383e6f7b0f9c1dca0 to your computer and use it in GitHub Desktop.
devtools::run_examples problem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' ctStanFit | |
#' | |
#' Fits a ctsem model specified via \code{\link{ctModel}} with type either 'stanct' or 'standt', using Bayseian inference software | |
#' Stan. | |
#' | |
#' @param datalong long format data containing columns for subject id (numeric values, 1 to max subjects), manifest variables, | |
#' any time dependent (i.e. varying within subject) predictors, | |
#' and any time independent (not varying within subject) predictors. | |
#' @param ctstanmodel model object as generated by \code{\link{ctModel}} with type='stanct' or 'standt', for continuous or discrete time | |
#' models respectively. | |
#' @param stanmodeltext already specified Stan model character string, generally leave NA unless modifying Stan model directly. | |
#' (Possible after modification of output from fit=FALSE) | |
#' @param intoverstates logical indicating whether or not to integrate over latent states using a Kalman filter. | |
#' Generally recommended to set TRUE unless using non-gaussian measurement model. | |
#' @param binomial Deprecated. Logical indicating the use of binary rather than Gaussian data, as with IRT analyses. | |
#' This now sets \code{intoverstates = FALSE} and the \code{manifesttype} of every indicator to 1, for binary. | |
#' @param fit If TRUE, fit specified model using Stan, if FALSE, return stan model object without fitting. | |
#' @param intoverpop if TRUE, integrates over population distribution of parameters rather than full sampling. | |
#' Allows for optimization of non-linearities and random effects. | |
#' @param plot if TRUE, a Shiny program is launched upon fitting to interactively plot samples. | |
#' May struggle with many (e.g., > 5000) parameters, and may leave sample files in working directory if sampling is terminated. | |
#' @param derrind vector of integers denoting which latent variables are involved in dynamic error calculations. | |
#' latents involved only in deterministic trends or input effects can be removed from matrices (ie, that | |
#' obtain no additional stochastic inputs after first observation), speeding up calculations. | |
#' If unsure, leave default of 'all' ! Ignored if intoverstates=FALSE. | |
#' @param optimize if TRUE, use \code{\link{stanoptimis}} function for maximum a posteriori / importance sampling estimates, | |
#' otherwise use the HMC sampler from Stan, which is (much) slower, but generally more robust, accurate, and informative. | |
#' @param optimcontrol list of parameters sent to \code{\link{stanoptimis}} governing optimization / importance sampling. | |
#' @param nopriors logical. If TRUE, any priors are disabled -- sometimes desirable for optimization. | |
#' @param iter number of iterations, half of which will be devoted to warmup by default when sampling. | |
#' When optimizing, this is the maximum number of iterations to allow -- convergence hopefully occurs before this! | |
#' @param inits vector of parameter start values, as returned by the rstan function \code{rstan::unconstrain_pars} for instance. | |
#' @param chains number of chains to sample, during HMC or post-optimization importance sampling. Unless the cores | |
#' argument is also set, the number of chains determines the number of cpu cores used, up to | |
#' the maximum available minus one. Irrelevant when \code{optimize=TRUE}. | |
#' @param cores number of cpu cores to use. Either 'maxneeded' to use as many as available minus one, | |
#' up to the number of chains, or a positive integer. If \code{optimize=TRUE}, more cores are generally faster. | |
#' @param control List of arguments sent to \code{\link[rstan]{stan}} control argument, | |
#' regarding warmup / sampling behaviour. Unless specified, values used are: | |
#' list(adapt_delta = .8, adapt_window=2, max_treedepth=10, adapt_init_buffer=2, stepsize = .001) | |
#' @param nlcontrol List of non-linear control parameters. | |
#' \code{nldynamics} defaults to "auto", but may also be a logical. Set to FALSE to use estimator that assumes linear dynamics, | |
#' TRUE to use non-linear estimator. "auto" selects linear when the model is obviously linear, | |
#' otherwise nonlinear -- nonlinear is slower. | |
#' \code{nlmeasurement} defaults to "auto", but may also be a logical. Set to TRUE to use non linear measurement model estimator, | |
#' FALSE to use linear model. "auto" selects linear if appropriate, otherwise nonlinear. Non-linear methods are slower but applicable to both linear | |
#' and non linear cases. | |
#' \code{ukffull} may be TRUE or FALSE. If FALSE, nonlinear filtering via the unscented filter uses a minimal number of sigma points, | |
#' that does not capture skew in the resulting distribution. | |
#' \code{maxtimestep} must be a positive numeric, specifying the largest time | |
#' span covered by the numerical integration. The large default ensures that for each observation time interval, | |
#' only a single step of exponential integration is used. When \code{maxtimestep} is smaller than the observation time interval, | |
#' the integration is nested within an Euler like loop. | |
#' Smaller values may offer greater accuracy, but are slower and not always necessary. Given the exponential integration, | |
#' linear model elements are fit exactly with only a single step. | |
#' \code{ukfspread} should be a small positive numeric value, indicating what fraction of a standard deviation to | |
#' use for unscented sigma points. Values between 1e-4 and 2 have tended to be reasonable, in our experience. | |
#' In general, larger values may not make sense when using the default of \code{ukffull=FALSE}. | |
#' @param verbose Integer from 0 to 2. Higher values print more information during model fit -- for debugging. | |
#' @param stationary Logical. If TRUE, T0VAR and T0MEANS input matrices are ignored, | |
#' the parameters are instead fixed to long run expectations. More control over this can be achieved | |
#' by instead setting parameter names of T0MEANS and T0VAR matrices in the input model to 'stationary', for | |
#' elements that should be fixed to stationarity. | |
#' @param forcerecompile logical. For development purposes. | |
#' If TRUE, stan model is recompiled, regardless of apparent need for compilation. | |
#' @param savescores Logical. If TRUE, output from the Kalman filter is saved in output. For datasets with many variables | |
#' or time points, will increase file size substantially. | |
#' @param savesubjectmatrices Logical. If TRUE, subject specific matrices are saved -- only relevant when either time dependent predictors | |
#' are used, or individual differences are obtained via sampling (not via optimization, where they are integrated over). | |
#' @param gendata Logical -- If TRUE, uses provided data for only covariates and a time and missingness structure, and | |
#' generates random data according to the specified model / priors. | |
#' Generated data is in the $Ygen subobject after running \code{extract} on the fit object. | |
#' For datasets with many manifest variables or time points, file size may be large. | |
#' To generate data based on the posterior of a fitted model, see \code{\link{ctStanGenerateFromFit}}. | |
#' @param ... additional arguments to pass to \code{\link[rstan]{stan}} function. | |
#' @importFrom Rcpp evalCpp | |
#' @export | |
#' @examples | |
#' \donttest{ | |
#' #test data with 2 manifest indicators measuring 1 latent process each, | |
#' # 1 time dependent predictor, 3 time independent predictors | |
#' head(ctstantestdat) | |
#' | |
#' #generate a ctStanModel | |
#' model<-ctModel(type='stanct', | |
#' n.latent=2, latentNames=c('eta1','eta2'), | |
#' n.manifest=2, manifestNames=c('Y1','Y2'), | |
#' n.TDpred=1, TDpredNames='TD1', | |
#' n.TIpred=3, TIpredNames=c('TI1','TI2','TI3'), | |
#' LAMBDA=diag(2)) | |
#' | |
#' #set all parameters except manifest means to be fixed across subjects | |
#' model$pars$indvarying[-c(19,20)] <- FALSE | |
#' | |
#' #fit model to data (takes a few minutes - but insufficient | |
#' # iterations and max_treedepth for inference!) | |
#' fit<-ctStanFit(ctstantestdat, model, iter=200, chains=2, | |
#' control=list(max_treedepth=6)) | |
#' | |
#' #output functions | |
#' summary(fit) | |
#' | |
#' plot(fit,wait=FALSE) | |
#' | |
#' } | |
#' \dontrun{ | |
#' library(ctsem) | |
#' set.seed(3) | |
#' | |
#' # Data generation (run this, but no need to understand!) ----------------- | |
#' | |
#' Tpoints <- 20 | |
#' nmanifest <- 4 | |
#' nlatent <- 2 | |
#' nsubjects<-20 | |
#' | |
#' #random effects | |
#' age <- rnorm(nsubjects) #standardised | |
#' cint1<-rnorm(nsubjects,2,.3)+age*.5 | |
#' cint2 <- cint1*.5+rnorm(nsubjects,1,.2)+age*.5 | |
#' tdpredeffect <- rnorm(nsubjects,5,.3)+age*.5 | |
#' | |
#' for(i in 1:nsubjects){ | |
#' #generating model | |
#' gm<-ctModel(Tpoints=Tpoints,n.manifest = nmanifest,n.latent = nlatent,n.TDpred = 1, | |
#' LAMBDA = matrix(c(1,0,0,0, 0,1,.8,1.3),nrow=nmanifest,ncol=nlatent), | |
#' DRIFT=matrix(c(-.3, .2, 0, -.5),nlatent,nlatent), | |
#' TDPREDEFFECT=matrix(c(tdpredeffect[i],0),nrow=nlatent), | |
#' TDPREDMEANS=matrix(c(rep(0,Tpoints-10),1,rep(0,9)),ncol=1), | |
#' DIFFUSION = matrix(c(1, 0, 0, .5),2,2), | |
#' CINT = matrix(c(cint1[i],cint2[i]),ncol=1), | |
#' T0VAR=diag(2,nlatent,nlatent), | |
#' MANIFESTVAR = diag(.5, nmanifest)) | |
#' | |
#' #generate data | |
#' newdat <- ctGenerate(ctmodelobj = gm,n.subjects = 1,burnin = 2, | |
#' dtmat<-rbind(c(rep(.5,8),3,rep(.5,Tpoints-9))), | |
#' wide = FALSE) | |
#' newdat[,'id'] <- i #set id for each subject | |
#' newdat <- cbind(newdat,age[i]) #include time independent predictor | |
#' if(i ==1) { | |
#' dat <- newdat[1:(Tpoints-10),] #pre intervention data | |
#' dat2 <- newdat #including post intervention data | |
#' } | |
#' if(i > 1) { | |
#' dat <- rbind(dat, newdat[1:(Tpoints-10),]) | |
#' dat2 <- rbind(dat2,newdat) | |
#' } | |
#' } | |
#' colnames(dat)[ncol(dat)] <- 'age' | |
#' colnames(dat2)[ncol(dat)] <- 'age' | |
#' | |
#' | |
#' #plot generated data for sanity | |
#' plot(age) | |
#' matplot(dat[,gm$manifestNames],type='l',pch=1) | |
#' plotvar <- 'Y1' | |
#' plot(dat[dat[,'id']==1,'time'],dat[dat[,'id']==1,plotvar],type='l', | |
#' ylim=range(dat[,plotvar],na.rm=TRUE)) | |
#' for(i in 2:nsubjects){ | |
#' points(dat[dat[,'id']==i,'time'],dat[dat[,'id']==i,plotvar],type='l',col=i) | |
#' } | |
#' | |
#' | |
#' dat2[,gm$manifestNames][sample(1:length(dat2[,gm$manifestNames]),size = 100)] <- NA | |
#' | |
#' | |
#' #data structure | |
#' head(dat2) | |
#' | |
#' | |
#' # Model fitting ----------------------------------------------------------- | |
#' | |
#' ##simple univariate default model | |
#' | |
#' m <- ctModel(type = 'stanct', manifestNames = c('Y1'), LAMBDA = diag(1)) | |
#' ctModelLatex(m) | |
#' | |
#' #Specify univariate linear growth curve | |
#' | |
#' m1 <- ctModel(type = 'stanct', | |
#' manifestNames = c('Y1'), latentNames=c('eta1'), | |
#' DRIFT=matrix(-.0001,nrow=1,ncol=1), | |
#' DIFFUSION=matrix(0,nrow=1,ncol=1), | |
#' T0VAR=matrix(0,nrow=1,ncol=1), | |
#' CINT=matrix(c('cint1'),ncol=1), | |
#' T0MEANS=matrix(c('t0m1'),ncol=1), | |
#' LAMBDA = diag(1), | |
#' MANIFESTMEANS=matrix(0,ncol=1), | |
#' MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1)) | |
#' | |
#' ctModelLatex(m1) | |
#' | |
#' #fit | |
#' f1 <- ctStanFit(datalong = dat2, ctstanmodel = m1, optimize=TRUE, nopriors=TRUE,verbose=1) | |
#' | |
#' summary(f1) | |
#' | |
#' #plots of individual subject models v data | |
#' ctKalman(f1,plot=TRUE,subjects=1,kalmanvec=c('y','yprior'),timestep=.01) | |
#' ctKalman(f1,plot=TRUE,subjects=1:3,kalmanvec=c('y','ysmooth'),timestep=.01,errorvec=NA) | |
#' | |
#' ctStanPostPredict(f1, wait=FALSE) #compare randomly generated data from posterior to observed data | |
#' | |
#' cf<-ctCheckFit(f1) #compare mean and covariance of randomly generated data to observed cov | |
#' plot(cf,wait=FALSE) | |
#' | |
#' | |
#' | |
#' #Include intervention | |
#' m2 <- ctModel(type = 'stanct', | |
#' manifestNames = c('Y1'), latentNames=c('eta1'), | |
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
#' DRIFT=matrix(-1e-5,nrow=1,ncol=1), | |
#' DIFFUSION=matrix(0,nrow=1,ncol=1), | |
#' CINT=matrix(c('cint1'),ncol=1), | |
#' T0MEANS=matrix(c('t0m1'),ncol=1), | |
#' T0VAR=matrix(0,nrow=1,ncol=1), | |
#' LAMBDA = diag(1), | |
#' MANIFESTMEANS=matrix(0,ncol=1), | |
#' MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1)) | |
#' | |
#' f2 <- ctStanFit(datalong = dat2, ctstanmodel = m2, optimize=TRUE) | |
#' | |
#' summary(f2) | |
#' | |
#' ctKalman(f2,plot=TRUE,subjects=1,kalmanvec=c('y','ysmooth')) | |
#' ctKalman(f2,plot=TRUE,subjects=1:3,kalmanvec=c('y','ysmooth'),errorvec=NA,legend=FALSE) | |
#' | |
#' ctStanPostPredict(f2, datarows=1:100, wait=FALSE) | |
#' | |
#' | |
#' | |
#' #Individual differences in intervention, Bayesian estimation, covariates | |
#' m2i <- ctModel(type = 'stanct', | |
#' manifestNames = c('Y1'), latentNames=c('eta1'), | |
#' TIpredNames = 'age', | |
#' TDpredNames = 'TD1', #this line includes the intervention | |
#' TDPREDEFFECT=matrix(c('tdpredeffect||TRUE'),nrow=1,ncol=1), #intervention effect | |
#' DRIFT=matrix(-1e-5,nrow=1,ncol=1), | |
#' DIFFUSION=matrix(0,nrow=1,ncol=1), | |
#' CINT=matrix(c('cint1'),ncol=1), | |
#' T0MEANS=matrix(c('t0m1'),ncol=1), | |
#' T0VAR=matrix(0,nrow=1,ncol=1), | |
#' LAMBDA = diag(1), | |
#' MANIFESTMEANS=matrix(0,ncol=1), | |
#' MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1)) | |
#' | |
#' plot(m2i) | |
#' | |
#' f2i <- ctStanFit(datalong = dat2, ctstanmodel = m2i, | |
#' iter=300,chains=3,control=list(max_treedepth=7)) | |
#' summary(f2i) | |
#' ctStanPlotPost(f2i) | |
#' ctKalman(f2i,kalmanvec=c('y','ysmooth'),subjects=2:4,plot=TRUE,errorvec=NA) | |
#' | |
#' | |
#' #Including covariate effects | |
#' m2ic <- ctModel(type = 'stanct', | |
#' manifestNames = c('Y1'), latentNames=c('eta1'), | |
#' n.TIpred = 1, TIpredNames = 'age', | |
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
#' DRIFT=matrix(-1e-5,nrow=1,ncol=1), | |
#' DIFFUSION=matrix(0,nrow=1,ncol=1), | |
#' CINT=matrix(c('cint1'),ncol=1), | |
#' T0MEANS=matrix(c('t0m1'),ncol=1), | |
#' T0VAR=matrix(0,nrow=1,ncol=1), | |
#' LAMBDA = diag(1), | |
#' MANIFESTMEANS=matrix(0,ncol=1), | |
#' MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1)) | |
#' | |
#' m2ic$pars$indvarying[m2ic$pars$matrix %in% 'TDPREDEFFECT'] <- TRUE | |
#' | |
#' plot(m2ic) | |
#' | |
#' f2ic <- ctStanFit(datalong = dat2, ctstanmodel = m2ic,optimize=TRUE) | |
#' summary(f2ic) | |
#' | |
#' ctStanTIpredeffects(fit = f2ic,includeMeanUncertainty = TRUE,whichpars = 'TDPREDEFFECT', | |
#' plot=TRUE,probs = c(.025,.5,.975)) | |
#' | |
#' #Include deterministic dynamics | |
#' m3 <- ctModel(type = 'stanct', | |
#' manifestNames = c('Y1'), latentNames=c('eta1'), | |
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
#' DRIFT=matrix('drift11',nrow=1,ncol=1), | |
#' DIFFUSION=matrix(0,nrow=1,ncol=1), | |
#' CINT=matrix(c('cint1'),ncol=1), | |
#' T0MEANS=matrix(c('t0m1'),ncol=1), | |
#' T0VAR=matrix('t0var11',nrow=1,ncol=1), | |
#' LAMBDA = diag(1), | |
#' MANIFESTMEANS=matrix(0,ncol=1), | |
#' MANIFESTVAR=matrix(c('merror1'),nrow=1,ncol=1)) | |
#' | |
#' ctModelLatex(m3) | |
#' | |
#' f3 <- ctStanFit(datalong = dat2, ctstanmodel = m3, optimize=TRUE) | |
#' | |
#' summary(f3) | |
#' | |
#' ctKalman(f3,plot=TRUE,subjects=1,kalmanvec=c('y','ysmooth')) | |
#' ctKalman(f3,plot=TRUE,subjects=1:3,kalmanvec=c('y','ysmooth'),errorvec=NA) | |
#' | |
#' | |
#' | |
#' | |
#' | |
#' #Add system noise to allow for fluctuations that persist in time | |
#' m3n <- ctModel(type = 'stanct', | |
#' manifestNames = c('Y1'), latentNames=c('eta1'), | |
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
#' DRIFT=matrix('drift11',nrow=1,ncol=1), | |
#' DIFFUSION=matrix('diffusion',nrow=1,ncol=1), | |
#' CINT=matrix(c('cint1'),ncol=1), | |
#' T0MEANS=matrix(c('t0m1'),ncol=1), | |
#' T0VAR=matrix('t0var11',nrow=1,ncol=1), | |
#' LAMBDA = diag(1), | |
#' MANIFESTMEANS=matrix(0,ncol=1), | |
#' MANIFESTVAR=matrix(c(0),nrow=1,ncol=1)) | |
#' | |
#' ctModelLatex(m3n) | |
#' | |
#' f3n <- ctStanFit(datalong = dat2, ctstanmodel = m3n, optimize=TRUE,cores=4) | |
#' | |
#' summary(f3n) | |
#' | |
#' k=ctKalman(f3n,plot=T,subjects=1,kalmanvec=c('y','etasmooth'),timestep=.01) | |
#' ctKalman(f3n,plot=TRUE,subjects=1:3,kalmanvec=c('y','etasmooth'),errorvec=NA) | |
#' | |
#' | |
#' | |
#' | |
#' | |
#' #include 2nd latent process | |
#' | |
#' m4 <- ctModel(n.manifest = 2,n.latent = 2, type = 'stanct', | |
#' manifestNames = c('Y1','Y2'), latentNames=c('L1','L2'), | |
#' n.TDpred=1,TDpredNames = 'TD1', | |
#' TDPREDEFFECT=matrix(c('tdpredeffect1','tdpredeffect2'),nrow=2,ncol=1), | |
#' DRIFT=matrix(c('drift11','drift21','drift12','drift22'),nrow=2,ncol=2), | |
#' DIFFUSION=matrix(c('diffusion11','diffusion21',0,'diffusion22'),nrow=2,ncol=2), | |
#' CINT=matrix(c('cint1','cint2'),nrow=2,ncol=1), | |
#' T0MEANS=matrix(c('t0m1','t0m2'),nrow=2,ncol=1), | |
#' T0VAR=matrix(c('t0var11','t0var21',0,'t0var22'),nrow=2,ncol=2), | |
#' LAMBDA = matrix(c(1,0,0,1),nrow=2,ncol=2), | |
#' MANIFESTMEANS=matrix(c(0,0),nrow=2,ncol=1), | |
#' MANIFESTVAR=matrix(c('merror1',0,0,'merror2'),nrow=2,ncol=2)) | |
#' | |
#' f4 <- ctStanFit(datalong = dat2, ctstanmodel = m4,optimize=TRUE,cores=1) | |
#' | |
#' summary(f4) | |
#' | |
#' ctStanDiscretePars(f4,plot=TRUE) #auto and cross regressive plots over time | |
#' | |
#' ctKalman(f4,plot=TRUE,subjects=1,kalmanvec=c('y','ysmooth')) | |
#' ctKalman(f4,plot=TRUE,subjects=1:2,kalmanvec=c('y','ysmooth'),errorvec=NA) | |
#' | |
#' | |
#' | |
#' #non-linear dedpendencies - based on m3 model (including intervention) | |
#' #specify intervention as dependent on extra parameters in PARS matrix, and latent process 1 | |
#' | |
#' m3nl <- ctModel( type = 'stanct', | |
#' manifestNames = c('Y1'), latentNames=c('eta1'), | |
#' n.TDpred=1,TDpredNames = 'TD1', | |
#' TDPREDEFFECT=matrix(c('PARS[1,1] + PARS[1,2] * state[1]'),nrow=1,ncol=1), | |
#' PARS=matrix(c('tdpredeffect_int','tdpredeffect_multiply'),1,2), | |
#' DRIFT=matrix('drift11',nrow=1,ncol=1), | |
#' DIFFUSION=matrix('diffusion11',nrow=1,ncol=1), | |
#' CINT=matrix(c('cint1'),ncol=1), | |
#' T0MEANS=matrix(c('t0m1'),ncol=1), | |
#' T0VAR=matrix('t0var11',nrow=1,ncol=1), | |
#' LAMBDA = diag(1), | |
#' MANIFESTMEANS=matrix(0,ncol=1), | |
#' MANIFESTVAR=matrix(c('merror1'),nrow=1,ncol=1)) | |
#' | |
#' l=ctModelLatex(m3nl) | |
#' | |
#' #here fit using optimization instead of sampling -- not appropriate in all cases! | |
#' f3nl <- ctStanFit(datalong = dat2, ctstanmodel = m3nl, optimize=TRUE) | |
#' | |
#' summary(f3nl) | |
#' | |
#' ctKalman(f3nl,subjects=1:4,plot=TRUE,errorvec=NA) | |
#' #?plot.ctKalman #for plotting arguments | |
#' | |
#' | |
#' | |
#' #dynamic factor model -- fixing CINT to 0 and freeing indicator level intercepts | |
#' | |
#' m3df <- ctModel(type = 'stanct', | |
#' manifestNames = c('Y2','Y3'), latentNames=c('eta1'), | |
#' n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
#' TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
#' DRIFT=matrix('drift11',nrow=1,ncol=1), | |
#' DIFFUSION=matrix('diffusion',nrow=1,ncol=1), | |
#' CINT=matrix(c(0),ncol=1), | |
#' T0MEANS=matrix(c('t0m1'),ncol=1), | |
#' T0VAR=matrix('t0var11',nrow=1,ncol=1), | |
#' LAMBDA = matrix(c(1,'Y3loading'),nrow=2,ncol=1), | |
#' MANIFESTMEANS=matrix(c('Y2_int','Y3_int'),nrow=2,ncol=1), | |
#' MANIFESTVAR=matrix(c('Y2residual',0,0,'Y3residual'),nrow=2,ncol=2)) | |
#' | |
#' ctModelLatex(m3df) | |
#' | |
#' f3df <- ctStanFit(datalong = dat2, ctstanmodel = m3df, optimize=TRUE) | |
#' | |
#' summary(f3df) | |
#' | |
#' ctKalman(f3df,plot=TRUE,subjects=1,kalmanvec=c('y','ysmooth'),errorvec=NA) | |
#' ctKalman(f3df,plot=TRUE,subjects=1:3,kalmanvec=c('y','ysmooth'),errorvec=NA) | |
#' | |
#' | |
#' | |
#' } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Name: ctStanFit | |
### Title: ctStanFit | |
### Aliases: ctStanFit | |
### ** Examples | |
## No test: | |
##D #test data with 2 manifest indicators measuring 1 latent process each, | |
##D # 1 time dependent predictor, 3 time independent predictors | |
##D head(ctstantestdat) | |
##D | |
##D #generate a ctStanModel | |
##D model<-ctModel(type='stanct', | |
##D n.latent=2, latentNames=c('eta1','eta2'), | |
##D n.manifest=2, manifestNames=c('Y1','Y2'), | |
##D n.TDpred=1, TDpredNames='TD1', | |
##D n.TIpred=3, TIpredNames=c('TI1','TI2','TI3'), | |
##D LAMBDA=diag(2)) | |
##D | |
##D #set all parameters except manifest means to be fixed across subjects | |
##D model$pars$indvarying[-c(19,20)] <- FALSE | |
##D | |
##D #fit model to data (takes a few minutes - but insufficient | |
##D # iterations and max_treedepth for inference!) | |
##D fit<-ctStanFit(ctstantestdat, model, iter=200, chains=2, | |
##D control=list(max_treedepth=6)) | |
##D | |
##D #output functions | |
##D summary(fit) | |
##D | |
##D plot(fit,wait=FALSE) | |
##D | |
## End(No test) | |
library(ctsem) | |
set.seed(3) | |
# Data generation (run this, but no need to understand!) ----------------- | |
Tpoints <- 20 | |
nmanifest <- 4 | |
nlatent <- 2 | |
nsubjects<-20 | |
#random effects | |
age <- rnorm(nsubjects) #standardised | |
cint1<-rnorm(nsubjects,2,.3)+age*.5 | |
cint2 <- cint1*.5+rnorm(nsubjects,1,.2)+age*.5 | |
tdpredeffect <- rnorm(nsubjects,5,.3)+age*.5 | |
for(i in 1:nsubjects){ | |
#generating model | |
gm<-ctModel(Tpoints=Tpoints,n.manifest = nmanifest,n.latent = nlatent,n.TDpred = 1, | |
LAMBDA = matrix(c(1,0,0,0, 0,1,.8,1.3),nrow=nmanifest,ncol=nlatent), | |
DRIFT=matrix(c(-.3, .2, 0, -.5),nlatent,nlatent), | |
TDPREDEFFECT=matrix(c(tdpredeffect[i],0),nrow=nlatent), | |
TDPREDMEANS=matrix(c(rep(0,Tpoints-10),1,rep(0,9)),ncol=1), | |
DIFFUSION = matrix(c(1, 0, 0, .5),2,2), | |
CINT = matrix(c(cint1[i],cint2[i]),ncol=1), | |
T0VAR=diag(2,nlatent,nlatent), | |
MANIFESTVAR = diag(.5, nmanifest)) | |
#generate data | |
newdat <- ctGenerate(ctmodelobj = gm,n.subjects = 1,burnin = 2, | |
dtmat<-rbind(c(rep(.5,8),3,rep(.5,Tpoints-9))), | |
wide = FALSE) | |
newdat[,'id'] <- i #set id for each subject | |
newdat <- cbind(newdat,age[i]) #include time independent predictor | |
if(i ==1) { | |
dat <- newdat[1:(Tpoints-10),] #pre intervention data | |
dat2 <- newdat #including post intervention data | |
} | |
if(i > 1) { | |
dat <- rbind(dat, newdat[1:(Tpoints-10),]) | |
dat2 <- rbind(dat2,newdat) | |
} | |
} | |
colnames(dat)[ncol(dat)] <- 'age' | |
colnames(dat2)[ncol(dat)] <- 'age' | |
#plot generated data for sanity | |
plot(age) | |
matplot(dat[,gm$manifestNames],type='l',pch=1) | |
plotvar <- 'Y1' | |
plot(dat[dat[,'id']==1,'time'],dat[dat[,'id']==1,plotvar],type='l', | |
ylim=range(dat[,plotvar],na.rm=TRUE)) | |
for(i in 2:nsubjects){ | |
points(dat[dat[,'id']==i,'time'],dat[dat[,'id']==i,plotvar],type='l',col=i) | |
} | |
dat2[,gm$manifestNames][sample(1:length(dat2[,gm$manifestNames]),size = 100)] <- NA | |
#data structure | |
head(dat2) | |
# Model fitting ----------------------------------------------------------- | |
##simple univariate default model | |
m <- ctModel(type = 'stanct', manifestNames = c('Y1'), LAMBDA = diag(1)) | |
ctModelLatex(m) | |
#Specify univariate linear growth curve | |
m1 <- ctModel(type = 'stanct', | |
manifestNames = c('Y1'), latentNames=c('eta1'), | |
DRIFT=matrix(-.0001,nrow=1,ncol=1), | |
DIFFUSION=matrix(0,nrow=1,ncol=1), | |
T0VAR=matrix(0,nrow=1,ncol=1), | |
CINT=matrix(c('cint1'),ncol=1), | |
T0MEANS=matrix(c('t0m1'),ncol=1), | |
LAMBDA = diag(1), | |
MANIFESTMEANS=matrix(0,ncol=1), | |
MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1)) | |
ctModelLatex(m1) | |
#fit | |
f1 <- ctStanFit(datalong = dat2, ctstanmodel = m1, optimize=TRUE, nopriors=TRUE,verbose=1) | |
summary(f1) | |
#plots of individual subject models v data | |
ctKalman(f1,plot=TRUE,subjects=1,kalmanvec=c('y','yprior'),timestep=.01) | |
ctKalman(f1,plot=TRUE,subjects=1:3,kalmanvec=c('y','ysmooth'),timestep=.01,errorvec=NA) | |
ctStanPostPredict(f1, wait=FALSE) #compare randomly generated data from posterior to observed data | |
cf<-ctCheckFit(f1) #compare mean and covariance of randomly generated data to observed cov | |
plot(cf,wait=FALSE) | |
#Include intervention | |
m2 <- ctModel(type = 'stanct', | |
manifestNames = c('Y1'), latentNames=c('eta1'), | |
n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
DRIFT=matrix(-1e-5,nrow=1,ncol=1), | |
DIFFUSION=matrix(0,nrow=1,ncol=1), | |
CINT=matrix(c('cint1'),ncol=1), | |
T0MEANS=matrix(c('t0m1'),ncol=1), | |
T0VAR=matrix(0,nrow=1,ncol=1), | |
LAMBDA = diag(1), | |
MANIFESTMEANS=matrix(0,ncol=1), | |
MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1)) | |
f2 <- ctStanFit(datalong = dat2, ctstanmodel = m2, optimize=TRUE) | |
summary(f2) | |
ctKalman(f2,plot=TRUE,subjects=1,kalmanvec=c('y','ysmooth')) | |
ctKalman(f2,plot=TRUE,subjects=1:3,kalmanvec=c('y','ysmooth'),errorvec=NA,legend=FALSE) | |
ctStanPostPredict(f2, datarows=1:100, wait=FALSE) | |
#Individual differences in intervention, Bayesian estimation, covariates | |
m2i <- ctModel(type = 'stanct', | |
manifestNames = c('Y1'), latentNames=c('eta1'), | |
TIpredNames = 'age', | |
TDpredNames = 'TD1', #this line includes the intervention | |
TDPREDEFFECT=matrix(c('tdpredeffect||TRUE'),nrow=1,ncol=1), #intervention effect | |
DRIFT=matrix(-1e-5,nrow=1,ncol=1), | |
DIFFUSION=matrix(0,nrow=1,ncol=1), | |
CINT=matrix(c('cint1'),ncol=1), | |
T0MEANS=matrix(c('t0m1'),ncol=1), | |
T0VAR=matrix(0,nrow=1,ncol=1), | |
LAMBDA = diag(1), | |
MANIFESTMEANS=matrix(0,ncol=1), | |
MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1)) | |
plot(m2i) | |
f2i <- ctStanFit(datalong = dat2, ctstanmodel = m2i, | |
iter=300,chains=3,control=list(max_treedepth=7)) | |
summary(f2i) | |
ctStanPlotPost(f2i) | |
ctKalman(f2i,kalmanvec=c('y','ysmooth'),subjects=2:4,plot=TRUE,errorvec=NA) | |
#Including covariate effects | |
m2ic <- ctModel(type = 'stanct', | |
manifestNames = c('Y1'), latentNames=c('eta1'), | |
n.TIpred = 1, TIpredNames = 'age', | |
n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
DRIFT=matrix(-1e-5,nrow=1,ncol=1), | |
DIFFUSION=matrix(0,nrow=1,ncol=1), | |
CINT=matrix(c('cint1'),ncol=1), | |
T0MEANS=matrix(c('t0m1'),ncol=1), | |
T0VAR=matrix(0,nrow=1,ncol=1), | |
LAMBDA = diag(1), | |
MANIFESTMEANS=matrix(0,ncol=1), | |
MANIFESTVAR=matrix(c('merror'),nrow=1,ncol=1)) | |
m2ic$pars$indvarying[m2ic$pars$matrix | |
plot(m2ic) | |
f2ic <- ctStanFit(datalong = dat2, ctstanmodel = m2ic,optimize=TRUE) | |
summary(f2ic) | |
ctStanTIpredeffects(fit = f2ic,includeMeanUncertainty = TRUE,whichpars = 'TDPREDEFFECT', | |
plot=TRUE,probs = c(.025,.5,.975)) | |
#Include deterministic dynamics | |
m3 <- ctModel(type = 'stanct', | |
manifestNames = c('Y1'), latentNames=c('eta1'), | |
n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
DRIFT=matrix('drift11',nrow=1,ncol=1), | |
DIFFUSION=matrix(0,nrow=1,ncol=1), | |
CINT=matrix(c('cint1'),ncol=1), | |
T0MEANS=matrix(c('t0m1'),ncol=1), | |
T0VAR=matrix('t0var11',nrow=1,ncol=1), | |
LAMBDA = diag(1), | |
MANIFESTMEANS=matrix(0,ncol=1), | |
MANIFESTVAR=matrix(c('merror1'),nrow=1,ncol=1)) | |
ctModelLatex(m3) | |
f3 <- ctStanFit(datalong = dat2, ctstanmodel = m3, optimize=TRUE) | |
summary(f3) | |
ctKalman(f3,plot=TRUE,subjects=1,kalmanvec=c('y','ysmooth')) | |
ctKalman(f3,plot=TRUE,subjects=1:3,kalmanvec=c('y','ysmooth'),errorvec=NA) | |
#Add system noise to allow for fluctuations that persist in time | |
m3n <- ctModel(type = 'stanct', | |
manifestNames = c('Y1'), latentNames=c('eta1'), | |
n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
DRIFT=matrix('drift11',nrow=1,ncol=1), | |
DIFFUSION=matrix('diffusion',nrow=1,ncol=1), | |
CINT=matrix(c('cint1'),ncol=1), | |
T0MEANS=matrix(c('t0m1'),ncol=1), | |
T0VAR=matrix('t0var11',nrow=1,ncol=1), | |
LAMBDA = diag(1), | |
MANIFESTMEANS=matrix(0,ncol=1), | |
MANIFESTVAR=matrix(c(0),nrow=1,ncol=1)) | |
ctModelLatex(m3n) | |
f3n <- ctStanFit(datalong = dat2, ctstanmodel = m3n, optimize=TRUE,cores=4) | |
summary(f3n) | |
k=ctKalman(f3n,plot=T,subjects=1,kalmanvec=c('y','etasmooth'),timestep=.01) | |
ctKalman(f3n,plot=TRUE,subjects=1:3,kalmanvec=c('y','etasmooth'),errorvec=NA) | |
#include 2nd latent process | |
m4 <- ctModel(n.manifest = 2,n.latent = 2, type = 'stanct', | |
manifestNames = c('Y1','Y2'), latentNames=c('L1','L2'), | |
n.TDpred=1,TDpredNames = 'TD1', | |
TDPREDEFFECT=matrix(c('tdpredeffect1','tdpredeffect2'),nrow=2,ncol=1), | |
DRIFT=matrix(c('drift11','drift21','drift12','drift22'),nrow=2,ncol=2), | |
DIFFUSION=matrix(c('diffusion11','diffusion21',0,'diffusion22'),nrow=2,ncol=2), | |
CINT=matrix(c('cint1','cint2'),nrow=2,ncol=1), | |
T0MEANS=matrix(c('t0m1','t0m2'),nrow=2,ncol=1), | |
T0VAR=matrix(c('t0var11','t0var21',0,'t0var22'),nrow=2,ncol=2), | |
LAMBDA = matrix(c(1,0,0,1),nrow=2,ncol=2), | |
MANIFESTMEANS=matrix(c(0,0),nrow=2,ncol=1), | |
MANIFESTVAR=matrix(c('merror1',0,0,'merror2'),nrow=2,ncol=2)) | |
f4 <- ctStanFit(datalong = dat2, ctstanmodel = m4,optimize=TRUE,cores=1) | |
summary(f4) | |
ctStanDiscretePars(f4,plot=TRUE) #auto and cross regressive plots over time | |
ctKalman(f4,plot=TRUE,subjects=1,kalmanvec=c('y','ysmooth')) | |
ctKalman(f4,plot=TRUE,subjects=1:2,kalmanvec=c('y','ysmooth'),errorvec=NA) | |
#non-linear dedpendencies - based on m3 model (including intervention) | |
#specify intervention as dependent on extra parameters in PARS matrix, and latent process 1 | |
m3nl <- ctModel( type = 'stanct', | |
manifestNames = c('Y1'), latentNames=c('eta1'), | |
n.TDpred=1,TDpredNames = 'TD1', | |
TDPREDEFFECT=matrix(c('PARS[1,1] + PARS[1,2] * state[1]'),nrow=1,ncol=1), | |
PARS=matrix(c('tdpredeffect_int','tdpredeffect_multiply'),1,2), | |
DRIFT=matrix('drift11',nrow=1,ncol=1), | |
DIFFUSION=matrix('diffusion11',nrow=1,ncol=1), | |
CINT=matrix(c('cint1'),ncol=1), | |
T0MEANS=matrix(c('t0m1'),ncol=1), | |
T0VAR=matrix('t0var11',nrow=1,ncol=1), | |
LAMBDA = diag(1), | |
MANIFESTMEANS=matrix(0,ncol=1), | |
MANIFESTVAR=matrix(c('merror1'),nrow=1,ncol=1)) | |
l=ctModelLatex(m3nl) | |
#here fit using optimization instead of sampling -- not appropriate in all cases! | |
f3nl <- ctStanFit(datalong = dat2, ctstanmodel = m3nl, optimize=TRUE) | |
summary(f3nl) | |
ctKalman(f3nl,subjects=1:4,plot=TRUE,errorvec=NA) | |
#?plot.ctKalman #for plotting arguments | |
#dynamic factor model -- fixing CINT to 0 and freeing indicator level intercepts | |
m3df <- ctModel(type = 'stanct', | |
manifestNames = c('Y2','Y3'), latentNames=c('eta1'), | |
n.TDpred=1,TDpredNames = 'TD1', #this line includes the intervention | |
TDPREDEFFECT=matrix(c('tdpredeffect'),nrow=1,ncol=1), #intervention effect | |
DRIFT=matrix('drift11',nrow=1,ncol=1), | |
DIFFUSION=matrix('diffusion',nrow=1,ncol=1), | |
CINT=matrix(c(0),ncol=1), | |
T0MEANS=matrix(c('t0m1'),ncol=1), | |
T0VAR=matrix('t0var11',nrow=1,ncol=1), | |
LAMBDA = matrix(c(1,'Y3loading'),nrow=2,ncol=1), | |
MANIFESTMEANS=matrix(c('Y2_int','Y3_int'),nrow=2,ncol=1), | |
MANIFESTVAR=matrix(c('Y2residual',0,0,'Y3residual'),nrow=2,ncol=2)) | |
ctModelLatex(m3df) | |
f3df <- ctStanFit(datalong = dat2, ctstanmodel = m3df, optimize=TRUE) | |
summary(f3df) | |
ctKalman(f3df,plot=TRUE,subjects=1,kalmanvec=c('y','ysmooth'),errorvec=NA) | |
ctKalman(f3df,plot=TRUE,subjects=1:3,kalmanvec=c('y','ysmooth'),errorvec=NA) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment