Wrapper for caret package to train multiple models in one call
#' Basketball: A function to train a lot of different models
#'
#' This function lets you train a lot of different models from the caret package in one call
#' @param method The methods to use, e.g. c("glm", "rf")
#' @param formula The formula to use
#' @param data The data frame to use. Can be left blank if supplying independent training and testing sets
#' @param regression Logical; TRUE if the type of model is regression
#' @param metric Metric to use. For example, "RMSE" for regression and "Accuracy" for classification
#' @param Train Supply a training set to bypass the partitioning
#' @param Test If you bypassed the partitioning, you also need to supply a testing set
#' @param p What proportion to use for the training/testing split if you did not supply training and testing sets
#' @param seed Set the seed
#' @param predictType What type of prediction to use; note "probs" does not work for all classification problems
#' @param na.action What the prediction should do with NAs, i.e. na.omit or NULL
#' @param roundPred Logical; should the predictions be rounded, useful for problems with count data
#' @param verbose Logical; should the output be verbose or not
#' @param plotting Logical; should the function also evaluate plots
#' @param ctrl Specify the ctrl; see the caret package for more details
#' @param ntree Number of trees for random forest
#' @param family The family for a glm model
#' @param familyGBM The family for the gbm model
#' @param familyGLMboost The family for the glmboost model
#' @param glmBoostGrid Grid for glmboost #getModelInfo("glmboost", FALSE)[[1]]$grid
#' @param gbmGrid Grid for gbm
#' @param multinomGrid Grid for multinom
#' @param rfGrid Grid for random forest
#' @param LMTGrid Grid for LMT
#' @param LogitBoostGrid Grid for LogitBoost
#' @keywords caret
#' @keywords machine-learning
#' @export
#' @examples
#' #data(iris)
#' #models <- nbaParallel(method=c('rpart', 'ctree', 'rpart2'), metric="Accuracy", formula= Species~Sepal.Length + Sepal.Width + Petal.Length + Petal.Width, data=iris, regression=FALSE)
#' #Could be used to predict with the mean of the models' predictions
#' #predlist <- predict(models$Tune, newdata=models$Test)#,na.action=NULL)
#' #predlist <- lapply(predlist, as.integer)
#' #predlist <- do.call(cbind, predlist)
#' #predagg <- round(apply(predlist, 1, mean),0)
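#' #A hedged regression sketch (not from the original gist; assumes only the
#' #mtcars data shipped with base R):
#' #reg <- nbaParallel(method=c("lm", "rf"), metric="RMSE",
#' #                   formula= mpg ~ wt + hp, data=mtcars, regression=TRUE)
#' #reg$finalSummary #test-set and cross-validated RMSE/R2 for each method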
nbaParallel <-
  function(method,
           formula,
           data,
           regression = NULL, #what type of analysis are you doing
           metric = NULL, #RMSE, Accuracy
           Train = NULL,
           Test = NULL,
           p = 0.8, #how to split the data
           seed = 1, #a general seed to use
           predictType = "raw",
           na.action = na.omit,
           roundPred = FALSE, #if count data or using a non-classification method for classification
           verbose = FALSE,
           plotting = TRUE,
           ctrl = trainControl(method = "cv",
                               number = 10,
                               savePred = TRUE),
           ntree = 500, #for random forest
           family = NULL,
           familyGBM = NULL, #e.g. Binomial(link = c("logit"))
           familyGLMboost = NULL, #e.g. Binomial(link = c("logit"))
           glmBoostGrid = NULL, #e.g. data.frame(mstop = floor((1:10) * 50), prune = "no")
           gbmGrid = NULL, #e.g. expand.grid(.interaction.depth = seq(1, 7, by = 2), .n.trees = seq(100, 1000, by = 50), .shrinkage = c(0.01, 0.1))
           multinomGrid = NULL, #e.g. expand.grid(decay = c(0, 5 ^ seq(-1, -4, length = 60)))
           rfGrid = NULL, #e.g. data.frame(.mtry = floor(seq(10, ncol(Train) - 1, length = 10)))
           LMTGrid = NULL,
           LogitBoostGrid = NULL
  ){
  #**********Begin the function*************************************************************************************#
  library('caret')
  library('foreach')
  library('ggplot2')
  if(is.null(regression)) stop("Please set regression = TRUE or regression = FALSE")
  #separate the outcome from the predictors and subset the data
  y <- all.vars(formula)[1]
  predictors <- all.vars(formula)[all.vars(formula) != y]
  if(!missing(data)) data <- data[, c(y, predictors)]
  set.seed(seed)
  #**********Separate into training and testing**********************************************************************#
  #partition into training and testing sets if they were not supplied; otherwise the supplied sets are used as-is
  if(is.null(Train) & is.null(Test)){
    set.seed(seed)
    inTrainingSet <- createDataPartition(data[, y], p = p, list = FALSE)
    Train <- data[ inTrainingSet, ]
    Test  <- data[-inTrainingSet, ]
  }
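  #Note: createDataPartition() samples within the outcome's classes (or within
  #quantile groups for a numeric outcome), so this split is stratified on y.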
  #**********FUNCTION TO ATTACH FAMILY AND TUNING GRID**********************************************************************#
  addParam <- function(expr, grid=NULL, family=NULL) {
    ll <- as.list(expr)
    if(!is.null(grid))   ll$tuneGrid <- grid
    if(!is.null(family)) ll$family   <- family
    as.call(ll)
  }
  #Example:
  # mycall <- quote(train(form = medv ~ ., data = Boston, method = "glmboost"))
  # call   <- addParam(mycall, grid = quote(glmBoostGrid), family = NULL)
  # eval(call)
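  #Why the quote()/addParam()/eval() pattern (a reading note, not from the original):
  #train() forwards extra arguments straight to the underlying fit function, so an
  #explicit family=NULL would likely be passed through and break e.g. glm(); building
  #the call first and attaching tuneGrid/family only when they are non-NULL avoids that.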
  #********************************************************************************#
  #pre-allocate the result containers
  Tune <- vector("list", length(method))
  names(Tune) <- method
  Predict <- vector("list", length(method))
  names(Predict) <- method
  Confusion <- vector("list", length(method))
  names(Confusion) <- method
  if(regression == FALSE){
    if(ctrl$classProbs == TRUE){ #using twoClassSummary
      testSummary <- data.frame(ROC = numeric(0), Sens = numeric(0), Spec = numeric(0))
    } else { #using defaultSummary
      testSummary <- data.frame(Accuracy = numeric(0), Kappa = numeric(0))
    }
  } else {
    testSummary <- data.frame(RMSE = numeric(0), R2 = numeric(0))
  }
  Plot <- list()
  Plot2 <- list()
  Plot3 <- list()
  Plot4 <- NULL #initialised here so the Results list below works even when it is never filled in
  #********************************************************************************#
  Tune <- foreach(i=1:length(method), .inorder=TRUE) %do% {
    cat("Tuning model", method[i], "\n")
    #*******************#
    #evaluate models
    #*******************#
    set.seed(seed)
    #glm (poisson and quasipoisson are specified below)
    if(method[i]=="glm"){
      set.seed(seed)
      mycall <- quote( train( form = formula,
                              data = Train,
                              method = method[i],
                              metric = metric,
                              trControl = ctrl))
      mycall <- addParam(mycall, grid=NULL, family=family)
      Tune[[i]] <- eval(mycall)
      Tune[[i]]
    #Gradient boosting with no family specified and the default grid
    } else if(method[i]=="gbm"){
      set.seed(seed)
      mycall <- quote( train( form = formula,
                              data = Train,
                              method = method[i],
                              metric = metric,
                              trControl = ctrl,
                              verbose = verbose))
      mycall <- addParam( mycall, grid=gbmGrid, family=NULL) #the family argument is called distribution for gbm
      Tune[[i]] <- eval(mycall)
      Tune[[i]]
    #Other GLM boosts (using the default grid)
    } else if(method[i]=="glmboost"){
      library(mboost)
      set.seed(seed)
      mycall <- quote( train( form = formula,
                              data = Train,
                              method = method[i],
                              metric = metric,
                              trControl = ctrl))
      mycall <- addParam( mycall, grid=glmBoostGrid, family=familyGLMboost)
      Tune[[i]] <- eval(mycall)
      Tune[[i]]
    #lmBoost
    } else if(method[i]=="lmBoost"){
      library(mboost)
      set.seed(seed)
      mycall <- quote( train( form = update(formula, as.numeric(as.character(.))~.),
                              data = Train,
                              method = "glmboost",
                              metric = metric,
                              trControl = ctrl) )
      mycall <- addParam( mycall, grid=glmBoostGrid, family=Gaussian())
      Tune[[i]] <- eval(mycall)
      Tune[[i]]
    #LogitBoost
    } else if(method[i]=="LogitBoost"){
      set.seed(seed)
      mycall <- quote( train( form = formula,
                              data = Train,
                              method = method[i],
                              metric = metric,
                              trControl = ctrl) )
      mycall <- addParam( mycall, grid=LogitBoostGrid)
      Tune[[i]] <- eval(mycall)
      Tune[[i]]
    #LMT
    } else if(method[i]=="LMT"){
      set.seed(seed)
      mycall <- quote( train( form = formula,
                              data = Train,
                              method = method[i],
                              metric = metric,
                              trControl = ctrl) )
      mycall <- addParam( mycall, grid=LMTGrid)
      Tune[[i]] <- eval(mycall)
      Tune[[i]]
    #Poisson
    } else if(method[i]=="poisson"){
      set.seed(seed)
      Tune[[i]] <- train( form = update(formula, as.numeric(as.character(.))~.),
                          data = Train,
                          method = "glm",
                          trControl = ctrl,
                          metric = metric,
                          family = poisson(link = "log") )
    #Quasi-Poisson
    } else if(method[i]=="quasipoisson"){
      set.seed(seed)
      Tune[[i]] <- train( form = update(formula, as.numeric(as.character(.))~.),
                          data = Train,
                          method = "glm",
                          trControl = ctrl,
                          metric = metric,
                          family = quasipoisson(link = "log") )
    #Linear model (no verbose argument; the outcome is converted from factor to numeric)
    } else if(method[i]=="lm"){
      set.seed(seed)
      Tune[[i]] <- train( form = update(formula, as.numeric(as.character(.))~.),
                          data = Train,
                          method = method[i],
                          metric = metric,
                          trControl = ctrl )
    #Multinomial
    } else if(method[i]=="multinom"){
      set.seed(seed)
      mycall <- quote( train( form = formula,
                              data = Train,
                              method = method[i],
                              metric = metric,
                              trControl = ctrl) )
      mycall <- addParam( mycall, grid=multinomGrid, family=NULL)
      Tune[[i]] <- eval(mycall)
      Tune[[i]]
    #Random forest (with an ntree argument passed through to randomForest)
    } else if(method[i]=="rf"){
      set.seed(seed)
      mycall <- quote( train( form = formula,
                              data = Train,
                              method = method[i],
                              metric = metric,
                              trControl = ctrl,
                              ntree = ntree,
                              verbose = verbose ) )
      mycall <- addParam( mycall, grid=rfGrid, family=NULL)
      Tune[[i]] <- eval(mycall)
      Tune[[i]]
    #All other models
    } else {
      set.seed(seed)
      Tune[[i]] <- train( form = formula,
                          data = Train,
                          method = method[i],
                          metric = metric,
                          trControl = ctrl )
    }
  }#end foreach
  names(Tune) <- method
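  #Because the same seed is set before every train() call and ctrl is shared, each
  #model is tuned on identical cross-validation folds, which keeps the resamples()
  #comparison further down apples-to-apples.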
  #********************************************************************************#
  if(!(!is.null(Train) & is.null(Test))) { #skip the prediction step when only a training set (and no test set) was supplied
    #*******************#
    #evaluate predictions
    #*******************#
    Predict <- foreach(i=1:length(method), .inorder=TRUE) %do% {
      cat("Predicting with model", method[i], "\n")
      if(regression == FALSE){
        try({
          pred <- predict.train(Tune[[i]], newdata=Test, type = predictType, na.action = na.action)
          if(roundPred == TRUE & class(pred) != "factor"){
            factor(round(pred, 0))
          } else if(class(pred) != "factor"){
            factor(pred)
          } else {
            pred
          }
        })
      } else if(regression == TRUE | method[i] == "lm"){
        if(roundPred == FALSE){
          predict.train( Tune[[i]], newdata=Test, na.action = na.action)
        } else {
          round( predict.train(Tune[[i]], newdata=Test, na.action = na.action), 0)
        }
      }
    } #end Predict loop
    names(Predict) <- method
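    #Non-factor predictions are coerced to factors above so that defaultSummary() and
    #confusionMatrix() below can treat them as class labels; rounding first supports
    #count-style outcomes fit with regression-type methods.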
    #********************************************************************************#
    testSummary <- foreach(i=1:length(method), .inorder=TRUE, .combine=rbind) %do% {
      if(regression == FALSE){
        cat("Model", method[i],
            "has levels: ", levels(Predict[[i]]),
            "and the observed levels are: ", levels(factor(Test[, y])), "\n" )
        if(ctrl$classProbs==TRUE){
          #twoClassSummary also expects class-probability columns, so without them this falls through to a try-error
          try(rbind( twoClassSummary( data.frame( obs=factor(Test[, y]), pred=Predict[[i]]) ) ))
        } else {
          try(rbind( defaultSummary( data.frame( obs=factor(Test[, y]), pred=Predict[[i]]) ) ))
        }
      } else if(regression == TRUE){
        rbind( defaultSummary( data.frame( obs=Test[, y], pred=Predict[[i]]) ) )
      }
    }#end foreach
    rownames(testSummary) <- method
    if(regression == FALSE){
      Confusion <- foreach(i=1:length(method), .inorder=TRUE) %do% {
        try(confusionMatrix(data = Predict[[i]], reference = factor(Test[,y])))
      }#end foreach
      names(Confusion) <- method
    }#end if
    #**********PLOTTING**********************************************************************#
    if(plotting==TRUE){
      if(regression==TRUE){
        #***********#
        Plot2 <- foreach(i=1:length(method), .inorder=TRUE) %do% {
          print(qplot( Test[,y],
                       Predict[[i]],
                       xlab = "Observed",
                       ylab = "Predicted",
                       main = paste(method[i], "- Test Set - Observed vs. Predicted")
                       ) + geom_abline(intercept = 0, slope = 1, colour = "red", size = 1))
        }#end foreach
        names(Plot2) <- method
        #***********#
        Plot3 <- foreach(i=1:length(method), .inorder=TRUE) %do% {
          print(qplot( Predict[[i]],
                       Test[,y] - Predict[[i]],
                       xlab = "Predicted",
                       ylab = "Error",
                       main = paste(method[i], "- Test Set - Error vs. Predicted")
                       ) + geom_abline(intercept = 0, slope = 0, colour = "red", size = 1))
        }
        names(Plot3) <- method
      }#end if regression==TRUE
    }
    #********************************************************************************#
    if(length(method) > 1){
      cvValues <- resamples(Tune)
      if(regression == TRUE){
        finalSummary <- data.frame(cbind(Methods = cvValues$methods,
                                         testSummary,
                                         cvRMSE = summary(cvValues)$statistics$RMSE[,"Mean"],
                                         cvR2 = summary(cvValues)$statistics$Rsquared[,"Mean"],
                                         Time = round(cvValues$timings$Everything, 2)
                                         ))
        finalSummary <- finalSummary[order(finalSummary[,2], decreasing=FALSE), ]
      } else {
        #if using defaultSummary
        if(ctrl$classProbs==FALSE){
          finalSummary <- data.frame(cbind(Methods = cvValues$methods,
                                           testSummary,
                                           cvAccuracy = summary(cvValues)$statistics$Accuracy[,"Mean"],
                                           cvKappa = summary(cvValues)$statistics$Kappa[,"Mean"],
                                           Time = round(cvValues$timings$Everything, 2) ))
        #if using twoClassSummary
        } else {
          finalSummary <- data.frame(cbind(Methods = cvValues$methods,
                                           testSummary,
                                           cvROC = summary(cvValues)$statistics$ROC[,"Mean"],
                                           cvSens = summary(cvValues)$statistics$Sens[,"Mean"],
                                           cvSpec = summary(cvValues)$statistics$Spec[,"Mean"],
                                           Time = round(cvValues$timings$Everything, 2) ))
        }
        finalSummary <- finalSummary[order(finalSummary[,2], decreasing=TRUE), ]
      }
    } else {
      cvValues <- "Needs more than one model"
      finalSummary <- "Needs more than one model"
    }
    Observations <- data.frame(Train.Obs = nrow(Train),
                               Train.Mean = mean( as.numeric(Train[,y])),
                               Train.SD = sd( as.numeric(Train[,y])),
                               Test.Obs = nrow(Test),
                               Test.Mean = mean( as.numeric(Test[,y])),
                               Test.SD = sd( as.numeric(Test[,y])) )
  #********************************************************************************#
  } else {
    Observations <- data.frame(Train.Obs = nrow(Train),
                               Train.Mean = mean( as.numeric(Train[,y])),
                               Train.SD = sd( as.numeric(Train[,y])))
    finalSummary <- "You did not specify a test set"
    Confusion <- "You did not specify a test set"
  }#end of the if block that skips predictions when only a training set was supplied
  #********TUNE SUMMARY AND PLOTS************************************************************************#
  col.want <- c("Accuracy", "Kappa", "AccuracySD", "KappaSD",
                "ROC", "Spec", "Sens", "ROCSD", "SpecSD", "SensSD",
                "RMSE", "Rsquared", "RMSESD", "RsquaredSD")
  #sapply() would still treat each element as a list, so lapply() plus rbind is used instead
  tuneSummary <- lapply(method, function(x)
    Tune[[x]]$results[rownames(Tune[[x]]$bestTune),
                      colnames(Tune[[x]]$results) %in% col.want])
  tuneSummary <- do.call("rbind", tuneSummary)
  rownames(tuneSummary) <- method
  #*****PLOT TUNE GRID AND PLOT PERFORMANCE******************************************#
  if(plotting==TRUE){
    Plot <- foreach(i=1:length(method), .inorder=TRUE) %do% {
      if(inherits(tryCatch(ggplot(Tune[[i]]), error=function(e) e), "error")) {
        c() #models with no tuning parameters cannot be plotted
      } else {
        print(ggplot(Tune[[i]]) + labs(title=method[i]))
      }
    }#end foreach
    names(Plot) <- method
    if(length(method) > 1){
      (Plot4 <- dotplot(resamples(Tune), metric=metric, main="Resamples Plot"))
    }
  }
  #********************************************************************************#
  index <- Tune[[1]]$control$index
  #********************************************************************************#
  Results <- list( Observations = Observations,
                   index = index,
                   Train = Train,
                   Test = Test,
                   Tune = Tune,
                   Predict = Predict,
                   Plot = Plot,
                   Plot2 = Plot2,
                   Plot3 = Plot3,
                   Plot4 = Plot4,
                   tuneSummary = tuneSummary,
                   testSummary = testSummary,
                   Confusion = Confusion,
                   #cvValues = cvValues,
                   finalSummary = finalSummary
                   )
  return(Results)
} #end function
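#A minimal end-to-end sketch (hypothetical call, not from the original gist): it
#assumes caret, foreach and ggplot2 are installed along with the packages behind
#each method, and reuses the iris setup from the examples above.
#models <- nbaParallel(method = c("rpart", "rf"),
#                      metric = "Accuracy",
#                      formula = Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
#                      data = iris,
#                      regression = FALSE)
#models$finalSummary #test-set metrics, cross-validated means, and timings per method
#models$Confusion$rf #confusion matrix for the random forest on the test set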
I've learned a lot about building functions from this. I'm just a beginner, but this is really good to learn from. Thank you so much!