# Ordinal_forest_caret (Work in Progress)
# install.packages("ordinalForest")
library(ordinalForest)
#
# start by naming my method to pass to train
#
ordinalForest <- list(type = "Classification",
                      library = "ordinalForest",
                      loop = NULL)
#
# define the tuning parameters
#
prm <- data.frame(parameter = c("nsets", "ntreeperdiv", "ntreefinal", "npermtrial", "nbest"),
                  class = rep("numeric", 5),
                  label = c("Score sets", "Number of Trees (small)", "Number of Trees (final)",
                            "Tried score sets", "Best score sets"))
#
# append them to the list
#
ordinalForest$parameters <- prm
#
# define the default training grid. some models can do
# a random search, but I won't implement that
#
ordinalForestGrid <- function(x, y, len = NULL, search = "grid") {
  if (search == "grid") {
    out <- expand.grid(nsets = 1000,
                       ntreeperdiv = 100,
                       ntreefinal = 5000,
                       npermtrial = 500,
                       nbest = 10)
  } else {
    stop('random search not yet implemented')
  }
  out
}
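#
# quick sanity check of the grid function: the x and y arguments
# are ignored here, caret just passes them through
#
ordinalForestGrid(x = NULL, y = NULL, search = "grid")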
#
# append to list
#
ordinalForest$grid <- ordinalForestGrid
#
# define the fitting function. Here, it is the
# ordfor constructor function ordfor()
# I notice that the function doesn't allow for ordfor(x, y, ...), so I have to get around that
#
ordinalForestFit <- function(x, y, param, lev, last, classProbs, ...) {
  library(ordinalForest)
  # ordfor() wants the response inside the data.frame, referenced by name
  data <- data.frame(x, Class = y)
  ordfor(depvar = "Class", data = data,
         nsets = param$nsets,
         ntreeperdiv = param$ntreeperdiv,
         ntreefinal = param$ntreefinal,
         perffunction = "equal",  # classimp/classweights only apply to other
                                  # perffunction settings, so they are omitted
         nbest = param$nbest,
         naive = FALSE,
         num.threads = NULL,
         npermtrial = param$npermtrial,
         permperdefault = FALSE)
}
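#
# a minimal standalone sketch of what the wrapper does, on simulated
# data with three ordered levels (names and sizes here are made up);
# commented out since fitting takes a while
#
# toy <- data.frame(x1 = rnorm(60), x2 = rnorm(60),
#                   Class = factor(sample(c("low", "mid", "high"), 60, replace = TRUE),
#                                  levels = c("low", "mid", "high"), ordered = TRUE))
# ordfor(depvar = "Class", data = toy, nsets = 50, ntreeperdiv = 10,
#        ntreefinal = 100, perffunction = "equal")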
#
# append to list
#
ordinalForest$fit <- ordinalForestFit
#
# define the levels of the outcome.
# they are held in the classes element of objects of
# class ordfor (S3 lists, so $ rather than @)
#
ordinalForest$levels <- function(x) x$classes
#
# define the classification prediction with the
# predict generic
#
ordinalForestPred <- function(modelFit, newdata, preProc = NULL, submodels = NULL) {
  # predict.ordfor returns a list; the class predictions are in $ypred
  predict(modelFit, newdata = newdata)$ypred
}
#
# append to list
#
ordinalForest$predict <- ordinalForestPred
#
# define the class probability with the
# predict generic
#
ordinalForestProb <- function(modelFit, newdata, preProc = NULL, submodels = NULL) {
  # predict.ordfor has no type argument; probabilities live in $classprobs
  probs <- predict(modelFit, newdata = newdata)$classprobs
  colnames(probs) <- modelFit$classes  # caret expects columns named by class
  as.data.frame(probs)
}
#
# append to list
#
ordinalForest$prob <- ordinalForestProb
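#
# sketch: with a fitted object `fit` (hypothetical), caret calls something like
# ordinalForestProb(fit, newdata = hearth[1:5, -11])
# and expects one probability column per class level
#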
#
# define the sort function, i.e. how the tuning parameters
# are ordered in case similar performance obtained
#
ordinalForestSort <- function (x) x[order(x$nsets, x$ntreeperdiv, x$ntreefinal,
x$npermtrial, x$nbest), ]
#
# append to list
#
ordinalForest$sort <- ordinalForestSort
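#
# at this point the custom method list is complete. A quick sketch to
# confirm the components caret::train expects are all present
# (levels and sort are optional extras):
#
needed <- c("type", "library", "parameters", "grid", "fit", "predict", "prob")
setdiff(needed, names(ordinalForest))  # character(0) means nothing is missing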
# TEST TEST TEST ----------------------------------------------------------
# load caret and doParallel libraries
library(caret)
library(doParallel)
# the hearth data ships with the ordinalForest package
data("hearth")
#
# register cores for parallel processing (2 workers assumed; adjust to taste)
#
cl <- makePSOCKcluster(2)
registerDoParallel(cl)
# train control options. want repeated 10-fold CV
#
fitControl <- trainControl(method = "repeatedcv",
                           number = 10,
                           repeats = 3)  # number of repeats is an assumption
#
# define grid of parameter values
#
tuneGrid <- expand.grid(nsets = 1000,
                        ntreeperdiv = 100,
                        ntreefinal = 5000,
                        npermtrial = 500,
                        nbest = 10)
set.seed(825)
OFTune <- train(x = hearth[, -11],
                y = hearth[, 11],
                method = ordinalForest,  # pass the method list, not the fit function
                trControl = fitControl,
                tuneGrid = tuneGrid)
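#
# inspect the tuned model, then release the parallel workers
#
print(OFTune)
stopCluster(cl)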
devtools::session_info()