Skip to content

Instantly share code, notes, and snippets.

@MirzaCengic
Last active December 4, 2020 18:10
Show Gist options
  • Save MirzaCengic/fd5a5347f7237e8ad37494c4439b93f1 to your computer and use it in GitHub Desktop.
Save MirzaCengic/fd5a5347f7237e8ad37494c4439b93f1 to your computer and use it in GitHub Desktop.
ensemble_modeling2.R
cat("BIOMOD_EnsembleModeling2 loaded", "\n")
BIOMOD_EnsembleModeling2 <- function (modeling.output, chosen.models = "all", em.by = "PA_dataset+repet",
eval.metric = "all", eval.metric.quality.threshold = NULL,
models.eval.meth = c("KAPPA", "TSS", "ROC"), prob.mean = TRUE,
prob.cv = FALSE, prob.ci = FALSE, prob.ci.alpha = 0.05, prob.median = FALSE,
committee.averaging = FALSE, prob.mean.weight = FALSE, prob.mean.weight.decay = "proportional",
VarImport = 0)
{
biomod2:::.bmCat("Build Ensemble Models")
args <- biomod2:::.BIOMOD_EnsembleModeling.check.args(modeling.output,
chosen.models, eval.metric, eval.metric.quality.threshold,
models.eval.meth, prob.mean, prob.cv, prob.ci, prob.ci.alpha,
prob.median, committee.averaging, prob.mean.weight, prob.mean.weight.decay,
em.by)
modeling.output <- args$modeling.output
chosen.models <- args$chosen.models
eval.metric <- args$eval.metric
eval.metric.quality.threshold <- args$eval.metric.quality.threshold
models.eval.meth <- args$models.eval.meth
prob.mean <- args$prob.mean
prob.cv <- args$prob.cv
prob.ci <- args$prob.ci
prob.ci.alpha <- args$prob.ci.alpha
prob.median <- args$prob.median
committee.averaging <- args$committee.averaging
prob.mean.weight <- args$prob.mean.weight
prob.mean.weight.decay <- args$prob.mean.weight.decay
em.by <- args$em.by
rm("args")
em.avail <- c("prob.mean", "prob.cv", "prob.ci.inf", "prob.ci.sup",
"prob.median", "committee.averaging", "prob.mean.weight")
em.algo <- em.avail[c(prob.mean, prob.cv, prob.ci, prob.ci,
prob.median, committee.averaging, prob.mean.weight)]
Options <- list(em.by = em.by)
expl_var_type = get_var_type(get_formal_data(modeling.output,
"expl.var"))
expl_var_range = get_var_range(get_formal_data(modeling.output,
"expl.var"))
EM <- new("BIOMOD.EnsembleModeling.out", sp.name = modeling.output@sp.name,
expl.var.names = modeling.output@expl.var.names, em.by = em.by,
modeling.id = modeling.output@modeling.id)
EM@models.out.obj@link <- file.path(modeling.output@sp.name,
paste(modeling.output@sp.name, ".", modeling.output@modeling.id,
".models.out", sep = ""))
em.mod.assemb <- biomod2:::.em.models.assembling(chosen.models, em.by)
for (assemb in names(em.mod.assemb)) {
cat("\n\n >", assemb, "ensemble modeling")
models.kept <- em.mod.assemb[[assemb]]
if (modeling.output@has.evaluation.data) {
eval.obs <- get_formal_data(modeling.output, "eval.resp.var")
eval.expl <- get_formal_data(modeling.output, "eval.expl.var")
}
obs <- get_formal_data(modeling.output, "resp.var")
expl <- get_formal_data(modeling.output, "expl.var")
if (em.by %in% c("PA_dataset", "PA_dataset+algo", "PA_dataset+repet")) {
if (unlist(strsplit(assemb, "_"))[3] != "AllData") {
if (inherits(get_formal_data(modeling.output),
"BIOMOD.formated.data.PA")) {
kept_cells <- get_formal_data(modeling.output)@PA[,
unlist(strsplit(assemb, "_"))[3]]
}
else {
kept_cells <- rep(T, length(obs))
}
obs <- obs[kept_cells]
expl <- expl[kept_cells, , drop = F]
}
}
obs[is.na(obs)] <- 0
needed_predictions <- biomod2:::.get_needed_predictions(modeling.output,
em.by, models.kept, eval.metric, eval.metric.quality.threshold)
if (!length(needed_predictions))
next
for (eval.m in eval.metric) {
models.kept <- needed_predictions$models.kept[[eval.m]]
models.kept.scores <- needed_predictions$models.kept.scores[[eval.m]]
for (algo in em.algo) {
if (algo == "prob.mean") {
cat("\n > Mean of probabilities...")
model_name <- paste(modeling.output@sp.name,
"_", "EMmeanBy", eval.m, "_", assemb, sep = "")
model.bm <- new("EMmean_biomod2_model", model = models.kept,
model_name = model_name, model_class = "EMmean",
model_options = Options, resp_name = modeling.output@sp.name,
expl_var_names = modeling.output@expl.var.names,
expl_var_type = expl_var_type, expl_var_range = expl_var_range,
modeling.id = modeling.output@modeling.id)
}
if (algo == "prob.cv") {
cat("\n > Coef of variation of probabilities...")
model_name <- paste(modeling.output@sp.name,
"_", "EMcvBy", eval.m, "_", assemb, sep = "")
model.bm <- new("EMcv_biomod2_model", model = models.kept,
model_name = model_name, model_class = "EMcv",
model_options = Options, resp_name = modeling.output@sp.name,
expl_var_names = modeling.output@expl.var.names,
expl_var_type = expl_var_type, expl_var_range = expl_var_range,
modeling.id = modeling.output@modeling.id)
}
if (algo == "prob.median") {
cat("\n > Median of probabilities...")
model_name <- paste(modeling.output@sp.name,
"_", "EMmedianBy", eval.m, "_", assemb, sep = "")
model.bm <- new("EMmedian_biomod2_model", model = models.kept,
model_name = model_name, model_class = "EMmedian",
model_options = Options, resp_name = modeling.output@sp.name,
expl_var_names = modeling.output@expl.var.names,
expl_var_type = expl_var_type, expl_var_range = expl_var_range,
modeling.id = modeling.output@modeling.id)
}
if (algo == "prob.ci.inf") {
cat("\n > Confidence Interval...")
model_name <- paste(modeling.output@sp.name,
"_", "EMciInfBy", eval.m, "_", assemb, sep = "")
model.bm <- new("EMci_biomod2_model", model = models.kept,
model_name = model_name, model_class = "EMci",
model_options = Options, resp_name = modeling.output@sp.name,
expl_var_names = modeling.output@expl.var.names,
expl_var_type = expl_var_type, expl_var_range = expl_var_range,
modeling.id = modeling.output@modeling.id,
alpha = prob.ci.alpha, side = "inferior")
}
if (algo == "prob.ci.sup") {
model_name <- paste(modeling.output@sp.name,
"_", "EMciSupBy", eval.m, "_", assemb, sep = "")
model.bm <- new("EMci_biomod2_model", model = models.kept,
model_name = model_name, model_class = "EMci",
model_options = Options, resp_name = modeling.output@sp.name,
expl_var_names = modeling.output@expl.var.names,
expl_var_type = expl_var_type, expl_var_range = expl_var_range,
modeling.id = modeling.output@modeling.id,
alpha = prob.ci.alpha, side = "superior")
}
if (algo == "committee.averaging") {
cat("\n > Committee averaging...")
model_name <- paste(modeling.output@sp.name,
"_", "EMcaBy", eval.m, "_", assemb, sep = "")
models.kept.tresh <- unlist(lapply(models.kept,
function(x) {
mod <- tail(unlist(strsplit(x, "_")), 3)[3]
run <- tail(unlist(strsplit(x, "_")), 3)[2]
dat <- tail(unlist(strsplit(x, "_")), 3)[1]
return(get_evaluations(modeling.output)[eval.m,
"Cutoff", mod, run, dat])
}))
names(models.kept.tresh) <- models.kept
to_keep <- is.finite(models.kept.tresh)
model.bm <- new("EMca_biomod2_model", model = models.kept[to_keep],
model_name = model_name, model_class = "EMca",
model_options = Options, resp_name = modeling.output@sp.name,
expl_var_names = modeling.output@expl.var.names,
expl_var_type = expl_var_type, expl_var_range = expl_var_range,
modeling.id = modeling.output@modeling.id,
tresholds = models.kept.tresh[to_keep])
}
if (algo == "prob.mean.weight") {
cat("\n > Probabilities weighting mean...")
model_name <- paste(modeling.output@sp.name,
"_", "EMwmeanBy", eval.m, "_", assemb, sep = "")
models.kept.tmp <- models.kept
models.kept.scores.tmp <- models.kept.scores
if (eval.m == "ROC") {
sre.id <- grep("_SRE", models.kept)
if (length(sre.id) > 0) {
cat("\n ! SRE modeling were switched off")
models.kept.tmp <- models.kept[-sre.id]
models.kept.scores.tmp <- models.kept.scores[-sre.id]
}
}
models.kept.tmp <- models.kept.tmp[is.finite(models.kept.scores.tmp)]
models.kept.scores.tmp <- models.kept.scores.tmp[is.finite(models.kept.scores.tmp)]
models.kept.scores.tmp <- round(models.kept.scores.tmp,
3)
cat("\n\t\t", " original models scores = ",
models.kept.scores.tmp)
if (is.numeric(prob.mean.weight.decay)) {
DecayCount <- sum(models.kept.scores.tmp >
0)
WOrder <- order(models.kept.scores.tmp, decreasing = T)
Dweights <- models.kept.scores.tmp
for (J in 1:DecayCount) Dweights[WOrder[J]] <- I(prob.mean.weight.decay^(DecayCount -
J + 1))
for (J in 1:length(models.kept.scores.tmp)) {
if (sum(models.kept.scores.tmp[J] == models.kept.scores.tmp) >
1)
Dweights[which(models.kept.scores.tmp[J] ==
models.kept.scores.tmp)] <- mean(Dweights[which(models.kept.scores.tmp[J] ==
models.kept.scores.tmp)])
}
models.kept.scores.tmp <- round(Dweights,
digits = 3)
rm(list = c("Dweights", "DecayCount", "WOrder"))
}
else if (is.function(prob.mean.weight.decay)) {
models.kept.scores.tmp <- sapply(models.kept.scores.tmp,
prob.mean.weight.decay)
}
models.kept.scores.tmp <- round(models.kept.scores.tmp/sum(models.kept.scores.tmp,
na.rm = T), digits = 3)
cat("\n\t\t", " final models weights = ", models.kept.scores.tmp)
model.bm <- new("EMwmean_biomod2_model", model = models.kept.tmp,
model_name = model_name, model_class = "EMwmean",
model_options = Options, resp_name = modeling.output@sp.name,
expl_var_names = modeling.output@expl.var.names,
expl_var_type = expl_var_type, expl_var_range = expl_var_range,
modeling.id = modeling.output@modeling.id,
penalization_scores = models.kept.scores.tmp)
}
pred.bm <- predict(model.bm, expl, formal_predictions = needed_predictions$predictions[,
model.bm@model, drop = F], on_0_1000 = T)
pred.bm.name <- paste0(model_name, ".predictions")
pred.bm.outfile <- file.path(model.bm@resp_name,
".BIOMOD_DATA", model.bm@modeling.id, "ensemble.models",
"ensemble.models.predictions", pred.bm.name)
dir.create(dirname(pred.bm.outfile), showWarnings = FALSE,
recursive = TRUE)
assign(pred.bm.name, pred.bm)
save(list = pred.bm.name, file = pred.bm.outfile,
compress = TRUE)
rm(list = pred.bm.name)
if (exists("eval.obs") & exists("eval.expl")) {
eval_pred.bm <- predict(model.bm, eval.expl)
pred.bm.name <- paste0(model_name, ".predictionsEval")
assign(pred.bm.name, eval_pred.bm)
save(list = pred.bm.name, file = pred.bm.outfile,
compress = TRUE)
rm(list = pred.bm.name)
}
if (length(models.eval.meth)) {
cat("\n\t\t\tEvaluating Model stuff...")
if (algo == "prob.cv") {
cross.validation <- matrix(NA, 4, length(models.eval.meth),
dimnames = list(c("Testing.data", "Cutoff",
"Sensitivity", "Specificity"), models.eval.meth))
}
else {
if (em.by == "PA_dataset+repet") {
calib_lines <- get_calib_lines(modeling.output)
pa_dataset_id <- paste("_", unlist(strsplit(assemb,
"_"))[3], sep = "")
repet_id <- paste("_", unlist(strsplit(assemb,
"_"))[2], sep = "")
if (repet_id == "_Full") {
eval_lines <- rep(T, length(pred.bm))
}
else {
eval_lines <- !na.omit(calib_lines[,
repet_id, pa_dataset_id])
if (all(!eval_lines)) {
eval_lines <- !eval_lines
}
}
}
else {
eval_lines <- rep(T, length(pred.bm))
}
cross.validation <- sapply(models.eval.meth,
Find.Optim.Stat, Fit = pred.bm[eval_lines],
Obs = obs[eval_lines])
rownames(cross.validation) <- c("Testing.data",
"Cutoff", "Sensitivity", "Specificity")
}
if (exists("eval_pred.bm")) {
if (algo == "prob.cv") {
cross.validation <- matrix(NA, 5, length(models.eval.meth),
dimnames = list(c("Testing.data", "Evaluating.data",
"Cutoff", "Sensitivity", "Specificity"),
models.eval.meth))
}
else {
true.evaluation <- sapply(models.eval.meth,
function(x) {
Find.Optim.Stat(Fit = eval_pred.bm *
1000, Obs = eval.obs, Fixed.thresh = cross.validation["Cutoff",
x])
})
cross.validation <- rbind(cross.validation["Testing.data",
], true.evaluation)
rownames(cross.validation) <- c("Testing.data",
"Evaluating.data", "Cutoff", "Sensitivity",
"Specificity")
}
}
cross.validation <- t(round(cross.validation,
digits = 3))
model.bm@model_evaluation <- cross.validation
rm(list = c("cross.validation"))
}
if (VarImport > 0) {
cat("\n\t\t\tEvaluating Predictor Contributions...",
"\n")
variables.importance <- variables_importance(model.bm,
expl, nb_rand = VarImport)
model.bm@model_variables_importance <- variables.importance$mat
rm(list = c("variables.importance"))
}
assign(model_name, model.bm)
save(list = model_name, file = file.path(modeling.output@sp.name,
"models", modeling.output@modeling.id, model_name))
EM@em.models <- c(EM@em.models, model.bm)
EM@em.computed <- c(EM@em.computed, model_name)
}
}
}
names(EM@em.models) <- EM@em.computed
model.name <- paste(EM@sp.name, ".", EM@modeling.id, "ensemble.models.out",
sep = "")
assign(x = model.name, value = EM)
save(list = model.name, file = file.path(EM@sp.name, model.name))
biomod2:::.bmCat("Done")
return(EM)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment