Created
November 28, 2013 08:46
-
-
Save menugget/7688922 to your computer and use it in GitHub Desktop.
BVSTEP (Clarke and Ainsworth 1992). Requires package "vegan".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
bv.step <- function(fix.mat, var.mat, | |
fix.dist.method="bray", var.dist.method="euclidean", | |
scale.fix=FALSE, scale.var=TRUE, | |
max.rho=0.95, | |
min.delta.rho=0.001, | |
random.selection=TRUE, | |
prop.selected.var=0.2, | |
num.restarts=10, | |
var.always.include=NULL, | |
var.exclude=NULL, | |
output.best=10 | |
){ | |
if(dim(fix.mat)[1] != dim(var.mat)[1]){stop("fixed and variable matrices must have the same number of rows")} | |
if(sum(var.always.include %in% var.exclude) > 0){stop("var.always.include and var.exclude share a variable")} | |
require(vegan) | |
if(scale.fix){fix.mat<-scale(fix.mat)}else{fix.mat<-fix.mat} | |
if(scale.var){var.mat<-scale(var.mat)}else{var.mat<-var.mat} | |
fix.dist <- vegdist(as.matrix(fix.mat), method=fix.dist.method) | |
#an initial removal phase | |
var.dist.full <- vegdist(as.matrix(var.mat), method=var.dist.method) | |
full.cor <- suppressWarnings(cor.test(fix.dist, var.dist.full, method="spearman"))$estimate | |
var.comb <- combn(1:ncol(var.mat), ncol(var.mat)-1) | |
RES <- data.frame(var.excl=rep(NA,ncol(var.comb)), n.var=ncol(var.mat)-1, rho=NA) | |
for(i in 1:dim(var.comb)[2]){ | |
var.dist <- vegdist(as.matrix(var.mat[,var.comb[,i]]), method=var.dist.method) | |
temp <- suppressWarnings(cor.test(fix.dist, var.dist, method="spearman")) | |
RES$var.excl[i] <- c(1:ncol(var.mat))[-var.comb[,i]] | |
RES$rho[i] <- temp$estimate | |
} | |
delta.rho <- RES$rho - full.cor | |
exclude <- sort(unique(c(RES$var.excl[which(abs(delta.rho) < min.delta.rho)], var.exclude))) | |
if(random.selection){ | |
num.restarts=num.restarts | |
prop.selected.var=prop.selected.var | |
prob<-rep(1,ncol(var.mat)) | |
if(prop.selected.var< 1){ | |
prob[exclude]<-0 | |
} | |
n.selected.var <- min(sum(prob),prop.selected.var*dim(var.mat)[2]) | |
} else { | |
num.restarts=1 | |
prop.selected.var=1 | |
prob<-rep(1,ncol(var.mat)) | |
n.selected.var <- min(sum(prob),prop.selected.var*dim(var.mat)[2]) | |
} | |
RES_TOT <- c() | |
for(i in 1:num.restarts){ | |
step=1 | |
RES <- data.frame(step=step, step.dir="F", var.incl=NA, n.var=0, rho=0) | |
attr(RES$step.dir, "levels") <- c("F","B") | |
best.comb <- which.max(RES$rho) | |
best.rho <- RES$rho[best.comb] | |
delta.rho <- Inf | |
selected.var <- sort(unique(c(sample(1:dim(var.mat)[2], n.selected.var, prob=prob), var.always.include))) | |
while(best.rho < max.rho & delta.rho > min.delta.rho & RES$n.var[best.comb] < length(selected.var)){ | |
#forward step | |
step.dir="F" | |
step=step+1 | |
var.comb <- combn(selected.var, RES$n.var[best.comb]+1, simplify=FALSE) | |
if(RES$n.var[best.comb] == 0){ | |
var.comb.incl<-1:length(var.comb) | |
} else { | |
var.keep <- as.numeric(unlist(strsplit(RES$var.incl[best.comb], ","))) | |
temp <- NA*1:length(var.comb) | |
for(j in 1:length(temp)){ | |
temp[j] <- all(var.keep %in% var.comb[[j]]) | |
} | |
var.comb.incl <- which(temp==1) | |
} | |
RES.f <- data.frame(step=rep(step, length(var.comb.incl)), step.dir=step.dir, var.incl=NA, n.var=RES$n.var[best.comb]+1, rho=NA) | |
for(f in 1:length(var.comb.incl)){ | |
var.incl <- var.comb[[var.comb.incl[f]]] | |
var.incl <- var.incl[order(var.incl)] | |
var.dist <- vegdist(as.matrix(var.mat[,var.incl]), method=var.dist.method) | |
temp <- suppressWarnings(cor.test(fix.dist, var.dist, method="spearman")) | |
RES.f$var.incl[f] <- paste(var.incl, collapse=",") | |
RES.f$rho[f] <- temp$estimate | |
} | |
last.F <- max(which(RES$step.dir=="F")) | |
RES <- rbind(RES, RES.f[which.max(RES.f$rho),]) | |
best.comb <- which.max(RES$rho) | |
delta.rho <- RES$rho[best.comb] - best.rho | |
best.rho <- RES$rho[best.comb] | |
if(best.comb == step){ | |
while(best.comb == step & RES$n.var[best.comb] > 1){ | |
#backward step | |
step.dir="B" | |
step <- step+1 | |
var.keep <- as.numeric(unlist(strsplit(RES$var.incl[best.comb], ","))) | |
var.comb <- combn(var.keep, RES$n.var[best.comb]-1, simplify=FALSE) | |
RES.b <- data.frame(step=rep(step, length(var.comb)), step.dir=step.dir, var.incl=NA, n.var=RES$n.var[best.comb]-1, rho=NA) | |
for(b in 1:length(var.comb)){ | |
var.incl <- var.comb[[b]] | |
var.incl <- var.incl[order(var.incl)] | |
var.dist <- vegdist(as.matrix(var.mat[,var.incl]), method=var.dist.method) | |
temp <- suppressWarnings(cor.test(fix.dist, var.dist, method="spearman")) | |
RES.b$var.incl[b] <- paste(var.incl, collapse=",") | |
RES.b$rho[b] <- temp$estimate | |
} | |
RES <- rbind(RES, RES.b[which.max(RES.b$rho),]) | |
best.comb <- which.max(RES$rho) | |
best.rho<- RES$rho[best.comb] | |
} | |
} else { | |
break() | |
} | |
} | |
RES_TOT <- rbind(RES_TOT, RES[2:dim(RES)[1],]) | |
print(paste(round((i/num.restarts)*100,3), "% finished")) | |
} | |
RES_TOT <- unique(RES_TOT[,3:5]) | |
if(dim(RES_TOT)[1] > output.best){ | |
order.by.best <- RES_TOT[order(RES_TOT$rho, decreasing=TRUE)[1:output.best],] | |
} else { | |
order.by.best <- RES_TOT[order(RES_TOT$rho, decreasing=TRUE), ] | |
} | |
rownames(order.by.best)<-NULL | |
order.by.i.comb <- c() | |
for(i in 1:length(selected.var)){ | |
f1 <- which(RES_TOT$n.var==i) | |
f2 <- which.max(RES_TOT$rho[f1]) | |
order.by.i.comb <- rbind(order.by.i.comb, RES_TOT[f1[f2],]) | |
} | |
rownames(order.by.i.comb)<-NULL | |
if(length(exclude)<1){var.exclude=NULL} else {var.exclude=exclude} | |
out <- list( | |
order.by.best=order.by.best, | |
order.by.i.comb=order.by.i.comb, | |
best.model.vars=paste(colnames(var.mat)[as.numeric(unlist(strsplit(order.by.best$var.incl[1], ",")))], collapse=","), | |
best.model.rho=order.by.best$rho[1], | |
var.always.include=var.always.include, | |
var.exclude=var.exclude | |
) | |
out | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment