vif_func<-function(in_frame,thresh=10,trace=T,wts=NULL,...){ library(fmsb) if(any(!'data.frame' %in% class(in_frame))) in_frame<-data.frame(in_frame) if(is.null(wts)) wts <- rep(1, ncol(in_frame)) if(!is.null(wts)) if(length(wts)!=ncol(in_frame)) stop('length of weights must equal number of variables') if(any(wts < 0)) stop('weights must be positive') names(wts) <- names(in_frame) #get initial vif value for all comparisons of variables vif_init<-NULL var_names <- names(in_frame) for(val in var_names){ wt <- wts[val] regressors <- var_names[-which(var_names == val)] form <- paste(regressors, collapse = '+') form_in <- formula(paste(val, '~', form)) vif_init<-rbind(vif_init, c(val, VIF(lm(form_in, data = in_frame, ...)) / wt)) } vif_max<-max(as.numeric(vif_init[,2]), na.rm = TRUE) if(vif_max < thresh){ if(trace==T){ #print output of each iteration prmatrix(vif_init,collab=c('var','vif'),rowlab=rep('',nrow(vif_init)),quote=F) cat('\n') cat(paste('All variables have VIF < ', thresh,', max VIF ',round(vif_max,2), sep=''),'\n\n') } return(var_names) } else{ in_dat<-in_frame #backwards selection of explanatory variables, stops when all VIF values are below 'thresh' while(vif_max >= thresh){ vif_vals<-NULL var_names <- names(in_dat) for(val in var_names){ wt <- wts[val] regressors <- var_names[-which(var_names == val)] form <- paste(regressors, collapse = '+') form_in <- formula(paste(val, '~', form)) vif_add<- VIF(lm(form_in, data = in_dat, ...)) / wt vif_vals<-rbind(vif_vals,c(val,vif_add)) } max_row<-which(vif_vals[,2] == max(as.numeric(vif_vals[,2]), na.rm = TRUE))[1] vif_max<-as.numeric(vif_vals[max_row,2]) if(vif_max<thresh) break if(trace==T){ #print output of each iteration prmatrix(vif_vals,collab=c('var','vif'),rowlab=rep('',nrow(vif_vals)),quote=F) cat('\n') cat('removed: ',vif_vals[max_row,1],vif_max,'\n\n') flush.console() } in_dat<-in_dat[,!names(in_dat) %in% vif_vals[max_row,1], drop = F] if(ncol(in_dat)==1) break } return(names(in_dat)) } }