Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Created February 16, 2011 00:59
Show Gist options
  • Save stephenturner/828631 to your computer and use it in GitHub Desktop.
Save stephenturner/828631 to your computer and use it in GitHub Desktop.
2011-02-15 rf adiposity.r
library(randomForest)
###############################################################################
############################## load functions #################################
###############################################################################
# need to document this!
rfr2 = function(randomForestModel) {
printoutput = capture.output(print(randomForestModel))
varline = grep("explained",printoutput,value=TRUE)
if (length(varline)>2) stop("more than two var's explained!")
r2out <- NULL
for (i in 1:length(varline)) {
thisr2=as.numeric(strsplit(varline[i], ":")[[1]][2])
r2out <- c(r2out,thisr2)
}
#r2out=sapply(r2out, function(n) if(n<0) return(0) else return(n))
if (class(r2out) == "numeric") return(r2out/100) else stop("r2 not numeric, problem")
}
# Function to plot the importance with the R2 of the model in the title.
impplot=function (randomForestModel, maintitle="ethnicgroup") {
r2=rfr2(randomForestModel)
# single dataset case
if (length(r2)==1) {
varImpPlot(randomForestModel, pch=16, type=1,
main=paste(maintitle, "\nOOB Training R²=" , r2)
)
} else if (length(r2)==2) {
varImpPlot(randomForestModel, pch=16, type=1,
main=paste(maintitle, "\nOOB Training R²=" , r2[1],"\nTesting R²=",r2[2])
)
} else stop("you have more than two r2s!")
}
# permute a column in a data.frame
permute <- function (df, columnToPermute="column", seed=NULL) {
if (!is.null(seed)) set.seed(seed)
print(paste("Random seed:",seed))
colindex <- which(names(df)==columnToPermute)
permutedcol <- df[ ,colindex][sample(1:nrow(df))]
df[colindex] <- permutedcol
return(df)
}
#splitdf splits a data frame into a training and testing set.
#returns a list of two data frames: trainset and testset.
#you can optionally apply a random seed.
splitdf <- function(dataframe, seed=NULL) {
if (!is.null(seed)) set.seed(seed)
index <- 1:nrow(dataframe)
trainindex <- sample(index, trunc(length(index)/2))
trainset <- dataframe[trainindex, ]
testset <- dataframe[-trainindex, ]
list(trainset=trainset,testset=testset)
}
#this function utilizes the function above.
#you give it a data frame you want to randomize,
#and a character vector with column names you want to be sure are
#equally distributed among the two different sets.
#these columns must be continuous variables. chi2 not yet implemented.
splitdf.randomize <- function(dataframe, ttestcolnames=c("cols","to","test")) {
d <- dataframe
if (!all(ttestcolnames %in% names(d))) stop(paste(ttestcolnames,"not in dataframe"))
ps <- NULL
while (is.null(ps) | any(ps<.5)) {
set1 <- splitdf(d)$trainset
set2 <- splitdf(d)$testset
ttestcols <- which(names(d) %in% ttestcolnames)
ps <- NULL
for (col in ttestcols) {
p <- t.test(set1[ ,col], set2[ ,col])$p.value
ps=c(ps,p)
}
print(paste(ttestcolnames," t-test p-value =",ps))
cat("\n")
}
list(set1=set1,set2=set2)
}
###############################################################################
################################### load data #################################
###############################################################################
# Read in the raw data
combined=read.csv("C:/Users/turnersd/Documents/Dropbox/Docs/Work/2011-02-14 Obesity project/pilotkeyvars_missingNA.csv")
# Ethnicity is coded 1=white, 2=japanese.
# This line recodes that numeric variable into a factor variable.
combined$ethnicg2 <- factor(combined$ethnicg2, labels=c("White","Japanese"))
# Make new datasets. "totfat" contains the variable CRC_FAT_TOT (DXA total
# fat) in the first column, then every other clinical / biomarker after
# that. "trperi" contains the variable trunk_peri (trunk to peripheral fat
# ratio), then every other column after that.
matrix(names(combined))
totfat = combined[ ,c(7,2,3,5,6,15:63)]
trperi = combined[ ,c(8,2,3,5,6,15:63)]
#cbind(matrix(names(totfat)),matrix(names(trperi)))
# Do a rough imputation. This sets the NAs to the median (i.e. mimp.totfat)
mimp.totfat <- na.roughfix(totfat)
mimp.trperi <- na.roughfix(trperi)
# impute using the random forest proximity measures
set.seed(42)
rimp.totfat <- rfImpute(CRC_FAT_TOT~., data=totfat)
rimp.trperi <- rfImpute(trunk_peri~., data=trperi)
# the methylation profiles were only taken on the 44 women who had MRIs.
# worth running again using only the blood markers.
#appending the b to the dataset names - blood markers only (no mp_* markers)
totfatb=totfat[-(24:46)]
trperib=trperi[-(24:46)]
#cbind(names(totfatb),names(trperib))
# imputation with blood markers only
set.seed(42)
rimp.totfatb <- rfImpute(CRC_FAT_TOT~., data=totfatb)
rimp.trperib <- rfImpute(trunk_peri~., data=trperib)
#rimp.totfatb dataset contains total fat as the dependent variable, and all individuals, imputed using the entire dataset, blood markers only.
rimp.totfatb.w <- subset(rimp.totfatb, ethnicg2=="White", select=-ethnicg2)
rimp.totfatb.j <- subset(rimp.totfatb, ethnicg2=="Japanese", select=-ethnicg2)
rimp.trperib.w <- subset(rimp.trperib, ethnicg2=="White", select=-ethnicg2)
rimp.trperib.j <- subset(rimp.trperib, ethnicg2=="Japanese", select=-ethnicg2)
rimp.totfat.w <- subset(rimp.totfat, ethnicg2=="White", select=-ethnicg2)
rimp.totfat.j <- subset(rimp.totfat, ethnicg2=="Japanese", select=-ethnicg2)
rimp.trperi.w <- subset(rimp.trperi, ethnicg2=="White", select=-ethnicg2)
rimp.trperi.j <- subset(rimp.trperi, ethnicg2=="Japanese", select=-ethnicg2)
cols.to.remove <- c("mp_SLC2A5","mp_H19")
rimp.totfat.w <- rimp.totfat.w[ ,-match(cols.to.remove, names(rimp.totfat.w))]
rimp.totfat.j <- rimp.totfat.j[ ,-match(cols.to.remove, names(rimp.totfat.j))]
rimp.trperi.w <- rimp.trperi.w[ ,-match(cols.to.remove, names(rimp.trperi.w))]
rimp.trperi.j <- rimp.trperi.j[ ,-match(cols.to.remove, names(rimp.trperi.j))]
#Random splits
cols.to.remove <- c("mp_SLC2A5","mp_H19")
combined.lowmissing <- combined[ ,-match(cols.to.remove, names(combined))]
combined.imputed <- rfImpute(CRC_FAT_TOT~., data=combined.lowmissing)
wh <- subset(combined.imputed, ethnicg2=="White")
ja <- subset(combined.imputed, ethnicg2=="Japanese")
cols.to.randomize <- NULL
cols.to.randomize=c(cols.to.randomize,"CRC_FAT_TOT")
cols.to.randomize=c(cols.to.randomize,"trunk_peri")
cols.to.randomize=c(cols.to.randomize,"logliver")
cols.to.randomize=c(cols.to.randomize,"MRI_VISC_PERC_ABDO")
cols.to.randomize=c(cols.to.randomize,"visc_subc")
cols.to.randomize=c(cols.to.randomize,"CRC_AGE")
cols.to.randomize=c(cols.to.randomize,"CRC_TECH_BMI")
cols.to.randomize=c(cols.to.randomize,"waisthip_tech_navel")
set.seed(808)
sets.wh <- splitdf.randomize(wh, cols.to.randomize)
sets.ja <- splitdf.randomize(ja, cols.to.randomize)
set1 <- rbind(sets.wh$set1, sets.ja$set1)
set2 <- rbind(sets.wh$set2, sets.ja$set2)
matrix(names(set1))
totfat1 <- set1[ ,c(1,3,4,6,7,15:ncol(set1))]
totfat2 <- set2[ ,c(1,3,4,6,7,15:ncol(set2))]
trperi1 <- set1[ ,c(8,3,4,6,7,15:ncol(set1))]
trperi2 <- set2[ ,c(8,3,4,6,7,15:ncol(set2))]
# function to remove all variables that begin with "mp_"
remove.mp <- function(df) df[, -(grep("^mp_",names(df)))]
totfatb1 <- remove.mp(totfat1)
totfatb2 <- remove.mp(totfat2)
trperib1 <- remove.mp(trperi1)
trperib2 <- remove.mp(trperi2)
# In the last few chunks of code I took the combined dataset, imputed
# missing values, then did the randomization. I don't want to impute the
# outcome for women who didn't have MRI. That's what the next few lines do.
cols.to.remove <- c("mp_SLC2A5","mp_H19")
combined.lowmissing <- combined[ ,-match(cols.to.remove, names(combined))]
haveMRI <- !(is.na(combined.lowmissing$logliver) | is.na(combined.lowmissing$MRI_VISC_PERC_ABDO))
mri <- combined.lowmissing[haveMRI, ]
mri.imputed <- rfImpute(CRC_FAT_TOT~., data=mri)
mri.wh <- subset(mri.imputed, ethnicg2=="White") #n==28
mri.ja <- subset(mri.imputed, ethnicg2=="Japanese") #n==20
set.seed(42)
sets.mri.wh <- splitdf.randomize(mri.wh, cols.to.randomize)
sets.mri.ja <- splitdf.randomize(mri.ja, cols.to.randomize)
mri.set1 <- rbind(sets.mri.wh$set1, sets.mri.ja$set1)
mri.set2 <- rbind(sets.mri.wh$set2, sets.mri.ja$set2)
liver1 <- mri.set1[ ,c(11,3,4,6,7,15:ncol(mri.set1))]
liver2 <- mri.set2[ ,c(11,3,4,6,7,15:ncol(mri.set2))]
visc1 <- mri.set1[ ,c(9,3,4,6,7,15:ncol(mri.set1))]
visc2 <- mri.set2[ ,c(9,3,4,6,7,15:ncol(mri.set2))]
viscsub1 <- mri.set1[ ,c(14,3,4,6,7,15:ncol(mri.set1))]
viscsub2 <- mri.set2[ ,c(14,3,4,6,7,15:ncol(mri.set2))]
###############################################################################
############### total fat, imputing vs not imputing ###########################
###############################################################################
# Fit the model, no imputation
set.seed(1)
totfat.rf <- randomForest(
CRC_FAT_TOT~.,
data=totfat,
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
# Print out the model to make sure it did regression and to see the % var explained.
print(totfat.rf)
rfr2(totfat.rf)
plot(totfat.rf)
# Plot the importance scores
impplot(totfat.rf, "DXA total fat, JA+White")
png("totfat, all bmi.png", w=600, h=600)
impplot(totfat.rf, "DXA total fat, JA+White")
dev.off()
###############################################################################
# rough imputation on totfat
matrix(names(mimp.totfat))
set.seed(2)
mimp.totfat.rf <- randomForest(
CRC_FAT_TOT~.,
data=mimp.totfat,
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(mimp.totfat.rf)
rfr2(mimp.totfat.rf)
plot(mimp.totfat.rf)
impplot(mimp.totfat.rf, "DXA total fat, JA+White, median-imputed")
###############################################################################
# using RF Imputation
set.seed(3)
rimp.totfat.rf <- randomForest(
CRC_FAT_TOT~.,
data=rimp.totfat,
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.totfat.rf)
rfr2(rimp.totfat.rf)
plot(rimp.totfat.rf)
impplot(rimp.totfat.rf, "DXA total fat, JA+White, RF-imputed")
png("totfat-combined-imputed.png", w=600, h=600)
impplot(rimp.totfat.rf, "DXA total fat, JA+White, RF-imputed")
dev.off()
###############################################################################
#Plot all of those results
png("adiposity-testing-imputation.png",w=1000,h=1500, res=100)
par(mfrow=c(3,2))
plot(totfat.rf)
impplot(totfat.rf, "DXA total fat, JA+White")
plot(mimp.totfat.rf)
impplot(mimp.totfat.rf, "DXA total fat, JA+White, median-imputed")
plot(rimp.totfat.rf)
impplot(rimp.totfat.rf, "DXA total fat, JA+White, RF-imputed")
dev.off()
###############################################################################
################## combined, trunk to periphery ratio, imputed ###############
###############################################################################
set.seed(4)
rimp.trperi.rf <- randomForest(
trunk_peri~.,
data=rimp.trperi,
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.trperi.rf)
rfr2(rimp.trperi.rf)
plot(rimp.trperi.rf)
impplot(rimp.trperi.rf, "Trunk:Peri, JA+White, RF-imputed")
#What was the actual importance?
myimp <- as.data.frame(importance(rimp.trperi.rf))
myimp$var <- row.names(myimp)
row.names(myimp) <- NULL
subset(myimp, var=="ethnicg2")
#plot the importance scores saving to file.
png("trunkperi-combined-imputed.png", w=600, h=600)
impplot(rimp.trperi.rf, "Trunk:Peri, JA+White, RF-imputed")
dev.off()
# it seems like ethnicity is no longer important. what happens
# if you fit a linear model with WHR? ethnicity is no longer sig!
summary(lm(trunk_peri~ethnicg2, data=rimp.trperi))
summary(lm(trunk_peri~ethnicg2+waisthip_tech_navel, data=rimp.trperi))
###############################################################################
#what happens if you run the random forests model without allowing WHR?
set.seed(5)
rimp.trperi.rf.nowhr <- randomForest(
trunk_peri~.,
data=rimp.trperi[which(names(trperi) %nin% "waisthip_tech_navel")],
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.trperi.rf.nowhr)
rfr2(rimp.trperi.rf.nowhr)
impplot(rimp.trperi.rf.nowhr, "Trunk:Peri, JA+White, RF-imputed, excl WHR")
png("trunkperi-combined-imputed-exclwhr.png", w=600, h=600)
impplot(rimp.trperi.rf.nowhr, "Trunk:Peri, JA+White, RF-imputed, excl WHR")
dev.off()
# if you account for some of the other important variables is ethnicity still important?
summary(lm(trunk_peri~ethnicg2, data=rimp.trperi))
summary(lm(trunk_peri~ethnicg2+CRC_TECH_BMI+ALSR_Insulin+ALSR_PAI1+lep_adipo+ALSR_25D3+ALSR_Glucose+ALSR_TG+HOMA_IR+ALSR_HDL, data=rimp.trperi))
###############################################################################
################## totfat and trperi, with blood markers only##################
###############################################################################
set.seed(6)
rimp.totfatb.rf <- randomForest(
CRC_FAT_TOT~.,
data=rimp.totfatb,
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.totfatb.rf)
rfr2(rimp.totfatb.rf)
impplot(rimp.totfatb.rf, "TotFat, JA+W, bloodmrks, RFimputed")
png("totfat-combined-imputed-bloodmarkersonly.png", w=600, h=600)
impplot(rimp.totfatb.rf, "TotFat, JA+W, bldmrks, RFimputed")
dev.off()
set.seed(7)
rimp.trperib.rf <- randomForest(
trunk_peri~.,
data=rimp.trperib,
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.trperib.rf)
rfr2(rimp.trperib.rf)
impplot(rimp.trperib.rf, "TotFat, JA+W, bloodmrks, RFimputed")
png("trunkperi-combined-imputed-bloodmarkersonly.png", w=600, h=600)
impplot(rimp.trperib.rf, "Trunk:Peri, JA+W, bldmrks, RFimputed")
dev.off()
###############################################################################
################## totfat and trperi, splitting by ethnicity ##################
###############################################################################
# The first thing I wanted to to is to make sure that how I've set up
# training and testing works properly. To do this, I took one subset of
# the data (white women only), and used that dataset for BOTH training and
# testing. Doing this, the testing R² should be extremely high. Then I'll
# do the same thing, but permute (shuffle) the outcome variable for the
# white women in the testing set. The OOB training R² should still be
# high, but the testing R² should plummet to near zero.
# train on white, test on white
set.seed(8)
rimp.totfatb.rf.split.ww <- randomForest(
x=rimp.totfatb.w[ ,-1], #exclude the first column, the outcome
y=rimp.totfatb.w[ , 1], #include only the first column, the outcome
xtest=rimp.totfatb.w[ ,-1],
ytest=rimp.totfatb.w[ , 1],
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.totfatb.rf.split.ww)
# train on white, test on permutedwhite
set.seed(9)
rimp.totfatb.rf.split.wpw <- randomForest(
x=rimp.totfatb.w[ ,-1],
y=rimp.totfatb.w[ , 1],
xtest=rimp.totfatb.w[ ,-1],
ytest=permute(rimp.totfatb.w, "CRC_FAT_TOT")[ , 1],
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.totfatb.rf.split.wpw)
###############################################################################
## Now, train on one ethnicity, test on another:
# train on white, test on JA, totfat, blood markers only:
set.seed(10)
rimp.totfatb.rf.split.wj <- randomForest(
x=rimp.totfatb.w[ ,-1], #exclude the first column, the outcome
y=rimp.totfatb.w[ , 1], #include only the first column, the outcome
xtest=rimp.totfatb.j[ ,-1],
ytest=rimp.totfatb.j[ , 1],
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.totfatb.rf.split.wj)
impplot(rimp.totfatb.rf.split.wj, "TotFat, train:W, test:JA, RF-imputed, bldmrks")
# train on white, test on JA, totfat, all markers except H19 and SLC25A:
set.seed(100)
rimp.totfat.rf.split.wj <- randomForest(
x=rimp.totfat.w[ ,-1], #exclude the first column, the outcome
y=rimp.totfat.w[ , 1], #include only the first column, the outcome
xtest=rimp.totfat.j[ ,-1],
ytest=rimp.totfat.j[ , 1],
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.totfat.rf.split.wj)
impplot(rimp.totfat.rf.split.wj, "TotFat, train:W, test:JA, RF-imputed, bldmrks")
# train on JA, test on white, totfat:
set.seed(10)
rimp.totfatb.rf.splitjw<- randomForest(
x=rimp.totfatb.j[ ,-1], #exclude the first column, the outcome
y=rimp.totfatb.j[ , 1], #include only the first column, the outcome
xtest=rimp.totfatb.w[ ,-1],
ytest=rimp.totfatb.w[ , 1],
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.totfatb.rf.split.jw)
impplot(rimp.totfatb.rf.split.jw, "TotFat, train:JA, test:W, RF-imputed, bldmrks")
# train on white, test on JA, trperi:
set.seed(12)
rimp.trperib.rf.split.wj <- randomForest(
x=rimp.trperib.w[ ,-1], #exclude the first column, the outcome
y=rimp.trperib.w[ , 1], #include only the first column, the outcome
xtest=rimp.trperib.j[ ,-1],
ytest=rimp.trperib.j[ , 1],
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.trperib.rf.split.wj)
impplot(rimp.trperib.rf.split.wj, "Tr/peri, train:W, test:JA, RF-imputed, bldmrks")
# train on JA, test on white, trperi:
set.seed(13)
rimp.trperib.rf.split.jw <- randomForest(
x=rimp.trperib.j[ ,-1], #exclude the first column, the outcome
y=rimp.trperib.j[ , 1], #include only the first column, the outcome
xtest=rimp.trperib.w[ ,-1],
ytest=rimp.trperib.w[ , 1],
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.trperib.rf.split.jw)
impplot(rimp.trperib.rf.split.jw, "Tr/peri, train:JA, test:W, RF-imputed, bldmrks")
# train on JA, test on white, trperi, all markers except H19 and HLC2A5:
set.seed(130)
rimp.trperi.rf.split.jw <- randomForest(
x=rimp.trperi.j[ ,-1], #exclude the first column, the outcome
y=rimp.trperi.j[ , 1], #include only the first column, the outcome
xtest=rimp.trperi.w[ ,-1],
ytest=rimp.trperi.w[ , 1],
importance=TRUE,
keep.forest=TRUE,
na.action=na.omit
)
print(rimp.trperi.rf.split.jw)
impplot(rimp.trperi.rf.split.jw, "Tr/peri, train:JA, test:W, RF-imputed, bldmrks")
# Totfat, train on white, test on JA
print(rimp.totfatb.rf.split.wj)
# Totfat, train on JA, test on white
print(rimp.totfatb.rf.split.jw)
# Trunk/peri train on white, test on JA
print(rimp.trperib.rf.split.wj)
# Trunk/peri train on JA, test on white
print(rimp.trperib.rf.split.jw)
png("totfat-WJ.png", w=600, h=600)
impplot(rimp.totfatb.rf.split.wj, "TotFat, train:W, test:JA")
dev.off()
png("totfat-JW.png", w=600, h=600)
impplot(rimp.totfatb.rf.split.jw, "TotFat, train:JA, test:W")
dev.off()
png("trperi-WJ.png", w=600, h=600)
impplot(rimp.trperib.rf.split.wj, "Tr/peri, train:W, test:JA")
dev.off()
png("trperi-JW.png", w=600, h=600)
impplot(rimp.trperib.rf.split.jw, "Tr/peri, train:JA, test:W")
dev.off()
png("both-totfat-trperi-wj-jw.png", w=1200, h=1200)
par(mfrow=c(2,2))
impplot(rimp.totfatb.rf.split.wj, "TotFat, train:W, test:JA")
impplot(rimp.trperib.rf.split.wj, "Tr/peri, train:W, test:JA")
impplot(rimp.totfatb.rf.split.jw, "TotFat, train:JA, test:W")
impplot(rimp.trperib.rf.split.jw, "Tr/peri, train:JA, test:W")
dev.off()
################
#randomization
#try fitting a stepwise model. The predictive R2 is way worse.
lmfit <- lm(CRC_FAT_TOT~., data=totfat1)
stepfit <- step(lmfit, direction="forward")
steppred <- predict(stepfit, totfat2[ ,-1])
rsq(steppred,totfat2[ ,1])
#now fit the random forest model
set.seed(14)
totfat.rf.split12 <- randomForest(
x=totfat1[ ,-1], #exclude the first column, the outcome
y=totfat1[ , 1], #include only the first column, the outcome
xtest=totfat2[ ,-1],
ytest=totfat2[ , 1],
keep.forest=TRUE,
importance=TRUE,
)
print(totfat.rf.split12)
rsq(predict(totfat.rf.split12, totfat2[ ,-1]), totfat2[ ,1])
png("totfat-15split.png",w=600, h=600)
impplot(totfat.rf.split12, "Total fat, 15/15 random")
dev.off()
set.seed(15)
trperi.rf.split12 <- randomForest(
x=trperi1[ ,-1], #exclude the first column, the outcome
y=trperi1[ , 1], #include only the first column, the outcome
xtest=trperi2[ ,-1],
ytest=trperi2[ , 1],
keep.forest=TRUE,
importance=TRUE,
)
print(trperi.rf.split12)
rsq(predict(trperi.rf.split12, trperi2[ ,-1]), trperi2$trunk_peri)
png("trperi-15split.png",w=600, h=600)
impplot(trperi.rf.split12, "Trunk-peri, 15/15 random")
dev.off()
###################################################
# logliver
set.seed(15)
liver.rf.split12 <- randomForest(
x=liver1[ ,-1],
y=liver1[ , 1],
xtest=liver2[ ,-1],
ytest=liver2[ , 1],
keep.forest=TRUE,
importance=TRUE,
)
print(liver.rf.split12)
rsq(predict(liver.rf.split12, liver2[ ,-1]), liver2[ ,1])
png("liver-split.png",w=600, h=600)
impplot(liver.rf.split12, "logliver, 14w/10j split")
dev.off()
# visceral
set.seed(16)
visc.rf.split12 <- randomForest(
x=visc1[ ,-1],
y=visc1[ , 1],
xtest=visc2[ ,-1],
ytest=visc2[ , 1],
keep.forest=TRUE,
importance=TRUE,
)
print(visc.rf.split12)
rsq(predict(visc.rf.split12, visc2[ ,-1]), visc2[ ,1])
png("visc-split.png",w=600, h=600)
impplot(visc.rf.split12, "%visceral, 14w/10j split")
dev.off()
# visceral/subc ratio
set.seed(16)
viscsub.rf.split12 <- randomForest(
x=viscsub1[ ,-1],
y=viscsub1[ , 1],
xtest=viscsub2[ ,-1],
ytest=viscsub2[ , 1],
keep.forest=TRUE,
importance=TRUE,
)
print(viscsub.rf.split12)
rsq(predict(viscsub.rf.split12, viscsub2[ ,-1]), viscsub2[ ,1])
png("viscsub-split.png",w=600, h=600)
impplot(viscsub.rf.split12, "visceral/subratio, 14w/10j split")
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment