stephenturner/2011-02-15 rf adiposity.r

## 2011-02-15 rf adiposity.r
library(randomForest)

###############################################################################
############################## load functions #################################
###############################################################################

# need to document this!
rfr2 = function(randomForestModel) {
	printoutput = capture.output(print(randomForestModel))
	varline = grep("explained",printoutput,value=TRUE)
	if (length(varline)>2) stop("more than two var's explained!")
	r2out <- NULL
	for (i in 1:length(varline)) {
		thisr2=as.numeric(strsplit(varline[i], ":")[[1]][2])
		r2out <- c(r2out,thisr2)
	}
	#r2out=sapply(r2out,  function(n) if(n<0) return(0) else return(n))
	if (class(r2out) == "numeric") return(r2out/100) else stop("r2 not numeric, problem")
}

# Function to plot the importance with the R2 of the model in the title.
impplot=function (randomForestModel, maintitle="ethnicgroup") {
	r2=rfr2(randomForestModel)
	# single dataset case
	if (length(r2)==1) {
		varImpPlot(randomForestModel, pch=16, type=1,
			main=paste(maintitle, "\nOOB Training R²=" , r2)
		)
	} else if (length(r2)==2) {
		varImpPlot(randomForestModel, pch=16, type=1,
			main=paste(maintitle, "\nOOB Training R²=" , r2[1],"\nTesting R²=",r2[2])
		)
	} else stop("you have more than two r2s!")
}

# permute a column in a data.frame
permute <- function (df, columnToPermute="column", seed=NULL) {
	if (!is.null(seed)) set.seed(seed)
	print(paste("Random seed:",seed))
	colindex <- which(names(df)==columnToPermute)
	permutedcol <- df[ ,colindex][sample(1:nrow(df))]
	df[colindex] <- permutedcol
	return(df)
}

#splitdf splits a data frame into a training and testing set.
#returns a list of two data frames: trainset and testset.
#you can optionally apply a random seed.
splitdf <- function(dataframe, seed=NULL) {
	if (!is.null(seed)) set.seed(seed)
	index <- 1:nrow(dataframe)
	trainindex <- sample(index, trunc(length(index)/2))
	trainset <- dataframe[trainindex, ]
	testset <- dataframe[-trainindex, ]
	list(trainset=trainset,testset=testset)
}

#this function utilizes the function above.
#you give it a data frame you want to randomize,
#and a character vector with column names you want to be sure are
#equally distributed among the two different sets.
#these columns must be continuous variables. chi2 not yet implemented.
splitdf.randomize <- function(dataframe, ttestcolnames=c("cols","to","test")) {
	d <- dataframe
	if (!all(ttestcolnames %in% names(d))) stop(paste(ttestcolnames,"not in dataframe"))
	ps <- NULL
	while (is.null(ps) | any(ps<.5)) {
		set1 <- splitdf(d)$trainset
		set2 <- splitdf(d)$testset
		ttestcols <- which(names(d) %in% ttestcolnames)
		ps <- NULL
		for (col in ttestcols) {
			p <- t.test(set1[ ,col], set2[ ,col])$p.value
			ps=c(ps,p)
		}
		print(paste(ttestcolnames," t-test p-value =",ps))
		cat("\n")
	}
	list(set1=set1,set2=set2)
}


###############################################################################
################################### load data #################################
###############################################################################


# Read in the raw data
combined=read.csv("C:/Users/turnersd/Documents/Dropbox/Docs/Work/2011-02-14 Obesity project/pilotkeyvars_missingNA.csv")

# Ethnicity is coded 1=white, 2=japanese.
# This line recodes that numeric variable into a factor variable.
combined$ethnicg2 <- factor(combined$ethnicg2, labels=c("White","Japanese"))


# Make new datasets. "totfat" contains the variable CRC_FAT_TOT (DXA total
# fat) in the first column, then every other clinical / biomarker after
# that. "trperi" contains the variable trunk_peri (trunk to peripheral fat
# ratio), then every other column after that.
matrix(names(combined))
totfat = combined[ ,c(7,2,3,5,6,15:63)]
trperi = combined[ ,c(8,2,3,5,6,15:63)]
#cbind(matrix(names(totfat)),matrix(names(trperi)))

# Do a rough imputation. This sets the NAs to the median (i.e. mimp.totfat)
mimp.totfat <- na.roughfix(totfat)
mimp.trperi <- na.roughfix(trperi)

# impute using the random forest proximity measures
set.seed(42)
rimp.totfat <- rfImpute(CRC_FAT_TOT~., data=totfat)
rimp.trperi <- rfImpute(trunk_peri~., data=trperi)

# the methylation profiles were only taken on the 44 women who had MRIs.
# worth running again using only the blood markers.
#appending the b to the dataset names - blood markers only (no mp_* markers)
totfatb=totfat[-(24:46)]
trperib=trperi[-(24:46)]
#cbind(names(totfatb),names(trperib))

# imputation with blood markers only
set.seed(42)
rimp.totfatb <- rfImpute(CRC_FAT_TOT~., data=totfatb)
rimp.trperib <- rfImpute(trunk_peri~., data=trperib)


#rimp.totfatb dataset contains total fat as the dependent variable, and all individuals, imputed using the entire dataset, blood markers only.
rimp.totfatb.w <- subset(rimp.totfatb, ethnicg2=="White", select=-ethnicg2)
rimp.totfatb.j <- subset(rimp.totfatb, ethnicg2=="Japanese", select=-ethnicg2)
rimp.trperib.w <- subset(rimp.trperib, ethnicg2=="White", select=-ethnicg2)
rimp.trperib.j <- subset(rimp.trperib, ethnicg2=="Japanese", select=-ethnicg2)

rimp.totfat.w <- subset(rimp.totfat, ethnicg2=="White", select=-ethnicg2)
rimp.totfat.j <- subset(rimp.totfat, ethnicg2=="Japanese", select=-ethnicg2)
rimp.trperi.w <- subset(rimp.trperi, ethnicg2=="White", select=-ethnicg2)
rimp.trperi.j <- subset(rimp.trperi, ethnicg2=="Japanese", select=-ethnicg2)

cols.to.remove <- c("mp_SLC2A5","mp_H19")
rimp.totfat.w <- rimp.totfat.w[ ,-match(cols.to.remove, names(rimp.totfat.w))]
rimp.totfat.j <- rimp.totfat.j[ ,-match(cols.to.remove, names(rimp.totfat.j))]
rimp.trperi.w <- rimp.trperi.w[ ,-match(cols.to.remove, names(rimp.trperi.w))]
rimp.trperi.j <- rimp.trperi.j[ ,-match(cols.to.remove, names(rimp.trperi.j))]

#Random splits
cols.to.remove <- c("mp_SLC2A5","mp_H19")
combined.lowmissing <- combined[ ,-match(cols.to.remove, names(combined))]
combined.imputed <- rfImpute(CRC_FAT_TOT~., data=combined.lowmissing)

wh <- subset(combined.imputed, ethnicg2=="White")
ja <- subset(combined.imputed, ethnicg2=="Japanese")

cols.to.randomize <- NULL
cols.to.randomize=c(cols.to.randomize,"CRC_FAT_TOT")
cols.to.randomize=c(cols.to.randomize,"trunk_peri")
cols.to.randomize=c(cols.to.randomize,"logliver")
cols.to.randomize=c(cols.to.randomize,"MRI_VISC_PERC_ABDO")
cols.to.randomize=c(cols.to.randomize,"visc_subc")
cols.to.randomize=c(cols.to.randomize,"CRC_AGE")
cols.to.randomize=c(cols.to.randomize,"CRC_TECH_BMI")
cols.to.randomize=c(cols.to.randomize,"waisthip_tech_navel")

set.seed(808)
sets.wh <- splitdf.randomize(wh, cols.to.randomize)
sets.ja <- splitdf.randomize(ja, cols.to.randomize)
set1 <- rbind(sets.wh$set1, sets.ja$set1)
set2 <- rbind(sets.wh$set2, sets.ja$set2)

matrix(names(set1))
totfat1 <- set1[ ,c(1,3,4,6,7,15:ncol(set1))]
totfat2 <- set2[ ,c(1,3,4,6,7,15:ncol(set2))]
trperi1 <- set1[ ,c(8,3,4,6,7,15:ncol(set1))]
trperi2 <- set2[ ,c(8,3,4,6,7,15:ncol(set2))]

# function to remove all variables that begin with "mp_"
remove.mp <- function(df) df[, -(grep("^mp_",names(df)))]
totfatb1 <- remove.mp(totfat1)
totfatb2 <- remove.mp(totfat2)
trperib1 <- remove.mp(trperi1)
trperib2 <- remove.mp(trperi2)


# In the last few chunks of code I took the combined dataset, imputed
# missing values, then did the randomization. I don't want to impute the
# outcome for women who didn't have MRI. That's what the next few lines do.
cols.to.remove <- c("mp_SLC2A5","mp_H19")
combined.lowmissing <- combined[ ,-match(cols.to.remove, names(combined))]
haveMRI <- !(is.na(combined.lowmissing$logliver) | is.na(combined.lowmissing$MRI_VISC_PERC_ABDO))
mri <- combined.lowmissing[haveMRI, ]
mri.imputed <- rfImpute(CRC_FAT_TOT~., data=mri)

mri.wh <- subset(mri.imputed, ethnicg2=="White") #n==28
mri.ja <- subset(mri.imputed, ethnicg2=="Japanese") #n==20

set.seed(42)
sets.mri.wh <- splitdf.randomize(mri.wh, cols.to.randomize)
sets.mri.ja <- splitdf.randomize(mri.ja, cols.to.randomize)
mri.set1 <- rbind(sets.mri.wh$set1, sets.mri.ja$set1)
mri.set2 <- rbind(sets.mri.wh$set2, sets.mri.ja$set2)

liver1 <- mri.set1[ ,c(11,3,4,6,7,15:ncol(mri.set1))]
liver2 <- mri.set2[ ,c(11,3,4,6,7,15:ncol(mri.set2))]
visc1 <- mri.set1[ ,c(9,3,4,6,7,15:ncol(mri.set1))]
visc2 <- mri.set2[ ,c(9,3,4,6,7,15:ncol(mri.set2))]
viscsub1 <- mri.set1[ ,c(14,3,4,6,7,15:ncol(mri.set1))]
viscsub2 <- mri.set2[ ,c(14,3,4,6,7,15:ncol(mri.set2))]


###############################################################################
############### total fat, imputing vs not imputing ###########################
###############################################################################

# Fit the model, no imputation
set.seed(1)
totfat.rf <- randomForest(
	CRC_FAT_TOT~.,
	data=totfat,
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)

# Print out the model to make sure it did regression and to see the % var explained.
print(totfat.rf)
rfr2(totfat.rf)
plot(totfat.rf)

# Plot the importance scores
impplot(totfat.rf, "DXA total fat, JA+White")

png("totfat, all bmi.png", w=600, h=600)
impplot(totfat.rf, "DXA total fat, JA+White")
dev.off()

###############################################################################

# rough imputation on totfat
matrix(names(mimp.totfat))

set.seed(2)
mimp.totfat.rf <- randomForest(
	CRC_FAT_TOT~.,
	data=mimp.totfat,
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)

print(mimp.totfat.rf)
rfr2(mimp.totfat.rf)
plot(mimp.totfat.rf)
impplot(mimp.totfat.rf, "DXA total fat, JA+White, median-imputed")

###############################################################################

# using RF Imputation
set.seed(3)
rimp.totfat.rf <- randomForest(
	CRC_FAT_TOT~.,
	data=rimp.totfat,
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)

print(rimp.totfat.rf)
rfr2(rimp.totfat.rf)
plot(rimp.totfat.rf)
impplot(rimp.totfat.rf, "DXA total fat, JA+White, RF-imputed")

png("totfat-combined-imputed.png", w=600, h=600)
impplot(rimp.totfat.rf, "DXA total fat, JA+White, RF-imputed")
dev.off()

###############################################################################

#Plot all of those results
png("adiposity-testing-imputation.png",w=1000,h=1500, res=100)
par(mfrow=c(3,2))
plot(totfat.rf)
impplot(totfat.rf, "DXA total fat, JA+White")
plot(mimp.totfat.rf)
impplot(mimp.totfat.rf, "DXA total fat, JA+White, median-imputed")
plot(rimp.totfat.rf)
impplot(rimp.totfat.rf, "DXA total fat, JA+White, RF-imputed")
dev.off()


###############################################################################
################## combined,  trunk to periphery ratio, imputed ###############
###############################################################################

set.seed(4)
rimp.trperi.rf <- randomForest(
	trunk_peri~.,
	data=rimp.trperi,
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)

print(rimp.trperi.rf)
rfr2(rimp.trperi.rf)
plot(rimp.trperi.rf)
impplot(rimp.trperi.rf, "Trunk:Peri, JA+White, RF-imputed")

#What was the actual importance?
myimp <- as.data.frame(importance(rimp.trperi.rf))
myimp$var <- row.names(myimp)
row.names(myimp) <- NULL
subset(myimp, var=="ethnicg2")

#plot the importance scores saving to file.
png("trunkperi-combined-imputed.png", w=600, h=600)
impplot(rimp.trperi.rf, "Trunk:Peri, JA+White, RF-imputed")
dev.off()

# it seems like ethnicity is no longer important. what happens
# if you fit a linear model with WHR? ethnicity is no longer sig!
summary(lm(trunk_peri~ethnicg2, data=rimp.trperi))
summary(lm(trunk_peri~ethnicg2+waisthip_tech_navel, data=rimp.trperi))

###############################################################################

#what happens if you run the random forests model without allowing WHR?
set.seed(5)
rimp.trperi.rf.nowhr <- randomForest(
	trunk_peri~.,
	data=rimp.trperi[which(names(trperi) %nin% "waisthip_tech_navel")],
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)

print(rimp.trperi.rf.nowhr)
rfr2(rimp.trperi.rf.nowhr)
impplot(rimp.trperi.rf.nowhr, "Trunk:Peri, JA+White, RF-imputed, excl WHR")

png("trunkperi-combined-imputed-exclwhr.png", w=600, h=600)
impplot(rimp.trperi.rf.nowhr, "Trunk:Peri, JA+White, RF-imputed, excl WHR")
dev.off()

# if you account for some of the other important variables is ethnicity still important?
summary(lm(trunk_peri~ethnicg2, data=rimp.trperi))
summary(lm(trunk_peri~ethnicg2+CRC_TECH_BMI+ALSR_Insulin+ALSR_PAI1+lep_adipo+ALSR_25D3+ALSR_Glucose+ALSR_TG+HOMA_IR+ALSR_HDL, data=rimp.trperi))


###############################################################################
################## totfat and trperi, with blood markers only##################
###############################################################################

set.seed(6)
rimp.totfatb.rf <- randomForest(
	CRC_FAT_TOT~.,
	data=rimp.totfatb,
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)

print(rimp.totfatb.rf)
rfr2(rimp.totfatb.rf)
impplot(rimp.totfatb.rf, "TotFat, JA+W, bloodmrks, RFimputed")

png("totfat-combined-imputed-bloodmarkersonly.png", w=600, h=600)
impplot(rimp.totfatb.rf, "TotFat, JA+W, bldmrks, RFimputed")
dev.off()


set.seed(7)
rimp.trperib.rf <- randomForest(
	trunk_peri~.,
	data=rimp.trperib,
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)

print(rimp.trperib.rf)
rfr2(rimp.trperib.rf)
impplot(rimp.trperib.rf, "TotFat, JA+W, bloodmrks, RFimputed")

png("trunkperi-combined-imputed-bloodmarkersonly.png", w=600, h=600)
impplot(rimp.trperib.rf, "Trunk:Peri, JA+W, bldmrks, RFimputed")
dev.off()


###############################################################################
################## totfat and trperi, splitting by ethnicity ##################
###############################################################################


# The first thing I wanted to to is to make sure that how I've set up
# training and testing works properly. To do this, I took one subset of
# the data (white women only), and used that dataset for BOTH training and
# testing. Doing this, the testing R² should be extremely high. Then I'll
# do the same thing, but permute (shuffle) the outcome variable for the
# white women in the testing set. The OOB training R² should still be
# high, but the testing R² should plummet to near zero.

# train on white, test on white
set.seed(8)
rimp.totfatb.rf.split.ww <- randomForest(
	x=rimp.totfatb.w[ ,-1], #exclude the first column, the outcome
	y=rimp.totfatb.w[ , 1], #include only the first column, the outcome
	xtest=rimp.totfatb.w[ ,-1],
	ytest=rimp.totfatb.w[ , 1],
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)
print(rimp.totfatb.rf.split.ww)

# train on white, test on permutedwhite
set.seed(9)
rimp.totfatb.rf.split.wpw <- randomForest(
	x=rimp.totfatb.w[ ,-1],
	y=rimp.totfatb.w[ , 1],
	xtest=rimp.totfatb.w[ ,-1],
	ytest=permute(rimp.totfatb.w, "CRC_FAT_TOT")[ , 1],
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)
print(rimp.totfatb.rf.split.wpw)

###############################################################################

## Now, train on one ethnicity, test on another:
# train on white, test on JA, totfat, blood markers only:
set.seed(10)
rimp.totfatb.rf.split.wj <- randomForest(
	x=rimp.totfatb.w[ ,-1], #exclude the first column, the outcome
	y=rimp.totfatb.w[ , 1], #include only the first column, the outcome
	xtest=rimp.totfatb.j[ ,-1],
	ytest=rimp.totfatb.j[ , 1],
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)
print(rimp.totfatb.rf.split.wj)
impplot(rimp.totfatb.rf.split.wj, "TotFat, train:W, test:JA, RF-imputed, bldmrks")


# train on white, test on JA, totfat, all markers except H19 and SLC25A:
set.seed(100)
rimp.totfat.rf.split.wj <- randomForest(
	x=rimp.totfat.w[ ,-1], #exclude the first column, the outcome
	y=rimp.totfat.w[ , 1], #include only the first column, the outcome
	xtest=rimp.totfat.j[ ,-1],
	ytest=rimp.totfat.j[ , 1],
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)
print(rimp.totfat.rf.split.wj)
impplot(rimp.totfat.rf.split.wj, "TotFat, train:W, test:JA, RF-imputed, bldmrks")


# train on JA, test on white, totfat:
set.seed(10)
rimp.totfatb.rf.splitjw<- randomForest(
	x=rimp.totfatb.j[ ,-1], #exclude the first column, the outcome
	y=rimp.totfatb.j[ , 1], #include only the first column, the outcome
	xtest=rimp.totfatb.w[ ,-1],
	ytest=rimp.totfatb.w[ , 1],
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)
print(rimp.totfatb.rf.split.jw)
impplot(rimp.totfatb.rf.split.jw, "TotFat, train:JA, test:W, RF-imputed, bldmrks")


# train on white, test on JA, trperi:
set.seed(12)
rimp.trperib.rf.split.wj <- randomForest(
	x=rimp.trperib.w[ ,-1], #exclude the first column, the outcome
	y=rimp.trperib.w[ , 1], #include only the first column, the outcome
	xtest=rimp.trperib.j[ ,-1],
	ytest=rimp.trperib.j[ , 1],
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)
print(rimp.trperib.rf.split.wj)
impplot(rimp.trperib.rf.split.wj, "Tr/peri, train:W, test:JA, RF-imputed, bldmrks")

# train on JA, test on white, trperi:
set.seed(13)
rimp.trperib.rf.split.jw <- randomForest(
	x=rimp.trperib.j[ ,-1], #exclude the first column, the outcome
	y=rimp.trperib.j[ , 1], #include only the first column, the outcome
	xtest=rimp.trperib.w[ ,-1],
	ytest=rimp.trperib.w[ , 1],
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)
print(rimp.trperib.rf.split.jw)
impplot(rimp.trperib.rf.split.jw, "Tr/peri, train:JA, test:W, RF-imputed, bldmrks")


# train on JA, test on white, trperi, all markers except H19 and HLC2A5:
set.seed(130)
rimp.trperi.rf.split.jw <- randomForest(
	x=rimp.trperi.j[ ,-1], #exclude the first column, the outcome
	y=rimp.trperi.j[ , 1], #include only the first column, the outcome
	xtest=rimp.trperi.w[ ,-1],
	ytest=rimp.trperi.w[ , 1],
	importance=TRUE,
	keep.forest=TRUE,
	na.action=na.omit
)
print(rimp.trperi.rf.split.jw)
impplot(rimp.trperi.rf.split.jw, "Tr/peri, train:JA, test:W, RF-imputed, bldmrks")


# Totfat, train on white, test on JA
print(rimp.totfatb.rf.split.wj)

# Totfat, train on JA, test on white
print(rimp.totfatb.rf.split.jw)

# Trunk/peri train on white, test on JA
print(rimp.trperib.rf.split.wj)

# Trunk/peri train on JA, test on white
print(rimp.trperib.rf.split.jw)

png("totfat-WJ.png", w=600, h=600)
impplot(rimp.totfatb.rf.split.wj, "TotFat,  train:W, test:JA")
dev.off()

png("totfat-JW.png", w=600, h=600)
impplot(rimp.totfatb.rf.split.jw, "TotFat, train:JA,  test:W")
dev.off()

png("trperi-WJ.png", w=600, h=600)
impplot(rimp.trperib.rf.split.wj, "Tr/peri, train:W, test:JA")
dev.off()

png("trperi-JW.png", w=600, h=600)
impplot(rimp.trperib.rf.split.jw, "Tr/peri, train:JA, test:W")
dev.off()

png("both-totfat-trperi-wj-jw.png", w=1200, h=1200)
par(mfrow=c(2,2))
impplot(rimp.totfatb.rf.split.wj, "TotFat,  train:W, test:JA")
impplot(rimp.trperib.rf.split.wj, "Tr/peri, train:W, test:JA")
impplot(rimp.totfatb.rf.split.jw, "TotFat, train:JA,  test:W")
impplot(rimp.trperib.rf.split.jw, "Tr/peri, train:JA, test:W")
dev.off()


################

#randomization

#try fitting a stepwise model. The predictive R2 is way worse.
lmfit <- lm(CRC_FAT_TOT~., data=totfat1)
stepfit <- step(lmfit, direction="forward")
steppred <- predict(stepfit, totfat2[ ,-1])
rsq(steppred,totfat2[ ,1])

#now fit the random forest model
set.seed(14)
totfat.rf.split12 <- randomForest(
	x=totfat1[ ,-1], #exclude the first column, the outcome
	y=totfat1[ , 1], #include only the first column, the outcome
	xtest=totfat2[ ,-1],
	ytest=totfat2[ , 1],
	keep.forest=TRUE,
	importance=TRUE,
)
print(totfat.rf.split12)
rsq(predict(totfat.rf.split12, totfat2[ ,-1]), totfat2[ ,1])
png("totfat-15split.png",w=600, h=600)
impplot(totfat.rf.split12, "Total fat, 15/15 random")
dev.off()

set.seed(15)
trperi.rf.split12 <- randomForest(
	x=trperi1[ ,-1], #exclude the first column, the outcome
	y=trperi1[ , 1], #include only the first column, the outcome
	xtest=trperi2[ ,-1],
	ytest=trperi2[ , 1],
	keep.forest=TRUE,
	importance=TRUE,
)
print(trperi.rf.split12)
rsq(predict(trperi.rf.split12, trperi2[ ,-1]), trperi2$trunk_peri)
png("trperi-15split.png",w=600, h=600)
impplot(trperi.rf.split12, "Trunk-peri, 15/15 random")
dev.off()


###################################################

# logliver
set.seed(15)
liver.rf.split12 <- randomForest(
	x=liver1[ ,-1],
	y=liver1[ , 1],
	xtest=liver2[ ,-1],
	ytest=liver2[ , 1],
	keep.forest=TRUE,
	importance=TRUE,
)
print(liver.rf.split12)
rsq(predict(liver.rf.split12, liver2[ ,-1]), liver2[ ,1])
png("liver-split.png",w=600, h=600)
impplot(liver.rf.split12, "logliver, 14w/10j split")
dev.off()

# visceral
set.seed(16)
visc.rf.split12 <- randomForest(
	x=visc1[ ,-1],
	y=visc1[ , 1],
	xtest=visc2[ ,-1],
	ytest=visc2[ , 1],
	keep.forest=TRUE,
	importance=TRUE,
)
print(visc.rf.split12)
rsq(predict(visc.rf.split12, visc2[ ,-1]), visc2[ ,1])
png("visc-split.png",w=600, h=600)
impplot(visc.rf.split12, "%visceral, 14w/10j split")
dev.off()

# visceral/subc ratio
set.seed(16)
viscsub.rf.split12 <- randomForest(
	x=viscsub1[ ,-1],
	y=viscsub1[ , 1],
	xtest=viscsub2[ ,-1],
	ytest=viscsub2[ , 1],
	keep.forest=TRUE,
	importance=TRUE,
)
print(viscsub.rf.split12)
rsq(predict(viscsub.rf.split12, viscsub2[ ,-1]), viscsub2[ ,1])
png("viscsub-split.png",w=600, h=600)
impplot(viscsub.rf.split12, "visceral/subratio, 14w/10j split")
dev.off()