Skip to content

Instantly share code, notes, and snippets.

@chiral
Created June 5, 2017 10:29
Show Gist options
  • Save chiral/4f7708dddd2bb4bc4889b31b9e7ce7c9 to your computer and use it in GitHub Desktop.
Save chiral/4f7708dddd2bb4bc4889b31b9e7ce7c9 to your computer and use it in GitHub Desktop.
Comparison between logistic regression and random forest on the "Census Income" dataset. Please refer to http://archive.ics.uci.edu/ml/datasets/Adult
library(randomForest)
library(ROCR)
# Merge the raw "adult.data" and "adult.test" files into one CSV.
#
# Reads both files from the current working directory (no header row, so the
# columns are named V1..V15), removes the trailing "." from the values in
# column V15 of the test file, stacks the two data frames and writes the
# result to "adult.all" with a header row and without row names.
#
# Returns a length-2 vector: the number of rows read from "adult.data"
# followed by the number of rows read from "adult.test".
init <- function() {
  df1 <- read.csv("adult.data", header = FALSE)
  # NOTE(review): the UCI copy of adult.test begins with a
  # "|1x3 Cross validator" banner line. Treating "|" as a comment character
  # drops that line (it would otherwise become a bogus data row and force
  # V1 to be read as text); the option has no effect when the banner has
  # already been removed by hand.
  df2 <- read.csv("adult.test", header = FALSE, comment.char = "|")
  # Strip the trailing "." from the V15 values of the test file.
  df2$V15 <- gsub("\\.$", "", df2$V15)
  df <- rbind(df1, df2)
  write.csv(df, "adult.all", row.names = FALSE)
  c(nrow(df1), nrow(df2))
}
#-- Uncomment this block the first time you run the script: it creates adult.all and defines train/test --
#sz <- init()
#df <- read.csv("adult.all",header=T)
#train <- df[1:sz[1],]
#test <- df[sz[1]+(1:sz[2]),]
#rm(df)
#-- End of first-run block --
# Split the hold-out set once: column 15 supplies the labels handed to
# prediction(), every other column is passed to predict() as newdata.
# `train` and `test` must already exist in the session.
holdout_x <- test[, -15]
holdout_y <- test[, 15]

# Model 1: logistic regression of V15 on all remaining columns of `train`.
model1 <- glm(
  V15 ~ .,
  data = train,
  family = binomial(link = "logit")
)
result1 <- predict(model1, newdata = holdout_x, type = "response")
pred1 <- prediction(result1, holdout_y)
perf1 <- performance(pred1, measure = "prec", x.measure = "rec")
# First curve opens the plot ("prec" against "rec").
plot(perf1, colorize = TRUE)

# Model 2: random forest on the same formula, with mtry fixed at 2.
model2 <- randomForest(V15 ~ ., data = train, mtry = 2)
result2 <- predict(model2, newdata = holdout_x, type = "prob")
# The second column of the probability matrix is used as the score.
pred2 <- prediction(result2[, 2], holdout_y)
perf2 <- performance(pred2, measure = "prec", x.measure = "rec")
# add = TRUE overlays this curve on the plot opened above.
plot(perf2, add = TRUE, colorize = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment