Skip to content

Instantly share code, notes, and snippets.

@yumaueno
Created July 18, 2019 09:56
Show Gist options
  • Save yumaueno/f6ca322dd85d2779ce2646c835d2632a to your computer and use it in GitHub Desktop.
Save yumaueno/f6ca322dd85d2779ce2646c835d2632a to your computer and use it in GitHub Desktop.
##様々な手法で予測精度の比較を行う##
##randomForest,SVM,ナイーブベイズ,ニューラルネット
library(xgboost)
library(caret)
library(Matrix)
library(kernlab)
library(randomForest)
library(e1071)
library(nnet)
library(ggplot2)
titanic<-read.csv("タイタニック乗船者リスト.csv")##データの読み込み
titanic<-na.omit(titanic)##欠損値削除
titanic$Survived<-as.factor(titanic$Survived)##質的変数に変換
sim<-10##シミュレーション回数
result<-matrix(0,sim,5)##結果の箱
for(i in 1:sim){
####学習データとテストデータに分ける####
train.id<-sample(nrow(titanic),400)
train.data<-titanic[train.id,-1]
test.data<-titanic[-train.id,-1]
##randomForest##
rf<-randomForest(Survived~.,train.data)
test.data.rf<-cbind(test.data,"predict"=predict(rf,test.data))
result[i,1]<-sum(test.data.rf$predict==test.data.rf$Survived)/nrow(test.data.rf)
##SVM##
svm<-ksvm(Survived~.,train.data)
test.data.svm<-cbind(test.data,"predict"=predict(svm,test.data))
result[i,2]<-sum(test.data.svm$predict==test.data.svm$Survived)/nrow(test.data.svm)
##ナイーブベイズ##
nb<-naiveBayes(Survived~.,train.data)
test.data.nb<-cbind(test.data,"predict"=predict(nb,test.data))
result[i,3]<-sum(test.data.nb$predict==test.data.nb$Survived)/nrow(test.data.nb)
##ニューラルネット##
nn<-nnet(Survived~.,train.data,size=5)
test.data.nn<-cbind(test.data,"predict"=round(predict(nn,test.data),0))
result[i,4]<-sum(test.data.nn$predict==test.data.nn$Survived)/nrow(test.data.nn)
##XGboost##
label.data.train<-as.integer(train.data$Survived)-1##目的変数を01に
label.data.predict<-as.integer(test.data$Survived)-1
train.data.xg<-train.data[,-1]##目的変数取り除く
test.data.xg<-test.data[,-1]
xgb.data<-xgb.DMatrix(data.matrix(train.data.xg),label=label.data.train)##Xgb型へ成型
xgb.data.predict<-xgb.DMatrix(data.matrix(test.data.xg))
param <- list(
"objective" = "binary:logistic"
,"eta" = 0.01
# ,"max_depth" = 10
,"min_child_weight" = 5
# ,"subsample" = 1
# ,"colsample_bytree" = 1
)##パラメータ適当
model <- xgboost(
param=param
,data=xgb.data
,nrounds=1000
)##実際の学習
predict_xgb<-predict(model,xgb.data.predict)
a<-c()
m<-length(predict_xgb)#行数
for(l in 1:m){
if(predict_xgb[l]>0.5){a[l]<-1}
else{a[l]<-0}
}
result[i,5]<-sum(a==label.data.predict)/length(a)
}
result.c<-apply(result,2,mean)##simの平均値
result.c<-t(as.matrix(result.c))
result.c<-rbind(result.c,result.c)
##グラフ描画
colnames(result.c) <- c("RF","SVM","Bayes","NN","xgb")
df<-data.frame(method=colnames(result.c),result=colMeans(result.c))
gp<-ggplot(df,aes(x=method,y=result,fill=method))
gp<-gp+geom_bar(width=0.8,stat="identity")
gp<-gp+ylab("result")
gp<-gp+geom_text(label=colMeans(result.c),vjust=1)
plot(gp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment