Created
July 18, 2019 09:56
-
-
Save yumaueno/f6ca322dd85d2779ce2646c835d2632a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Compare prediction accuracy across several methods ##
## randomForest, SVM, naive Bayes, neural network (and xgboost below)
library(xgboost)
library(caret)
library(Matrix)
library(kernlab)
library(randomForest)
library(e1071)
library(nnet)
library(ggplot2)

titanic <- read.csv("タイタニック乗船者リスト.csv")  # load Titanic passenger data
titanic <- na.omit(titanic)                      # drop rows with missing values
titanic$Survived <- as.factor(titanic$Survived)  # treat outcome as categorical

sim <- 10                    # number of simulation repetitions
result <- matrix(0, sim, 5)  # accuracy holder: one row per repetition, one column per method
## Run `sim` train/test splits; record each method's test accuracy per repetition.
for (i in seq_len(sim)) {
  #### Split into training and test sets ####
  train.id <- sample(nrow(titanic), 400)
  train.data <- titanic[train.id, -1]   # drop first column (presumably an ID — TODO confirm)
  test.data <- titanic[-train.id, -1]

  ## randomForest ##
  rf <- randomForest(Survived ~ ., train.data)
  test.data.rf <- cbind(test.data, "predict" = predict(rf, test.data))
  result[i, 1] <- sum(test.data.rf$predict == test.data.rf$Survived) / nrow(test.data.rf)

  ## SVM ##
  svm <- ksvm(Survived ~ ., train.data)
  test.data.svm <- cbind(test.data, "predict" = predict(svm, test.data))
  result[i, 2] <- sum(test.data.svm$predict == test.data.svm$Survived) / nrow(test.data.svm)

  ## Naive Bayes ##
  nb <- naiveBayes(Survived ~ ., train.data)
  test.data.nb <- cbind(test.data, "predict" = predict(nb, test.data))
  result[i, 3] <- sum(test.data.nb$predict == test.data.nb$Survived) / nrow(test.data.nb)

  ## Neural network ##
  nn <- nnet(Survived ~ ., train.data, size = 5)
  # nnet's predict yields probabilities; round to get 0/1 class labels
  test.data.nn <- cbind(test.data, "predict" = round(predict(nn, test.data), 0))
  result[i, 4] <- sum(test.data.nn$predict == test.data.nn$Survived) / nrow(test.data.nn)

  ## xgboost ##
  label.data.train <- as.integer(train.data$Survived) - 1  # recode factor outcome to 0/1
  label.data.predict <- as.integer(test.data$Survived) - 1
  # Drop the outcome column (assumes Survived is column 1 here — TODO confirm)
  train.data.xg <- train.data[, -1]
  test.data.xg <- test.data[, -1]
  xgb.data <- xgb.DMatrix(data.matrix(train.data.xg), label = label.data.train)
  xgb.data.predict <- xgb.DMatrix(data.matrix(test.data.xg))
  param <- list(
    "objective" = "binary:logistic"
    , "eta" = 0.01
    # ,"max_depth" = 10
    , "min_child_weight" = 5
    # ,"subsample" = 1
    # ,"colsample_bytree" = 1
  )  # ad-hoc hyperparameters
  model <- xgboost(
    param = param
    , data = xgb.data
    , nrounds = 1000
  )  # fit the model
  predict_xgb <- predict(model, xgb.data.predict)
  # Threshold predicted probabilities at 0.5 (vectorized; replaces a scalar
  # if/else loop that grew `a` one element at a time)
  a <- as.integer(predict_xgb > 0.5)
  result[i, 5] <- sum(a == label.data.predict) / length(a)
}
## Average accuracy per method across the `sim` repetitions
result.c <- apply(result, 2, mean)
result.c <- t(as.matrix(result.c))  # 1 x 5 matrix so columns can be named
colnames(result.c) <- c("RF", "SVM", "Bayes", "NN", "xgb")
# (The original duplicated this row via rbind before taking colMeans, which
# is a no-op for the means; the duplication is removed here.)

## Draw a bar chart of mean accuracy by method
df <- data.frame(method = colnames(result.c), result = as.vector(result.c))
gp <- ggplot(df, aes(x = method, y = result, fill = method))
gp <- gp + geom_bar(width = 0.8, stat = "identity")
gp <- gp + ylab("result")
gp <- gp + geom_text(label = df$result, vjust = 1)  # annotate bars with values
plot(gp)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment