Last active
October 8, 2017 03:22
-
-
Save tsu-nera/c6d9c1c68951769668faa9fdcc5556c7 to your computer and use it in GitHub Desktop.
kaggle titanic rpart gridsearch w/ mlr
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"deletable": true, | |
"editable": true | |
}, | |
"source": [ | |
"# rpart" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"deletable": true, | |
"editable": true | |
}, | |
"source": [ | |
"* [RPubs \\- Kaggle Titanic R Tutorial](https://rpubs.com/anyuanay/283773)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"library(rpart)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"training <- read.csv(\"~/repo/kaggle/input/titanic/train.csv\")\n", | |
"testing <- read.csv(\"~/repo/kaggle/input/titanic/test.csv\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"trainingSel = subset(training, select = c('Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked'))\n", | |
"testingSel = subset(testing, select = c('Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked'))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"trainingSel = subset(trainingSel, Embarked != '')\n", | |
"trainingSel$Embarked = droplevels(trainingSel$Embarked, \"\")\n", | |
"testingSel = subset(testingSel, Embarked != '')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"agemedian = median(trainingSel$Age, na.rm = TRUE)\n", | |
"trainingSel$Age = replace(trainingSel$Age, is.na(trainingSel$Age), agemedian)\n", | |
"testingSel$Age = replace(testing$Age, is.na(testing$Age), agemedian)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"tree<-rpart(Survived~., #目的変数~説明変数, 「.」はy以外の全変数を説明変数とするという意味\n", | |
" data=trainingSel, #学習データ\n", | |
" maxdepth=9, #階層数\n", | |
" minbucket=60, #最小ノードサイズ\n", | |
" cp=1e-04, #枝刈りの強さ\n", | |
" method=\"class\", #分類\n", | |
" parms=list(split=\"infomation\") #分割基準, list(split=\"information\")とするとEntropy基準となる\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"pred_test<-predict(tree, testingSel, type=\"class\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": { | |
"collapsed": false, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"out<-data.frame(testing$PassengerId, pred_test)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": true, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [ | |
"write.table(out, #出力データ\n", | |
" \"~/repo/kaggle/titanic/submit/submit_1008_7_rpart.csv\", #出力先\n", | |
" quote=FALSE, #文字列を「\"」で囲む有無\n", | |
" col.names=c(\"PassengerId\", \"Survived\"), #変数名(列名)の有無\n", | |
" row.names=FALSE, #行番号の有無\n", | |
" sep=\",\" #区切り文字の指定\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true, | |
"deletable": true, | |
"editable": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "R", | |
"language": "R", | |
"name": "ir" | |
}, | |
"language_info": { | |
"codemirror_mode": "r", | |
"file_extension": ".r", | |
"mimetype": "text/x-r-source", | |
"name": "R", | |
"pygments_lexer": "r", | |
"version": "3.4.1" | |
}, | |
"toc": { | |
"colors": { | |
"hover_highlight": "#DAA520", | |
"navigate_num": "#000000", | |
"navigate_text": "#333333", | |
"running_highlight": "#FF0000", | |
"selected_highlight": "#FFD700", | |
"sidebar_border": "#EEEEEE", | |
"wrapper_background": "#FFFFFF" | |
}, | |
"moveMenuLeft": true, | |
"nav_menu": { | |
"height": "21px", | |
"width": "253px" | |
}, | |
"navigate_menu": true, | |
"number_sections": false, | |
"sideBar": true, | |
"threshold": 4, | |
"toc_cell": false, | |
"toc_section_display": "block", | |
"toc_window_display": false, | |
"widenNotebook": false | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment