Skip to content

Instantly share code, notes, and snippets.

@tsu-nera
Last active October 8, 2017 03:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tsu-nera/c6d9c1c68951769668faa9fdcc5556c7 to your computer and use it in GitHub Desktop.
Save tsu-nera/c6d9c1c68951769668faa9fdcc5556c7 to your computer and use it in GitHub Desktop.
kaggle titanic rpart gridsearch w/ mlr
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"# rpart"
]
},
{
"cell_type": "markdown",
"metadata": {
"deletable": true,
"editable": true
},
"source": [
"* [RPubs \\- Kaggle Titanic R Tutorial](https://rpubs.com/anyuanay/283773)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"library(rpart)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"training <- read.csv(\"~/repo/kaggle/input/titanic/train.csv\")\n",
"testing <- read.csv(\"~/repo/kaggle/input/titanic/test.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"trainingSel = subset(training, select = c('Survived', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked'))\n",
"testingSel = subset(testing, select = c('Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Embarked'))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"trainingSel = subset(trainingSel, Embarked != '')\n",
"trainingSel$Embarked = droplevels(trainingSel$Embarked, \"\")\n",
"testingSel = subset(testingSel, Embarked != '')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"agemedian = median(trainingSel$Age, na.rm = TRUE)\n",
"trainingSel$Age = replace(trainingSel$Age, is.na(trainingSel$Age), agemedian)\n",
"testingSel$Age = replace(testing$Age, is.na(testing$Age), agemedian)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"tree<-rpart(Survived~., #目的変数~説明変数, 「.」はy以外の全変数を説明変数とするという意味\n",
" data=trainingSel, #学習データ\n",
" maxdepth=9, #階層数\n",
" minbucket=60, #最小ノードサイズ\n",
" cp=1e-04, #枝刈りの強さ\n",
" method=\"class\", #分類\n",
" parms=list(split=\"infomation\") #分割基準, list(split=\"information\")とするとEntropy基準となる\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"pred_test<-predict(tree, testingSel, type=\"class\")"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"out<-data.frame(testing$PassengerId, pred_test)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": [
"write.table(out, #出力データ\n",
" \"~/repo/kaggle/titanic/submit/submit_1008_7_rpart.csv\", #出力先\n",
" quote=FALSE, #文字列を「\"」で囲む有無\n",
" col.names=c(\"PassengerId\", \"Survived\"), #変数名(列名)の有無\n",
" row.names=FALSE, #行番号の有無\n",
" sep=\",\" #区切り文字の指定\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true,
"deletable": true,
"editable": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"codemirror_mode": "r",
"file_extension": ".r",
"mimetype": "text/x-r-source",
"name": "R",
"pygments_lexer": "r",
"version": "3.4.1"
},
"toc": {
"colors": {
"hover_highlight": "#DAA520",
"navigate_num": "#000000",
"navigate_text": "#333333",
"running_highlight": "#FF0000",
"selected_highlight": "#FFD700",
"sidebar_border": "#EEEEEE",
"wrapper_background": "#FFFFFF"
},
"moveMenuLeft": true,
"nav_menu": {
"height": "21px",
"width": "253px"
},
"navigate_menu": true,
"number_sections": false,
"sideBar": true,
"threshold": 4,
"toc_cell": false,
"toc_section_display": "block",
"toc_window_display": false,
"widenNotebook": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment