Skip to content

Instantly share code, notes, and snippets.

@sinhrks
Last active February 22, 2018 06:13
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save sinhrks/cc9a88f74074fc296e12 to your computer and use it in GitHub Desktop.
Save sinhrks/cc9a88f74074fc296e12 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'0.4'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import xgboost as xgb\n",
"from sklearn import datasets\n",
"\n",
"import matplotlib.pyplot as plt\n",
"plt.style.use('ggplot')\n",
"\n",
"xgb.__version__"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create DMatrix from pandas.DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SepalLength</th>\n",
" <th>SepalWidth</th>\n",
" <th>PetalLength</th>\n",
" <th>PetalWidth</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>5.1</td>\n",
" <td>3.5</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>4.9</td>\n",
" <td>3.0</td>\n",
" <td>1.4</td>\n",
" <td>0.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>4.7</td>\n",
" <td>3.2</td>\n",
" <td>1.3</td>\n",
" <td>0.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4.6</td>\n",
" <td>3.1</td>\n",
" <td>1.5</td>\n",
" <td>0.2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>6.3</td>\n",
" <td>2.5</td>\n",
" <td>5.0</td>\n",
" <td>1.9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>6.5</td>\n",
" <td>3.0</td>\n",
" <td>5.2</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>6.2</td>\n",
" <td>3.4</td>\n",
" <td>5.4</td>\n",
" <td>2.3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>149</th>\n",
" <td>5.9</td>\n",
" <td>3.0</td>\n",
" <td>5.1</td>\n",
" <td>1.8</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>150 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" SepalLength SepalWidth PetalLength PetalWidth\n",
"0 5.1 3.5 1.4 0.2\n",
"1 4.9 3.0 1.4 0.2\n",
"2 4.7 3.2 1.3 0.2\n",
"3 4.6 3.1 1.5 0.2\n",
".. ... ... ... ...\n",
"146 6.3 2.5 5.0 1.9\n",
"147 6.5 3.0 5.2 2.0\n",
"148 6.2 3.4 5.4 2.3\n",
"149 5.9 3.0 5.1 1.8\n",
"\n",
"[150 rows x 4 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"iris = datasets.load_iris()\n",
"\n",
"import pandas as pd\n",
"pd.set_option('display.max_rows', 8)\n",
"\n",
"train = pd.DataFrame(iris.data, columns=['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth'])\n",
"train"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(150L, 4L)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dm = xgb.DMatrix(train, label=iris.target)\n",
"dm.num_row(), dm.num_col()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['SepalLength', 'SepalWidth', 'PetalLength', 'PetalWidth']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dm.feature_names"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"['q', 'q', 'q', 'q']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dm.feature_types"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### cv now returns pandas.DataFrame or np.ndarray"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>test-mlogloss-mean</th>\n",
" <th>test-mlogloss-std</th>\n",
" <th>train-mlogloss-mean</th>\n",
" <th>train-mlogloss-std</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.753459</td>\n",
" <td>0.027033</td>\n",
" <td>0.737631</td>\n",
" <td>0.003818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.552303</td>\n",
" <td>0.048738</td>\n",
" <td>0.526929</td>\n",
" <td>0.005102</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.423481</td>\n",
" <td>0.066469</td>\n",
" <td>0.390115</td>\n",
" <td>0.005873</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.339942</td>\n",
" <td>0.082163</td>\n",
" <td>0.295637</td>\n",
" <td>0.006148</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.219242</td>\n",
" <td>0.124195</td>\n",
" <td>0.143760</td>\n",
" <td>0.006318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.200365</td>\n",
" <td>0.137163</td>\n",
" <td>0.116560</td>\n",
" <td>0.006130</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.187477</td>\n",
" <td>0.145066</td>\n",
" <td>0.096047</td>\n",
" <td>0.005444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.181228</td>\n",
" <td>0.156536</td>\n",
" <td>0.080041</td>\n",
" <td>0.005265</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" test-mlogloss-mean test-mlogloss-std train-mlogloss-mean \\\n",
"0 0.753459 0.027033 0.737631 \n",
"1 0.552303 0.048738 0.526929 \n",
"2 0.423481 0.066469 0.390115 \n",
"3 0.339942 0.082163 0.295637 \n",
".. ... ... ... \n",
"6 0.219242 0.124195 0.143760 \n",
"7 0.200365 0.137163 0.116560 \n",
"8 0.187477 0.145066 0.096047 \n",
"9 0.181228 0.156536 0.080041 \n",
"\n",
" train-mlogloss-std \n",
"0 0.003818 \n",
"1 0.005102 \n",
"2 0.005873 \n",
"3 0.006148 \n",
".. ... \n",
"6 0.006318 \n",
"7 0.006130 \n",
"8 0.005444 \n",
"9 0.005265 \n",
"\n",
"[10 rows x 4 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"params={'objective': 'multi:softprob',\n",
" 'eval_metric': 'mlogloss',\n",
" 'eta': 0.3,\n",
" 'num_class': 3}\n",
"\n",
"# default (returns pd.DataFrame, progress report is disabled)\n",
"# if pandas is not installed, this behaves as if as_pandas=False were specified (see below)\n",
"xgb.cv(params, dm, num_boost_round=10, nfold=10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[0]\tcv-test-mlogloss:0.7534586+0.0270330788894\tcv-train-mlogloss:0.7376308+0.00381774878168\n",
"[1]\tcv-test-mlogloss:0.5523035+0.0487375163775\tcv-train-mlogloss:0.5269287+0.00510218267117\n",
"[2]\tcv-test-mlogloss:0.4234808+0.0664692426297\tcv-train-mlogloss:0.3901153+0.00587297442613\n",
"[3]\tcv-test-mlogloss:0.3399421+0.082162847007\tcv-train-mlogloss:0.2956371+0.0061480195421\n",
"[4]\tcv-test-mlogloss:0.2822133+0.094546005664\tcv-train-mlogloss:0.2284948+0.00582542583508\n",
"[5]\tcv-test-mlogloss:0.2445561+0.108854130468\tcv-train-mlogloss:0.1798245+0.00614406771203\n",
"[6]\tcv-test-mlogloss:0.2192424+0.124194576109\tcv-train-mlogloss:0.1437596+0.0063180363595\n",
"[7]\tcv-test-mlogloss:0.2003654+0.137162644979\tcv-train-mlogloss:0.1165601+0.00612969169616\n",
"[8]\tcv-test-mlogloss:0.1874767+0.145066157809\tcv-train-mlogloss:0.0960466+0.00544370076694\n",
"[9]\tcv-test-mlogloss:0.1812277+0.156536125975\tcv-train-mlogloss:0.0800408+0.0052649908224\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>test-mlogloss-mean</th>\n",
" <th>test-mlogloss-std</th>\n",
" <th>train-mlogloss-mean</th>\n",
" <th>train-mlogloss-std</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.753459</td>\n",
" <td>0.027033</td>\n",
" <td>0.737631</td>\n",
" <td>0.003818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.552303</td>\n",
" <td>0.048738</td>\n",
" <td>0.526929</td>\n",
" <td>0.005102</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.423481</td>\n",
" <td>0.066469</td>\n",
" <td>0.390115</td>\n",
" <td>0.005873</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.339942</td>\n",
" <td>0.082163</td>\n",
" <td>0.295637</td>\n",
" <td>0.006148</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.219242</td>\n",
" <td>0.124195</td>\n",
" <td>0.143760</td>\n",
" <td>0.006318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.200365</td>\n",
" <td>0.137163</td>\n",
" <td>0.116560</td>\n",
" <td>0.006130</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.187477</td>\n",
" <td>0.145066</td>\n",
" <td>0.096047</td>\n",
" <td>0.005444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.181228</td>\n",
" <td>0.156536</td>\n",
" <td>0.080041</td>\n",
" <td>0.005265</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" test-mlogloss-mean test-mlogloss-std train-mlogloss-mean \\\n",
"0 0.753459 0.027033 0.737631 \n",
"1 0.552303 0.048738 0.526929 \n",
"2 0.423481 0.066469 0.390115 \n",
"3 0.339942 0.082163 0.295637 \n",
".. ... ... ... \n",
"6 0.219242 0.124195 0.143760 \n",
"7 0.200365 0.137163 0.116560 \n",
"8 0.187477 0.145066 0.096047 \n",
"9 0.181228 0.156536 0.080041 \n",
"\n",
" train-mlogloss-std \n",
"0 0.003818 \n",
"1 0.005102 \n",
"2 0.005873 \n",
"3 0.006148 \n",
".. ... \n",
"6 0.006318 \n",
"7 0.006130 \n",
"8 0.005444 \n",
"9 0.005265 \n",
"\n",
"[10 rows x 4 columns]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Specify show_progress=True explicitly to display progress\n",
"xgb.cv(params, dm, num_boost_round=10, nfold=10, show_progress=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[0]\tcv-test-mlogloss:0.7534586\tcv-train-mlogloss:0.7376308\n",
"[1]\tcv-test-mlogloss:0.5523035\tcv-train-mlogloss:0.5269287\n",
"[2]\tcv-test-mlogloss:0.4234808\tcv-train-mlogloss:0.3901153\n",
"[3]\tcv-test-mlogloss:0.3399421\tcv-train-mlogloss:0.2956371\n",
"[4]\tcv-test-mlogloss:0.2822133\tcv-train-mlogloss:0.2284948\n",
"[5]\tcv-test-mlogloss:0.2445561\tcv-train-mlogloss:0.1798245\n",
"[6]\tcv-test-mlogloss:0.2192424\tcv-train-mlogloss:0.1437596\n",
"[7]\tcv-test-mlogloss:0.2003654\tcv-train-mlogloss:0.1165601\n",
"[8]\tcv-test-mlogloss:0.1874767\tcv-train-mlogloss:0.0960466\n",
"[9]\tcv-test-mlogloss:0.1812277\tcv-train-mlogloss:0.0800408\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>test-mlogloss-mean</th>\n",
" <th>test-mlogloss-std</th>\n",
" <th>train-mlogloss-mean</th>\n",
" <th>train-mlogloss-std</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.753459</td>\n",
" <td>0.027033</td>\n",
" <td>0.737631</td>\n",
" <td>0.003818</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.552303</td>\n",
" <td>0.048738</td>\n",
" <td>0.526929</td>\n",
" <td>0.005102</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.423481</td>\n",
" <td>0.066469</td>\n",
" <td>0.390115</td>\n",
" <td>0.005873</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.339942</td>\n",
" <td>0.082163</td>\n",
" <td>0.295637</td>\n",
" <td>0.006148</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.219242</td>\n",
" <td>0.124195</td>\n",
" <td>0.143760</td>\n",
" <td>0.006318</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.200365</td>\n",
" <td>0.137163</td>\n",
" <td>0.116560</td>\n",
" <td>0.006130</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.187477</td>\n",
" <td>0.145066</td>\n",
" <td>0.096047</td>\n",
" <td>0.005444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.181228</td>\n",
" <td>0.156536</td>\n",
" <td>0.080041</td>\n",
" <td>0.005265</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" test-mlogloss-mean test-mlogloss-std train-mlogloss-mean \\\n",
"0 0.753459 0.027033 0.737631 \n",
"1 0.552303 0.048738 0.526929 \n",
"2 0.423481 0.066469 0.390115 \n",
"3 0.339942 0.082163 0.295637 \n",
".. ... ... ... \n",
"6 0.219242 0.124195 0.143760 \n",
"7 0.200365 0.137163 0.116560 \n",
"8 0.187477 0.145066 0.096047 \n",
"9 0.181228 0.156536 0.080041 \n",
"\n",
" train-mlogloss-std \n",
"0 0.003818 \n",
"1 0.005102 \n",
"2 0.005873 \n",
"3 0.006148 \n",
".. ... \n",
"6 0.006318 \n",
"7 0.006130 \n",
"8 0.005444 \n",
"9 0.005265 \n",
"\n",
"[10 rows x 4 columns]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Specify show_stdv=False to hide stdv from the progress output (for back-compat)\n",
"# Note that the result always contains stdv\n",
"xgb.cv(params, dm, num_boost_round=10, nfold=10, show_progress=True, show_stdv=False)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[0]\tcv-test-mlogloss:0.7534586+0.0270330788894\tcv-train-mlogloss:0.7376308+0.00381774878168\n",
"[1]\tcv-test-mlogloss:0.5523035+0.0487375163775\tcv-train-mlogloss:0.5269287+0.00510218267117\n",
"[2]\tcv-test-mlogloss:0.4234808+0.0664692426297\tcv-train-mlogloss:0.3901153+0.00587297442613\n",
"[3]\tcv-test-mlogloss:0.3399421+0.082162847007\tcv-train-mlogloss:0.2956371+0.0061480195421\n",
"[4]\tcv-test-mlogloss:0.2822133+0.094546005664\tcv-train-mlogloss:0.2284948+0.00582542583508\n",
"[5]\tcv-test-mlogloss:0.2445561+0.108854130468\tcv-train-mlogloss:0.1798245+0.00614406771203\n",
"[6]\tcv-test-mlogloss:0.2192424+0.124194576109\tcv-train-mlogloss:0.1437596+0.0063180363595\n",
"[7]\tcv-test-mlogloss:0.2003654+0.137162644979\tcv-train-mlogloss:0.1165601+0.00612969169616\n",
"[8]\tcv-test-mlogloss:0.1874767+0.145066157809\tcv-train-mlogloss:0.0960466+0.00544370076694\n",
"[9]\tcv-test-mlogloss:0.1812277+0.156536125975\tcv-train-mlogloss:0.0800408+0.0052649908224\n"
]
},
{
"data": {
"text/plain": [
"array([[ 0.7534586 , 0.02703308, 0.7376308 , 0.00381775],\n",
" [ 0.5523035 , 0.04873752, 0.5269287 , 0.00510218],\n",
" [ 0.4234808 , 0.06646924, 0.3901153 , 0.00587297],\n",
" [ 0.3399421 , 0.08216285, 0.2956371 , 0.00614802],\n",
" [ 0.2822133 , 0.09454601, 0.2284948 , 0.00582543],\n",
" [ 0.2445561 , 0.10885413, 0.1798245 , 0.00614407],\n",
" [ 0.2192424 , 0.12419458, 0.1437596 , 0.00631804],\n",
" [ 0.2003654 , 0.13716264, 0.1165601 , 0.00612969],\n",
" [ 0.1874767 , 0.14506616, 0.0960466 , 0.0054437 ],\n",
" [ 0.1812277 , 0.15653613, 0.0800408 , 0.00526499]])"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# specifying as_pandas=False returns np.ndarray\n",
"# progress report is enabled because returned np.ndarray can't contain metadata\n",
"xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0.7534586 , 0.02703308, 0.7376308 , 0.00381775],\n",
" [ 0.5523035 , 0.04873752, 0.5269287 , 0.00510218],\n",
" [ 0.4234808 , 0.06646924, 0.3901153 , 0.00587297],\n",
" [ 0.3399421 , 0.08216285, 0.2956371 , 0.00614802],\n",
" [ 0.2822133 , 0.09454601, 0.2284948 , 0.00582543],\n",
" [ 0.2445561 , 0.10885413, 0.1798245 , 0.00614407],\n",
" [ 0.2192424 , 0.12419458, 0.1437596 , 0.00631804],\n",
" [ 0.2003654 , 0.13716264, 0.1165601 , 0.00612969],\n",
" [ 0.1874767 , 0.14506616, 0.0960466 , 0.0054437 ],\n",
" [ 0.1812277 , 0.15653613, 0.0800408 , 0.00526499]])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# we can hide progress explicitly\n",
"xgb.cv(params, dm, num_boost_round=10, nfold=10, as_pandas=False, show_progress=False)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>test-error-mean</th>\n",
" <th>test-error-std</th>\n",
" <th>train-error-mean</th>\n",
" <th>train-error-std</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.046544</td>\n",
" <td>0.007774</td>\n",
" <td>0.046544</td>\n",
" <td>0.000864</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.022273</td>\n",
" <td>0.004821</td>\n",
" <td>0.022273</td>\n",
" <td>0.000536</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.007066</td>\n",
" <td>0.002678</td>\n",
" <td>0.007066</td>\n",
" <td>0.000298</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.015207</td>\n",
" <td>0.003791</td>\n",
" <td>0.015207</td>\n",
" <td>0.000421</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.001229</td>\n",
" <td>0.001339</td>\n",
" <td>0.001229</td>\n",
" <td>0.000149</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.001229</td>\n",
" <td>0.001339</td>\n",
" <td>0.001229</td>\n",
" <td>0.000149</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.001229</td>\n",
" <td>0.001339</td>\n",
" <td>0.000956</td>\n",
" <td>0.000496</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.001229</td>\n",
" <td>0.001339</td>\n",
" <td>0.000683</td>\n",
" <td>0.000566</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" test-error-mean test-error-std train-error-mean train-error-std\n",
"0 0.046544 0.007774 0.046544 0.000864\n",
"1 0.022273 0.004821 0.022273 0.000536\n",
"2 0.007066 0.002678 0.007066 0.000298\n",
"3 0.015207 0.003791 0.015207 0.000421\n",
".. ... ... ... ...\n",
"6 0.001229 0.001339 0.001229 0.000149\n",
"7 0.001229 0.001339 0.001229 0.000149\n",
"8 0.001229 0.001339 0.000956 0.000496\n",
"9 0.001229 0.001339 0.000683 0.000566\n",
"\n",
"[10 rows x 4 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# column names change depending on the evaluation metric\n",
"\n",
"dpath = '/Users/sin/Documents/Git/xgboost/demo/data/'\n",
"dtrain = xgb.DMatrix(dpath + 'agaricus.txt.train')\n",
"param = {'max_depth':2, 'eta':1, 'silent':1, 'objective':'binary:logistic' }\n",
"# run 10-fold cross-validation (no separate validation set is passed to cv)\n",
"xgb.cv(param, dtrain, num_boost_round=10, nfold=10)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment