Skip to content

Instantly share code, notes, and snippets.

@kunsen-an
Last active May 2, 2020 05:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kunsen-an/f09966dc2ab7f3975f983091c3800fb5 to your computer and use it in GitHub Desktop.
Save kunsen-an/f09966dc2ab7f3975f983091c3800fb5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import datasets\n",
"\n",
"dataset = datasets.load_breast_cancer()\n",
"X, y = dataset.data, dataset.target"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y,\n",
" test_size=0.2,\n",
" shuffle=True,\n",
" random_state=42,\n",
" stratify=y)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import xgboost\n",
"\n",
"train_dmat = xgboost.DMatrix(X_train, label=y_train)\n",
"test_dmat = xgboost.DMatrix(X_test, label=y_test)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"evals_result = {}\n",
"\n",
"# Validation data used during training\n",
"evals=[\n",
" (train_dmat, 'train'),\n",
" (test_dmat, 'test'),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Define return_callback, which returns print_time, a function that prints the date and time\n",
"import datetime\n",
"\n",
"def return_callback():\n",
"    \"\"\"Return a callback that prints the round index and the current time.\n",
"\n",
"    The returned print_time function receives the env object handed to\n",
"    function-style callbacks and reads env.iteration, env.rank and\n",
"    env.evaluation_result_list from it.\n",
"    \"\"\"\n",
"    # NOTE(review): function-style callbacks look like XGBoost's legacy\n",
"    # interface; newer releases expect xgboost.callback.TrainingCallback\n",
"    # subclasses - confirm against the installed version.\n",
"\n",
"    def print_time(env):\n",
"        \"\"\"Print the round index, a timestamp and any evaluation results.\"\"\"\n",
"        now = datetime.datetime.now()\n",
"        dt_string = now.strftime(\"%Y/%m/%d %H:%M:%S\")\n",
"        i = env.iteration\n",
"        # When env.rank is non-zero or there are no evaluation results,\n",
"        # print only the round index and the timestamp.\n",
"        if env.rank != 0 or len(env.evaluation_result_list) == 0:\n",
"            print(i,dt_string)\n",
"            return\n",
"        # One tab-separated entry per evaluation result.\n",
"        msg = '\\t'.join([str(x) for x in env.evaluation_result_list])\n",
"        print(i,dt_string, msg)\n",
"\n",
"    return print_time\n",
"\n",
"# Put the date/time-printing function into the callbacks list\n",
"callbacks=[\n",
"    return_callback()\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"train_params = {\n",
" 'objective': 'binary:logistic',\n",
" 'eval_metric': 'logloss'\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 2020/05/02 05:53:45 ('train-logloss', 0.46221)\t('test-logloss', 0.48321)\n",
"1 2020/05/02 05:53:45 ('train-logloss', 0.330034)\t('test-logloss', 0.377035)\n",
"2 2020/05/02 05:53:45 ('train-logloss', 0.246)\t('test-logloss', 0.303129)\n",
"3 2020/05/02 05:53:45 ('train-logloss', 0.185871)\t('test-logloss', 0.249715)\n",
"4 2020/05/02 05:53:45 ('train-logloss', 0.144378)\t('test-logloss', 0.223613)\n",
"5 2020/05/02 05:53:45 ('train-logloss', 0.114147)\t('test-logloss', 0.197875)\n",
"6 2020/05/02 05:53:45 ('train-logloss', 0.091335)\t('test-logloss', 0.174586)\n",
"7 2020/05/02 05:53:45 ('train-logloss', 0.075018)\t('test-logloss', 0.160585)\n",
"8 2020/05/02 05:53:45 ('train-logloss', 0.061726)\t('test-logloss', 0.14833)\n",
"9 2020/05/02 05:53:45 ('train-logloss', 0.051826)\t('test-logloss', 0.136636)\n"
]
}
],
"source": [
"booster = xgboost.train(train_params, train_dmat, \n",
" callbacks=callbacks,\n",
" # If the `evals=evals, ...` line below is commented out, no evals are passed and\n",
" # the callback function no longer prints evaluation values for the validation data (there are none to print).\n",
" evals=evals, evals_result=evals_result,\n",
" # Setting verbose_eval to True prints the evaluation values for the validation data given in evals every round,\n",
" # but it is set to False here to keep that output clearly distinct from the callback's output.\n",
" verbose_eval=False\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.9385964912280702\n"
]
}
],
"source": [
"import sklearn.metrics\n",
"import numpy \n",
"\n",
"y_pred_prob = booster.predict(test_dmat)\n",
"y_pred = numpy.where(y_pred_prob > 0.5, 1, 0)\n",
"acc = sklearn.metrics.accuracy_score(y_test, y_pred)\n",
"print('Accuracy:', acc)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"%matplotlib inline\n",
"# Plot the training progress\n",
"from matplotlib import pyplot as plt\n",
"\n",
"train_metric = evals_result['train']['logloss']\n",
"plt.plot(train_metric, label='train logloss')\n",
"eval_metric = evals_result['test']['logloss']\n",
"plt.plot(eval_metric, label='test logloss')\n",
"plt.grid()\n",
"plt.legend()\n",
"plt.xlabel('rounds')\n",
"plt.ylabel('logloss')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment