-
-
Save shurain/8b2fc476aa0e92eca79a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"from pandas.io.parsers import read_csv\n", | |
"%matplotlib inline\n", | |
"\n", | |
"from matplotlib import pyplot as plt\n", | |
"import matplotlib as mpl\n", | |
"\n", | |
"import scipy\n", | |
"\n", | |
"import xgboost as xgb" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"dtrain = xgb.DMatrix(\"train.buffer\")\n", | |
"dvalidation = xgb.DMatrix(\"validation.buffer\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true, | |
"scrolled": true | |
}, | |
"outputs": [], | |
"source": [ | |
"param = {'bst:max_depth':7, 'bst:eta':1, 'silent':1, 'objective':'binary:logistic' }\n", | |
"param['nthread'] = 4\n", | |
"plst = param.items()\n", | |
"plst += [('eval_metric', 'logloss')]\n", | |
"plst += [('eval_metric', 'auc')]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"evallist = [(dvalidation,'eval'), (dtrain,'train')]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"[0]\teval-logloss:0.253334\teval-auc:0.780535\ttrain-logloss:0.248384\ttrain-auc:0.781785\n", | |
"[1]\teval-logloss:0.207300\teval-auc:0.785617\ttrain-logloss:0.209092\ttrain-auc:0.787094\n", | |
"[2]\teval-logloss:0.197886\teval-auc:0.788099\ttrain-logloss:0.195596\ttrain-auc:0.789908\n", | |
"[3]\teval-logloss:0.196388\teval-auc:0.789676\ttrain-logloss:0.193340\ttrain-auc:0.792229\n", | |
"[4]\teval-logloss:0.195568\teval-auc:0.793677\ttrain-logloss:0.192150\ttrain-auc:0.796082\n", | |
"[5]\teval-logloss:0.195593\teval-auc:0.795120\ttrain-logloss:0.191933\ttrain-auc:0.798321\n", | |
"[6]\teval-logloss:0.195982\teval-auc:0.796112\ttrain-logloss:0.192192\ttrain-auc:0.799354\n", | |
"[7]\teval-logloss:0.195935\teval-auc:0.797171\ttrain-logloss:0.192042\ttrain-auc:0.800681\n", | |
"[8]\teval-logloss:0.195762\teval-auc:0.797791\ttrain-logloss:0.191980\ttrain-auc:0.801628\n", | |
"[9]\teval-logloss:0.195653\teval-auc:0.799015\ttrain-logloss:0.191947\ttrain-auc:0.802798\n", | |
"[10]\teval-logloss:0.195533\teval-auc:0.799382\ttrain-logloss:0.191716\ttrain-auc:0.803155\n", | |
"[11]\teval-logloss:0.195838\teval-auc:0.800146\ttrain-logloss:0.191925\ttrain-auc:0.804124\n", | |
"[12]\teval-logloss:0.195927\teval-auc:0.800358\ttrain-logloss:0.191971\ttrain-auc:0.804732\n", | |
"[13]\teval-logloss:0.195886\teval-auc:0.800860\ttrain-logloss:0.191593\ttrain-auc:0.805456\n", | |
"[14]\teval-logloss:0.196272\teval-auc:0.801299\ttrain-logloss:0.191675\ttrain-auc:0.805989\n", | |
"[15]\teval-logloss:0.196464\teval-auc:0.801970\ttrain-logloss:0.190474\ttrain-auc:0.806655\n", | |
"[16]\teval-logloss:0.196404\teval-auc:0.802252\ttrain-logloss:0.190403\ttrain-auc:0.807044\n", | |
"[17]\teval-logloss:0.196262\teval-auc:0.802891\ttrain-logloss:0.190237\ttrain-auc:0.807700\n", | |
"[18]\teval-logloss:0.196599\teval-auc:0.803300\ttrain-logloss:0.190356\ttrain-auc:0.808317\n", | |
"[19]\teval-logloss:0.196430\teval-auc:0.803652\ttrain-logloss:0.190241\ttrain-auc:0.808611\n", | |
"[20]\teval-logloss:0.196436\teval-auc:0.804174\ttrain-logloss:0.190326\ttrain-auc:0.809188\n", | |
"[21]\teval-logloss:0.196494\teval-auc:0.804444\ttrain-logloss:0.190349\ttrain-auc:0.809547\n", | |
"[22]\teval-logloss:0.196617\teval-auc:0.804801\ttrain-logloss:0.190343\ttrain-auc:0.810022\n", | |
"[23]\teval-logloss:0.196617\teval-auc:0.805046\ttrain-logloss:0.190262\ttrain-auc:0.810345\n", | |
"[24]\teval-logloss:0.196569\teval-auc:0.805390\ttrain-logloss:0.190094\ttrain-auc:0.810903\n", | |
"[25]\teval-logloss:0.196540\teval-auc:0.805762\ttrain-logloss:0.189940\ttrain-auc:0.811338\n", | |
"[26]\teval-logloss:0.196496\teval-auc:0.806066\ttrain-logloss:0.189812\ttrain-auc:0.811782\n", | |
"[27]\teval-logloss:0.196562\teval-auc:0.806010\ttrain-logloss:0.189837\ttrain-auc:0.811808\n", | |
"[28]\teval-logloss:0.196723\teval-auc:0.806168\ttrain-logloss:0.190012\ttrain-auc:0.812114\n", | |
"[29]\teval-logloss:0.197192\teval-auc:0.806515\ttrain-logloss:0.190270\ttrain-auc:0.812461\n", | |
"[30]\teval-logloss:0.197457\teval-auc:0.806607\ttrain-logloss:0.190326\ttrain-auc:0.812706\n", | |
"[31]\teval-logloss:0.197358\teval-auc:0.806797\ttrain-logloss:0.190127\ttrain-auc:0.813055\n", | |
"[32]\teval-logloss:0.197435\teval-auc:0.807031\ttrain-logloss:0.190063\ttrain-auc:0.813546\n", | |
"[33]\teval-logloss:0.197510\teval-auc:0.807228\ttrain-logloss:0.190088\ttrain-auc:0.813798\n", | |
"[34]\teval-logloss:0.197452\teval-auc:0.807467\ttrain-logloss:0.190039\ttrain-auc:0.814018\n", | |
"[35]\teval-logloss:0.197579\teval-auc:0.807707\ttrain-logloss:0.190044\ttrain-auc:0.814438\n", | |
"[36]\teval-logloss:0.197523\teval-auc:0.807915\ttrain-logloss:0.189938\ttrain-auc:0.814649\n", | |
"[37]\teval-logloss:0.197562\teval-auc:0.808144\ttrain-logloss:0.189920\ttrain-auc:0.815010\n", | |
"[38]\teval-logloss:0.197583\teval-auc:0.808244\ttrain-logloss:0.189900\ttrain-auc:0.815211\n", | |
"[39]\teval-logloss:0.197524\teval-auc:0.808328\ttrain-logloss:0.189815\ttrain-auc:0.815430\n" | |
] | |
} | |
], | |
"source": [ | |
"num_round = 40\n", | |
"bst = xgb.train( plst, dtrain, num_round, evallist )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"bst.save_model(\"0001.model\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"tr_leafindex = bst.predict(dtrain, pred_leaf=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"va_leafindex = bst.predict(dvalidation, pred_leaf=True)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"tr_y = dtrain.get_label()\n", | |
"va_y = dvalidation.get_label()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"np.save(\"tr.xgb\", tr_leafindex)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"np.save(\"va.xgb\", va_leafindex)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"np.save(\"tr.label\", tr_y)\n", | |
"np.save(\"va.label\", va_y)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.6" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment