Skip to content

Instantly share code, notes, and snippets.

@shurain
Created June 12, 2015 07:55
Show Gist options
  • Save shurain/8b2fc476aa0e92eca79a to your computer and use it in GitHub Desktop.
Save shurain/8b2fc476aa0e92eca79a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from pandas.io.parsers import read_csv\n",
"%matplotlib inline\n",
"\n",
"from matplotlib import pyplot as plt\n",
"import matplotlib as mpl\n",
"\n",
"import scipy\n",
"\n",
"import xgboost as xgb"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Load the training and validation sets from files in the working directory\n",
"# (presumably XGBoost binary buffers produced by an earlier step -- confirm).\n",
"dtrain = xgb.DMatrix(\"train.buffer\")\n",
"dvalidation = xgb.DMatrix(\"validation.buffer\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true,
"scrolled": true
},
"outputs": [],
"source": [
"# Booster settings for a binary classifier. Plain parameter names are used:\n",
"# the legacy 'bst:' prefix is not recognised by current XGBoost releases, so\n",
"# prefixed depth / learning-rate settings would not take effect there.\n",
"# NOTE(review): 'silent' was superseded by 'verbosity' in later releases --\n",
"# switch once the target XGBoost version is confirmed.\n",
"param = {'max_depth': 7, 'eta': 1, 'silent': 1, 'objective': 'binary:logistic'}\n",
"param['nthread'] = 4\n",
"\n",
"# Materialise the items as a list: on Python 3 dict.items() is a view and\n",
"# does not support +=. On Python 2 this is simply a copy of the same list.\n",
"plst = list(param.items())\n",
"\n",
"# A dict cannot hold the same key twice, so each eval_metric is appended as\n",
"# its own (key, value) pair; both metrics then appear in the training log.\n",
"plst += [('eval_metric', 'logloss')]\n",
"plst += [('eval_metric', 'auc')]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Datasets to score during training, each paired with the label that\n",
"# prefixes its metrics in the training log (eval-*, train-*).\n",
"evallist = [\n",
"    (dvalidation, 'eval'),\n",
"    (dtrain, 'train'),\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[0]\teval-logloss:0.253334\teval-auc:0.780535\ttrain-logloss:0.248384\ttrain-auc:0.781785\n",
"[1]\teval-logloss:0.207300\teval-auc:0.785617\ttrain-logloss:0.209092\ttrain-auc:0.787094\n",
"[2]\teval-logloss:0.197886\teval-auc:0.788099\ttrain-logloss:0.195596\ttrain-auc:0.789908\n",
"[3]\teval-logloss:0.196388\teval-auc:0.789676\ttrain-logloss:0.193340\ttrain-auc:0.792229\n",
"[4]\teval-logloss:0.195568\teval-auc:0.793677\ttrain-logloss:0.192150\ttrain-auc:0.796082\n",
"[5]\teval-logloss:0.195593\teval-auc:0.795120\ttrain-logloss:0.191933\ttrain-auc:0.798321\n",
"[6]\teval-logloss:0.195982\teval-auc:0.796112\ttrain-logloss:0.192192\ttrain-auc:0.799354\n",
"[7]\teval-logloss:0.195935\teval-auc:0.797171\ttrain-logloss:0.192042\ttrain-auc:0.800681\n",
"[8]\teval-logloss:0.195762\teval-auc:0.797791\ttrain-logloss:0.191980\ttrain-auc:0.801628\n",
"[9]\teval-logloss:0.195653\teval-auc:0.799015\ttrain-logloss:0.191947\ttrain-auc:0.802798\n",
"[10]\teval-logloss:0.195533\teval-auc:0.799382\ttrain-logloss:0.191716\ttrain-auc:0.803155\n",
"[11]\teval-logloss:0.195838\teval-auc:0.800146\ttrain-logloss:0.191925\ttrain-auc:0.804124\n",
"[12]\teval-logloss:0.195927\teval-auc:0.800358\ttrain-logloss:0.191971\ttrain-auc:0.804732\n",
"[13]\teval-logloss:0.195886\teval-auc:0.800860\ttrain-logloss:0.191593\ttrain-auc:0.805456\n",
"[14]\teval-logloss:0.196272\teval-auc:0.801299\ttrain-logloss:0.191675\ttrain-auc:0.805989\n",
"[15]\teval-logloss:0.196464\teval-auc:0.801970\ttrain-logloss:0.190474\ttrain-auc:0.806655\n",
"[16]\teval-logloss:0.196404\teval-auc:0.802252\ttrain-logloss:0.190403\ttrain-auc:0.807044\n",
"[17]\teval-logloss:0.196262\teval-auc:0.802891\ttrain-logloss:0.190237\ttrain-auc:0.807700\n",
"[18]\teval-logloss:0.196599\teval-auc:0.803300\ttrain-logloss:0.190356\ttrain-auc:0.808317\n",
"[19]\teval-logloss:0.196430\teval-auc:0.803652\ttrain-logloss:0.190241\ttrain-auc:0.808611\n",
"[20]\teval-logloss:0.196436\teval-auc:0.804174\ttrain-logloss:0.190326\ttrain-auc:0.809188\n",
"[21]\teval-logloss:0.196494\teval-auc:0.804444\ttrain-logloss:0.190349\ttrain-auc:0.809547\n",
"[22]\teval-logloss:0.196617\teval-auc:0.804801\ttrain-logloss:0.190343\ttrain-auc:0.810022\n",
"[23]\teval-logloss:0.196617\teval-auc:0.805046\ttrain-logloss:0.190262\ttrain-auc:0.810345\n",
"[24]\teval-logloss:0.196569\teval-auc:0.805390\ttrain-logloss:0.190094\ttrain-auc:0.810903\n",
"[25]\teval-logloss:0.196540\teval-auc:0.805762\ttrain-logloss:0.189940\ttrain-auc:0.811338\n",
"[26]\teval-logloss:0.196496\teval-auc:0.806066\ttrain-logloss:0.189812\ttrain-auc:0.811782\n",
"[27]\teval-logloss:0.196562\teval-auc:0.806010\ttrain-logloss:0.189837\ttrain-auc:0.811808\n",
"[28]\teval-logloss:0.196723\teval-auc:0.806168\ttrain-logloss:0.190012\ttrain-auc:0.812114\n",
"[29]\teval-logloss:0.197192\teval-auc:0.806515\ttrain-logloss:0.190270\ttrain-auc:0.812461\n",
"[30]\teval-logloss:0.197457\teval-auc:0.806607\ttrain-logloss:0.190326\ttrain-auc:0.812706\n",
"[31]\teval-logloss:0.197358\teval-auc:0.806797\ttrain-logloss:0.190127\ttrain-auc:0.813055\n",
"[32]\teval-logloss:0.197435\teval-auc:0.807031\ttrain-logloss:0.190063\ttrain-auc:0.813546\n",
"[33]\teval-logloss:0.197510\teval-auc:0.807228\ttrain-logloss:0.190088\ttrain-auc:0.813798\n",
"[34]\teval-logloss:0.197452\teval-auc:0.807467\ttrain-logloss:0.190039\ttrain-auc:0.814018\n",
"[35]\teval-logloss:0.197579\teval-auc:0.807707\ttrain-logloss:0.190044\ttrain-auc:0.814438\n",
"[36]\teval-logloss:0.197523\teval-auc:0.807915\ttrain-logloss:0.189938\ttrain-auc:0.814649\n",
"[37]\teval-logloss:0.197562\teval-auc:0.808144\ttrain-logloss:0.189920\ttrain-auc:0.815010\n",
"[38]\teval-logloss:0.197583\teval-auc:0.808244\ttrain-logloss:0.189900\ttrain-auc:0.815211\n",
"[39]\teval-logloss:0.197524\teval-auc:0.808328\ttrain-logloss:0.189815\ttrain-auc:0.815430\n"
]
}
],
"source": [
"# Train for a fixed number of boosting rounds; every dataset in evallist is\n",
"# scored after each round and the metrics are written to the log below.\n",
"num_round = 40\n",
"bst = xgb.train(\n",
"    plst,\n",
"    dtrain,\n",
"    num_boost_round=num_round,\n",
"    evals=evallist,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Persist the trained booster to a file in the working directory.\n",
"bst.save_model(\"0001.model\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# pred_leaf=True requests, for each training row, the index of the leaf it\n",
"# lands in for every tree, instead of a predicted probability.\n",
"tr_leafindex = bst.predict(dtrain, pred_leaf=True)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Per-tree leaf indices for the validation rows (pred_leaf=True), computed\n",
"# with the same booster.\n",
"va_leafindex = bst.predict(dvalidation, pred_leaf=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Pull the label vectors out of both DMatrix objects so they can be saved\n",
"# alongside the leaf-index arrays.\n",
"tr_y = dtrain.get_label()\n",
"va_y = dvalidation.get_label()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Save the training leaf indices. np.save appends '.npy' when the name does\n",
"# not already end with it, so the file on disk is 'tr.xgb.npy'.\n",
"np.save(\"tr.xgb\", tr_leafindex)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Save the validation leaf indices. np.save appends '.npy' when the name does\n",
"# not already end with it, so the file on disk is 'va.xgb.npy'.\n",
"np.save(\"va.xgb\", va_leafindex)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Save both label vectors. np.save appends '.npy' to each name, giving\n",
"# 'tr.label.npy' and 'va.label.npy' on disk.\n",
"np.save(\"tr.label\", tr_y)\n",
"np.save(\"va.label\", va_y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment