Skip to content

Instantly share code, notes, and snippets.

@daxiongshu
Last active July 13, 2019 01:05
Show Gist options
  • Save daxiongshu/d46a45d22c167419964f71f22315148c to your computer and use it in GitHub Desktop.
Save daxiongshu/d46a45d22c167419964f71f22315148c to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(200000, 202)\n"
]
}
],
"source": [
"import time\n",
"import warnings\n",
"\n",
"import cudf as gd\n",
"import pandas as pd\n",
"import xgboost as xgb\n",
"\n",
"# Silence every warning raised from here on.\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"# Input directory plus an explicit CSV schema: ID_code and target first,\n",
"# then 200 float32 feature columns named var_0 .. var_199.\n",
"PATH = '../input'\n",
"cols = ['ID_code', 'target'] + ['var_{}'.format(i) for i in range(200)]\n",
"dtypes = ['int32'] * 2 + ['float32'] * 200\n",
"\n",
"# Column names are supplied explicitly, so the first line of the file is\n",
"# skipped (presumably its header row).\n",
"train_gd = gd.read_csv(PATH + '/train.csv', names=cols, dtype=dtypes, skiprows=1)\n",
"print(train_gd.shape)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Hold out the last 10,000 rows for validation; train on all rows before them.\n",
"train = train_gd[:-10000]\n",
"valid = train_gd[-10000:]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Pick the feature columns by name instead of calling drop(labels) without an\n",
"# axis: a bare drop(labels) removes columns in early cudf releases but targets\n",
"# rows (axis=0) in pandas-compatible ones, so explicit selection is unambiguous.\n",
"feature_cols = [c for c in train.columns if c not in ('target', 'ID_code')]\n",
"x_train = train[feature_cols]\n",
"y_train = train['target']\n",
"x_valid = valid[feature_cols]\n",
"y_valid = valid['target']"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 9.17 s, sys: 1.68 s, total: 10.9 s\n",
"Wall time: 1.8 s\n"
]
}
],
"source": [
"%%time\n",
"# Booster configuration: logistic objective on the GPU histogram tree method,\n",
"# depth-1 trees, row and column subsampling, scored with AUC.\n",
"xgb_params = dict(\n",
"    objective='binary:logistic',\n",
"    tree_method='gpu_hist',\n",
"    max_depth=1,\n",
"    eta=0.1,\n",
"    silent=1,\n",
"    subsample=0.5,\n",
"    colsample_bytree=0.05,\n",
"    eval_metric='auc',\n",
")\n",
"\n",
"# Each cudf frame/series is converted to pandas before being wrapped in a DMatrix.\n",
"dtrain = xgb.DMatrix(\n",
"    data=x_train.to_pandas(),\n",
"    label=y_train.to_pandas(),\n",
")\n",
"dvalid = xgb.DMatrix(\n",
"    data=x_valid.to_pandas(),\n",
"    label=y_valid.to_pandas(),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0]\teval-auc:0.523106\ttrain-auc:0.524118\n",
"Multiple eval metrics have been passed: 'train-auc' will be used for early stopping.\n",
"\n",
"Will train until train-auc hasn't improved in 30 rounds.\n",
"[1000]\teval-auc:0.877147\ttrain-auc:0.886773\n",
"[2000]\teval-auc:0.889509\ttrain-auc:0.90243\n",
"[3000]\teval-auc:0.89368\ttrain-auc:0.907889\n",
"[4000]\teval-auc:0.895291\ttrain-auc:0.910346\n",
"Stopping. Best iteration:\n",
"[4876]\teval-auc:0.895474\ttrain-auc:0.911888\n",
"\n"
]
}
],
"source": [
"# xgb.train applies early stopping to the LAST entry of `evals`, so the\n",
"# held-out set must come last. With the training set last, stopping was driven\n",
"# by train-auc, which says nothing about generalisation.\n",
"watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n",
"\n",
"# Train for up to 10000 rounds, stopping once eval-auc has not improved for\n",
"# 30 consecutive rounds; progress is printed every 1000 rounds.\n",
"clf = xgb.train(\n",
"    xgb_params,\n",
"    dtrain=dtrain,\n",
"    num_boost_round=10000,\n",
"    evals=watchlist,\n",
"    early_stopping_rounds=30,\n",
"    maximize=True,\n",
"    verbose_eval=1000,\n",
")\n",
"\n",
"# Predicted probabilities for the validation rows.\n",
"yp = clf.predict(dvalid)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment