@phonism
Last active March 17, 2016 11:20
test.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import xgboost as xgb\n",
"from sklearn import preprocessing, linear_model\n",
"import math"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train = pd.read_csv('../data/train/PPD_Training_Master_GBK_3_1_Training_Set.csv')\n",
"test = pd.read_csv('../data/test/PPD_Master_GBK_2_Test_Set.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"all_data = train.append(test)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"cat_cols = [\"UserInfo_2\", \"UserInfo_4\", \"UserInfo_7\", \"UserInfo_8\", \"UserInfo_19\", \"UserInfo_20\", \"UserInfo_1\", \\\n",
" \"UserInfo_3\", \"UserInfo_5\", \"UserInfo_6\", \"UserInfo_9\", \"UserInfo_2\", \"UserInfo_4\", \\\n",
" \"UserInfo_7\", \"UserInfo_8\", \"UserInfo_19\", \"UserInfo_20\", \"UserInfo_11\", \"UserInfo_12\", \"UserInfo_13\", \\\n",
" \"UserInfo_14\", \"UserInfo_15\", \"UserInfo_16\", \"UserInfo_18\", \"UserInfo_21\", \"UserInfo_22\", \"UserInfo_23\", \\\n",
" \"UserInfo_24\", \"Education_Info1\", \"Education_Info2\", \"Education_Info3\", \"Education_Info4\", \\\n",
" \"Education_Info5\", \"Education_Info6\", \"Education_Info7\", \"Education_Info8\", \"WeblogInfo_19\", \\\n",
" \"WeblogInfo_20\", \"WeblogInfo_21\", \"SocialNetwork_1\", \"SocialNetwork_2\", \"SocialNetwork_7\", \\\n",
" \"ListingInfo\", \"SocialNetwork_12\"]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"for col in cat_cols:\n",
" if col in all_data.columns.values:\n",
" all_data[col] = pd.factorize(all_data[col])[0]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"all_data.fillna(-1, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train = all_data[all_data['target'] > -1].copy()\n",
"test = all_data[all_data['target'] == -1].copy()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"dtrain = xgb.DMatrix(train.drop([\"Idx\", \"target\"], axis=1), train[\"target\"].values)\n",
"dtest = xgb.DMatrix(test.drop([\"Idx\", \"target\"], axis=1), label=test[\"target\"].values)\n",
"\n",
"params = {}\n",
"params['objective'] = 'binary:logistic'\n",
"params['eta'] = 0.02\n",
"params['min_child_weight'] = 1\n",
"params['subsample'] = 0.8\n",
"params['colsample_bytree'] = 0.8\n",
"params['max_depth'] = 8\n",
"params['eval_metric'] = 'auc'\n",
"params['nthread'] = 3\n",
"params['silent'] = 1\n",
"num_rounds = 120"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"model = xgb.train(params, dtrain, num_rounds)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_pred = model.predict(dtest, ntree_limit=model.best_iteration)\n",
"pd.DataFrame({'Idx': test['Idx'].values, 'score': y_pred}).to_csv('submit.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}