{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import os\n",
"from sklearn.base import BaseEstimator, TransformerMixin\n",
"from sklearn.pipeline import FeatureUnion\n",
"from sklearn.linear_model.logistic import LogisticRegression\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"sns.set_style('whitegrid')\n",
"%matplotlib inline\n",
"import sklearn\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn import ensemble, preprocessing, cross_validation\n",
"from sklearn.metrics import roc_auc_score as auc\n",
"from time import time\n",
"from sklearn import linear_model\n",
"from sklearn.linear_model import LassoCV, LassoLarsCV, LassoLarsIC\n",
"from sklearn import datasets\n",
"from sklearn.linear_model import SGDClassifier\n",
"from sklearn.linear_model import ElasticNet\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import svm\n",
"from sklearn.cross_validation import cross_val_score\n",
"from sklearn.datasets import make_blobs\n",
"from sklearn.metrics import mean_squared_error\n",
"from sklearn.datasets import make_friedman1\n",
"from sklearn.ensemble import GradientBoostingRegressor\n",
"from sklearn import gaussian_process\n",
"from sklearn.linear_model import SGDClassifier\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.ensemble import ExtraTreesClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"from sklearn.ensemble import AdaBoostClassifier\n",
"from sklearn import neighbors, datasets\n",
"from sklearn import tree\n",
"from sklearn.datasets import make_hastie_10_2\n",
"from sklearn.ensemble import GradientBoostingClassifier"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"script_path = os.path.abspath(os.path.dirname(\"__file__\"))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class Get_Price_Rate(BaseEstimator, TransformerMixin):\n",
" '''\n",
" get price rate\n",
" '''\n",
"\n",
" def get_feature_names(self):\n",
"\n",
" return [self.__class__.__name__]\n",
"\n",
" def fit(self, date_frame, y=None):\n",
" '''\n",
" fit\n",
"\n",
" :param pandas.DataFrame: all data\n",
" :rtype: Get_Price_Rate\n",
" '''\n",
"\n",
" return self\n",
"\n",
" def transform(self, date_frame):\n",
" '''\n",
" transform\n",
"\n",
" :param pandas.DataFrame: all data\n",
" :rtype: array\n",
" '''\n",
"\n",
" return date_frame[\"PRICE_RATE\"].as_matrix()[None].T.astype(np.float)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"class Get_Match_Pref(BaseEstimator, TransformerMixin):\n",
" '''\n",
" get user pref is match coupon area\n",
" '''\n",
"\n",
" def get_feature_names(self):\n",
"\n",
" return [self.__class__.__name__]\n",
"\n",
" def fit(self, date_frame, y=None):\n",
" '''\n",
" fit\n",
"\n",
" :param pandas.DataFrame: all data\n",
" :rtype: Get_Price_Rate\n",
" '''\n",
"\n",
" return self\n",
"\n",
" def transform(self, date_frame):\n",
" '''\n",
" transform\n",
"\n",
" :param pandas.DataFrame: all data\n",
" :rtype: array\n",
" '''\n",
" res_sr = date_frame[\"PREF_NAME\"] == date_frame[\"ken_name\"]\n",
"\n",
" return res_sr.as_matrix()[None].T.astype(np.float)"
]
},
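{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check, not part of the original pipeline: run both transformers on a tiny hand-made frame. `toy_df` and its values are invented; only the column names match the Coupon Purchase Prediction data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# hypothetical toy frame -- the values are made up, the column names are real\n",
"toy_df = pd.DataFrame({\n",
"    \"PRICE_RATE\": [50, 80, 65],\n",
"    \"PREF_NAME\": [\"Tokyo\", \"Osaka\", \"Kyoto\"],\n",
"    \"ken_name\": [\"Tokyo\", \"Aichi\", \"Kyoto\"]})\n",
"print(Get_Price_Rate().transform(toy_df))  # (3, 1) column of discount rates\n",
"print(Get_Match_Pref().transform(toy_df))  # 1.0 where PREF_NAME == ken_name"
]
},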
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def top_merge(df, n=10, column=\"predict\", merge_column=\"COUPON_ID_hash\"):\n",
" '''\n",
" get top n row\n",
"\n",
" :param pandas.DataFrame df:\n",
" :param int n:\n",
" :param str column:\n",
" :rtype: pandas.DataFrame\n",
" '''\n",
"\n",
" return \" \".join(df.sort_index(by=column)[-n:][merge_column])"
]
},
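{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal sketch of what `top_merge` does per user, using invented scores: sort by `predict`, keep the top `n`, join the coupon hashes with spaces."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# hypothetical scored candidates for two users\n",
"demo_df = pd.DataFrame({\n",
"    \"USER_ID_hash\": [\"u1\", \"u1\", \"u1\", \"u2\", \"u2\"],\n",
"    \"COUPON_ID_hash\": [\"c1\", \"c2\", \"c3\", \"c1\", \"c3\"],\n",
"    \"predict\": [0.9, 0.1, 0.5, 0.2, 0.8]})\n",
"# expected: u1 -> 'c3 c1', u2 -> 'c1 c3' (rows stay in ascending score order)\n",
"print(demo_df.groupby(\"USER_ID_hash\").apply(top_merge, n=2))"
]
},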
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"feature_list = [\n",
" ('PRICE_RATE', Get_Price_Rate()),\n",
" ('MATCH_PREF', Get_Match_Pref()),\n",
"]"
]
},
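{
"cell_type": "markdown",
"metadata": {},
"source": [
"`FeatureUnion` runs every transformer in `feature_list` and stacks their column outputs side by side; `get_feature_names()` prefixes each transformer's name with its `transformer_list` key. A sketch on the toy frame from the sanity check above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# reuses toy_df defined in the transformer sanity check above\n",
"fu_demo = FeatureUnion(transformer_list=feature_list)\n",
"print(fu_demo.fit_transform(toy_df))  # shape (3, 2): price rate, pref match\n",
"print(fu_demo.get_feature_names())    # ['PRICE_RATE__Get_Price_Rate', 'MATCH_PREF__Get_Match_Pref']"
]
},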
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"if __name__ == '__main__':\n",
" # import csv\n",
" user_df = pd.read_csv(\"C:\\\\Users\\\\Vikrant\\\\Coupon\\\\user_list.csv\")\n",
" train_coupon_df = pd.read_csv(\"C:\\\\Users\\\\Vikrant\\\\Coupon\\\\coupon_list_train.csv\")\n",
" train_visit_df = pd.read_csv(\"C:\\\\Users\\\\Vikrant\\\\Coupon\\\\coupon_visit_train.csv\")\n",
" test_coupon_df = pd.read_csv(\"C:\\\\Users\\\\Vikrant\\\\Coupon\\\\coupon_list_test.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# create train_df\n",
"train_df = pd.merge(train_visit_df, train_coupon_df,\n",
" left_on=\"VIEW_COUPON_ID_hash\", right_on=\"COUPON_ID_hash\")\n",
"train_df = pd.merge(train_df, user_df,\n",
" left_on=\"USER_ID_hash\", right_on=\"USER_ID_hash\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
" # create train feature\n",
" fu_obj = FeatureUnion(transformer_list=feature_list)\n",
" X_train = fu_obj.fit_transform(train_df)\n",
" y_train = train_df[\"PURCHASE_FLG\"]\n",
" assert X_train.shape[0] == y_train.size"
]
},
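{
"cell_type": "markdown",
"metadata": {},
"source": [
"An added check, not in the original flow: the feature matrix is just two columns, and the mean of `PURCHASE_FLG` shows how rare purchases are among logged visits, which is worth keeping in mind when reading AUC numbers."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"print(X_train.shape)   # (n_visit_rows, 2)\n",
"print(y_train.mean())  # fraction of visits that ended in a purchase"
]
},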
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"GradientBoostingClassifier(init=None, learning_rate=1.0, loss='deviance',\n",
" max_depth=3, max_features=None, max_leaf_nodes=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=100,\n",
" random_state=50, subsample=1.0, verbose=0, warm_start=False)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" # fit model\n",
" #clf = LogisticRegression()\n",
" #clf = ensemble.RandomForestClassifier(n_jobs=4, n_estimators = 20, random_state = 11)\n",
" #clf = ensemble.RandomForestClassifier(n_jobs=500, n_estimators = 1000, random_state = 15)\n",
" #SVC\n",
" #clf = svm.SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3, gamma=0.0, kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)\n",
" #clf = svm.SVC(C=2.0, cache_size=200, class_weight=1, coef0=0.0, degree=3, gamma=0.0, kernel='rbf', max_iter=-1, probability=True, random_state=4, shrinking=True, tol=0.001, verbose=False)\n",
" #Stochastic Gradient Descent\n",
" #clf = SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1, eta0=0.0, fit_intercept=True, l1_ratio=0.15, learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1, penalty='l2', power_t=0.5, random_state=None, shuffle=True, verbose=0, warm_start=False)\n",
" #clf=SGDClassifier(loss='log',alpha=0.000001,n_iter=100)\n",
" #clf = DecisionTreeClassifier(max_depth=5, min_samples_split=1, random_state=20)\n",
" #RandomForestClassifier - 0.75985\n",
" #clf = RandomForestClassifier(n_estimators=100, max_depth=10, min_samples_split=1, random_state=50)\n",
" #clf = RandomForestClassifier(n_estimators=400, max_depth=20, min_samples_split=1, random_state=200)\n",
" #ExtraTreesClassifier - 0.76\n",
" #clf = ExtraTreesClassifier(n_estimators=150, max_depth=20, min_samples_split=2, random_state=100)\n",
" #clf = ExtraTreesClassifier(n_estimators=200, max_depth=30, min_samples_split=4, random_state=200)\n",
" #Nearest Neighbors Classifier\n",
" #clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)\n",
" #clf = neighbors.KNeighborsClassifier()\n",
" #Decision Tree Classifier\n",
" #clf = tree.DecisionTreeClassifier()\n",
" #Adaboost\n",
" #clf = AdaBoostClassifier(n_estimators=100)\n",
" #GradientBoostingClassifier# - 0.76858\n",
" #clf = GradientBoostingClassifier(n_estimators=400, learning_rate=1.0, max_depth=3, random_state=200)\n",
" clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0, max_depth=3, random_state=50)\n",
" clf.fit(X_train, y_train)"
]
},
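{
"cell_type": "markdown",
"metadata": {},
"source": [
"`roc_auc_score` and `cross_validation` are imported above but never used. A minimal holdout check of the chosen model might look like this; it is a sketch on top of the existing `X_train`/`y_train`, not part of the original submission flow."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# sketch: hold out 20% of the visit rows and score the positive-class probabilities\n",
"tr_X, va_X, tr_y, va_y = cross_validation.train_test_split(\n",
"    X_train, y_train, test_size=0.2, random_state=50)\n",
"val_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,\n",
"                                     max_depth=3, random_state=50)\n",
"val_clf.fit(tr_X, tr_y)\n",
"print(auc(va_y, val_clf.predict_proba(va_X)[:, 1]))  # holdout ROC AUC"
]
},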
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
" # create test_df\n",
" test_coupon_df[\"cross\"] = 1\n",
" user_df[\"cross\"] = 1\n",
" test_df = pd.merge(test_coupon_df, user_df, on=\"cross\")"
]
},
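{
"cell_type": "markdown",
"metadata": {},
"source": [
"Why the constant `cross` column works: merging two frames on a key that is 1 everywhere pairs every left row with every right row. A tiny illustration with invented frames:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# hypothetical frames just to show the cross-join trick\n",
"left = pd.DataFrame({\"a\": [1, 2], \"cross\": 1})\n",
"right = pd.DataFrame({\"b\": [\"x\", \"y\", \"z\"], \"cross\": 1})\n",
"print(pd.merge(left, right, on=\"cross\"))  # 2 x 3 = 6 rows"
]
},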
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
" # create test Feature\n",
" X_test = fu_obj.transform(test_df)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
" # predict test data\n",
" predict_proba = clf.predict_proba(X_test)\n",
" pos_idx = np.where(clf.classes_ == True)[0][0]\n",
" test_df[\"predict\"] = predict_proba[:, pos_idx]\n",
" top10_coupon = test_df.groupby(\"USER_ID_hash\").apply(top_merge)\n",
" top10_coupon.name = \"PURCHASED_COUPONS\"\n",
" top10_coupon.to_csv(\"submission.csv\", header=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}