ocoyawale/Coupon Purchase Prediction - BTB Script.ipynb

## Coupon Purchase Prediction - BTB Script.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              Coupon Purchase Prediction - BTB Script.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Coupon Purchase Prediction - First Script.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              Coupon Purchase Prediction - First Script.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Flavor of Physics - Classification.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              Flavor of Physics - Classification.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Flavor of Physics - Mix of models.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              Flavor of Physics - Mix of models.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Springleaf - Experiments with Classification.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              Springleaf - Experiments with Classification.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Springleaf - Experiments with Random Forest.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn import ensemble, preprocessing, cross_validation\n",
    "from sklearn.metrics import roc_auc_score as auc\n",
    "from time import time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# PREPARE DATA\n",
    "data = pd.read_csv('train_Spring.csv').set_index(\"ID\")\n",
    "test = pd.read_csv('test_Spring.csv').set_index(\"ID\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# remove constants\n",
    "nunique = pd.Series([data[col].nunique() for col in data.columns], index = data.columns)\n",
    "constants = nunique[nunique<2].index.tolist()\n",
    "data = data.drop(constants,axis=1)\n",
    "test = test.drop(constants,axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:198: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
      "  flag = np.concatenate(([True], aux[1:] != aux[:-1]))\n",
      "C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:251: FutureWarning: numpy equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
      "  return aux[:-1][aux[1:] == aux[:-1]]\n",
      "C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:384: FutureWarning: numpy equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
      "  bool_ar = (sar[1:] == sar[:-1])\n"
     ]
    }
   ],
   "source": [
    "# encode string\n",
    "strings = data.dtypes == 'object'; strings = strings[strings].index.tolist(); encoders = {}\n",
    "for col in strings:\n",
    "    encoders[col] = preprocessing.LabelEncoder()\n",
    "    data[col] = encoders[col].fit_transform(data[col])\n",
    "    try:\n",
    "        test[col] = encoders[col].transform(test[col])\n",
    "    except:\n",
    "        # lazy way to incorporate the feature only if can be encoded in the test set\n",
    "        del test[col]\n",
    "        del data[col]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# DATA ready\n",
    "X = data.drop('target',1).fillna(0); y = data.target"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# RF FTW :)\n",
    "rf = ensemble.RandomForestClassifier(n_jobs=4, n_estimators = 20, random_state = 11)\n",
    "#rf = ensemble.RandomForestClassifier(n_jobs=500, n_estimators = 1000, random_state = 15)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# CROSS VALIDATE AND PRINT TRAIN AND TEST SCORE\n",
    "kf = cross_validation.StratifiedKFold(y, n_folds=5, shuffle=True, random_state=11)\n",
    "trscores, cvscores, times = [], [], []\n",
    "for itr, icv in kf:\n",
    "    t = time()\n",
    "    trscore = auc(y.iloc[itr], rf.fit(X.iloc[itr], y.iloc[itr]).predict_proba(X.iloc[itr])[:,1])\n",
    "    cvscore = auc(y.iloc[icv], rf.predict_proba(X.iloc[icv])[:,1])\n",
    "    trscores.append(trscore); cvscores.append(cvscore); times.append(time()-t)\n",
    "print \"TRAIN %.4f | TEST %.4f | TIME %.2fm (1-fold)\" % (np.mean(trscores), np.mean(cvscores), np.mean(times)/60)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# MAKING SUBMISSION\n",
    "submission = pd.DataFrame(rf.fit(X,y).predict_proba(test.fillna(0))[:,1], index=test.index, columns=['target'])\n",
    "submission.index.name = 'ID'\n",
    "submission.to_csv('Springleaf5.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}

## SpringLeaf -Kaggle 18AUG15 (1).ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              SpringLeaf -Kaggle 18AUG15 (1).ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## SpringLeaf -Kaggle 18AUG15.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              SpringLeaf -Kaggle 18AUG15.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Springleaf with xgb.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              Springleaf with xgb.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## Springleaf with xgb1.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              Springleaf with xgb1.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import numpy as np\n",
	"from sklearn import ensemble, preprocessing, cross_validation\n",
	"from sklearn.metrics import roc_auc_score as auc\n",
	"from time import time"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# PREPARE DATA\n",
	"data = pd.read_csv('train_Spring.csv').set_index(\"ID\")\n",
	"test = pd.read_csv('test_Spring.csv').set_index(\"ID\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# remove constants\n",
	"nunique = pd.Series([data[col].nunique() for col in data.columns], index = data.columns)\n",
	"constants = nunique[nunique<2].index.tolist()\n",
	"data = data.drop(constants,axis=1)\n",
	"test = test.drop(constants,axis=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {
	"collapsed": false
	},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:198: FutureWarning: numpy not_equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
	" flag = np.concatenate(([True], aux[1:] != aux[:-1]))\n",
	"C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:251: FutureWarning: numpy equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
	" return aux[:-1][aux[1:] == aux[:-1]]\n",
	"C:\\Users\\Vikrant\\Anaconda\\lib\\site-packages\\numpy\\lib\\arraysetops.py:384: FutureWarning: numpy equal will not check object identity in the future. The comparison did not return the same result as suggested by the identity (`is`)) and will change.\n",
	" bool_ar = (sar[1:] == sar[:-1])\n"
	]
	}
	],
	"source": [
	"# encode string\n",
	"strings = data.dtypes == 'object'; strings = strings[strings].index.tolist(); encoders = {}\n",
	"for col in strings:\n",
	"    encoders[col] = preprocessing.LabelEncoder()\n",
	"    data[col] = encoders[col].fit_transform(data[col])\n",
	"    try:\n",
	"        test[col] = encoders[col].transform(test[col])\n",
	"    except:\n",
	"        # lazy way to incorporate the feature only if can be encoded in the test set\n",
	"        del test[col]\n",
	"        del data[col]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# DATA ready\n",
	"X = data.drop('target',1).fillna(0); y = data.target"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# RF FTW :)\n",
	"rf = ensemble.RandomForestClassifier(n_jobs=4, n_estimators = 20, random_state = 11)\n",
	"#rf = ensemble.RandomForestClassifier(n_jobs=500, n_estimators = 1000, random_state = 15)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": false
	},
	"outputs": [],
	"source": [
	"# CROSS VALIDATE AND PRINT TRAIN AND TEST SCORE\n",
	"kf = cross_validation.StratifiedKFold(y, n_folds=5, shuffle=True, random_state=11)\n",
	"trscores, cvscores, times = [], [], []\n",
	"for itr, icv in kf:\n",
	"    t = time()\n",
	"    trscore = auc(y.iloc[itr], rf.fit(X.iloc[itr], y.iloc[itr]).predict_proba(X.iloc[itr])[:,1])\n",
	"    cvscore = auc(y.iloc[icv], rf.predict_proba(X.iloc[icv])[:,1])\n",
	"    trscores.append(trscore); cvscores.append(cvscore); times.append(time()-t)\n",
	"print \"TRAIN %.4f | TEST %.4f | TIME %.2fm (1-fold)\" % (np.mean(trscores), np.mean(cvscores), np.mean(times)/60)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# MAKING SUBMISSION\n",
	"submission = pd.DataFrame(rf.fit(X,y).predict_proba(test.fillna(0))[:,1], index=test.index, columns=['target'])\n",
	"submission.index.name = 'ID'\n",
	"submission.to_csv('Springleaf5.csv')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.10"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 0
	}