Skip to content

Instantly share code, notes, and snippets.

@bbzzzz
Created November 3, 2015 18:18
Show Gist options
  • Save bbzzzz/ffeea767a6bfe0cdfe49 to your computer and use it in GitHub Desktop.
Save bbzzzz/ffeea767a6bfe0cdfe49 to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This code is for ZestFinance modeling team interview homework assisgnment. ML algorithms including Regularized Logistic Regression, Elastic Net, Random Fores and Gradient Boosting (xgboost) are applied."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import xgboost as xgb\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn import pipeline, metrics, grid_search, cross_validation,preprocessing, feature_extraction\n",
"import pandas as pd\n",
"import numpy as np \n",
"import scipy as sc\n",
"import time\n",
"from datetime import datetime\n",
"import math\n",
"import random"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"#### Part I. Define Functions"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Function for grid search\n",
"def search_model(train_x, train_y, est, param_grid, n_jobs, cv):\n",
" model = grid_search.GridSearchCV(estimator = est,\n",
" param_grid = param_grid,\n",
" scoring = 'roc_auc',\n",
" verbose = 10,\n",
" n_jobs = n_jobs,\n",
" iid = True,\n",
" refit = False,\n",
" cv = cv)\n",
" # Fit Grid Search Model\n",
" model.fit(train_x, train_y)\n",
" print(\"Best score: %0.3f\" % model.best_score_)\n",
" print(\"Best parameters set:\", model.best_params_)\n",
" return model.best_score_, model.best_params_"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def xgb_fit(train_x,train_y,val_x,val_y,params,num_boost_round,early_stopping_rounds,missing):\n",
" plst = list(params.items())\n",
" xgtrain = xgb.DMatrix(train_x, label=train_y, missing=missing)\n",
" xgval = xgb.DMatrix(val_x,val_y, missing=missing)\n",
" #train using early stopping and predict\n",
" watchlist = [(xgtrain, 'train'),(xgval, 'val')]\n",
" model = xgb.train(params=plst\n",
" , dtrain=xgtrain\n",
" , num_boost_round=num_boost_round\n",
" , evals=watchlist\n",
" , early_stopping_rounds=early_stopping_rounds\n",
" # , feval = AUC\n",
" )\n",
" print (\"Best score:\", model.best_score)\n",
" print (\"Best iteration:\", model.best_iteration)\n",
" return model"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def xgb_predict(model,testX,params,missing):\n",
" xgtest = xgb.DMatrix(testX, missing=missing)\n",
" preds = model.predict(xgtest,ntree_limit=model.best_iteration)\n",
" return preds"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Part II. Data Processing"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading data ...... 2015-11-03 13:05:11\n"
]
}
],
"source": [
"print (\"Loading data ...... %s\" % (datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S')))\n",
"start = time.time() \n",
"full_df = pd.read_csv('revised_data_2.csv')"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>amount_requested</th>\n",
" <th>amount_approved</th>\n",
" <th>request_approve_ratio</th>\n",
" <th>age_at_application</th>\n",
" <th>year_of_application</th>\n",
" <th>month_of_application</th>\n",
" <th>day_of_application</th>\n",
" <th>day_of_week</th>\n",
" <th>residence_rent_or_own</th>\n",
" <th>monthly_rent_amount</th>\n",
" <th>...</th>\n",
" <th>more_than_2_phones</th>\n",
" <th>raw_l2c_score</th>\n",
" <th>raw_FICO_telecom</th>\n",
" <th>FICO_telecom_gradea_ordered</th>\n",
" <th>raw_FICO_retail</th>\n",
" <th>FICO_retail_grade_ordered</th>\n",
" <th>raw_FICO_bank_card</th>\n",
" <th>FICO_bank_grade_ordered</th>\n",
" <th>raw_FICO_money</th>\n",
" <th>FICO_money_grade_ordered</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643</td>\n",
" <td>643.000000</td>\n",
" <td>...</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" <td>643.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>598.678072</td>\n",
" <td>525.972006</td>\n",
" <td>0.905511</td>\n",
" <td>42.121306</td>\n",
" <td>2010.758942</td>\n",
" <td>4.640747</td>\n",
" <td>15.612753</td>\n",
" <td>4.049767</td>\n",
" <td>0.6656299</td>\n",
" <td>583.097978</td>\n",
" <td>...</td>\n",
" <td>0.562986</td>\n",
" <td>594.474339</td>\n",
" <td>568.754277</td>\n",
" <td>1.076205</td>\n",
" <td>596.648523</td>\n",
" <td>1.337481</td>\n",
" <td>665.046656</td>\n",
" <td>2.097978</td>\n",
" <td>603.027994</td>\n",
" <td>1.125972</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>167.960908</td>\n",
" <td>131.497886</td>\n",
" <td>0.153976</td>\n",
" <td>12.359399</td>\n",
" <td>0.428058</td>\n",
" <td>3.951815</td>\n",
" <td>8.532746</td>\n",
" <td>1.791468</td>\n",
" <td>0.4721371</td>\n",
" <td>437.012338</td>\n",
" <td>...</td>\n",
" <td>0.496403</td>\n",
" <td>122.853738</td>\n",
" <td>42.807749</td>\n",
" <td>0.293401</td>\n",
" <td>52.119748</td>\n",
" <td>0.571611</td>\n",
" <td>39.939296</td>\n",
" <td>0.715676</td>\n",
" <td>27.774108</td>\n",
" <td>0.332076</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>250.000000</td>\n",
" <td>250.000000</td>\n",
" <td>0.625000</td>\n",
" <td>18.000000</td>\n",
" <td>2010.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>False</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>50.000000</td>\n",
" <td>222.000000</td>\n",
" <td>1.000000</td>\n",
" <td>222.000000</td>\n",
" <td>1.000000</td>\n",
" <td>222.000000</td>\n",
" <td>1.000000</td>\n",
" <td>222.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>500.000000</td>\n",
" <td>500.000000</td>\n",
" <td>0.833333</td>\n",
" <td>32.000000</td>\n",
" <td>2011.000000</td>\n",
" <td>2.000000</td>\n",
" <td>9.000000</td>\n",
" <td>3.000000</td>\n",
" <td>0</td>\n",
" <td>270.000000</td>\n",
" <td>...</td>\n",
" <td>0.000000</td>\n",
" <td>539.500000</td>\n",
" <td>537.000000</td>\n",
" <td>1.000000</td>\n",
" <td>556.000000</td>\n",
" <td>1.000000</td>\n",
" <td>645.500000</td>\n",
" <td>2.000000</td>\n",
" <td>588.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>500.000000</td>\n",
" <td>500.000000</td>\n",
" <td>1.000000</td>\n",
" <td>41.000000</td>\n",
" <td>2011.000000</td>\n",
" <td>3.000000</td>\n",
" <td>16.000000</td>\n",
" <td>4.000000</td>\n",
" <td>1</td>\n",
" <td>550.000000</td>\n",
" <td>...</td>\n",
" <td>1.000000</td>\n",
" <td>589.000000</td>\n",
" <td>568.000000</td>\n",
" <td>1.000000</td>\n",
" <td>594.000000</td>\n",
" <td>1.000000</td>\n",
" <td>671.000000</td>\n",
" <td>2.000000</td>\n",
" <td>605.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>800.000000</td>\n",
" <td>500.000000</td>\n",
" <td>1.000000</td>\n",
" <td>51.000000</td>\n",
" <td>2011.000000</td>\n",
" <td>4.000000</td>\n",
" <td>23.000000</td>\n",
" <td>5.000000</td>\n",
" <td>1</td>\n",
" <td>827.000000</td>\n",
" <td>...</td>\n",
" <td>1.000000</td>\n",
" <td>669.000000</td>\n",
" <td>597.000000</td>\n",
" <td>1.000000</td>\n",
" <td>637.500000</td>\n",
" <td>2.000000</td>\n",
" <td>680.000000</td>\n",
" <td>2.000000</td>\n",
" <td>620.000000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>800.000000</td>\n",
" <td>800.000000</td>\n",
" <td>1.000000</td>\n",
" <td>79.000000</td>\n",
" <td>2011.000000</td>\n",
" <td>12.000000</td>\n",
" <td>31.000000</td>\n",
" <td>7.000000</td>\n",
" <td>True</td>\n",
" <td>3018.000000</td>\n",
" <td>...</td>\n",
" <td>1.000000</td>\n",
" <td>808.000000</td>\n",
" <td>712.000000</td>\n",
" <td>3.000000</td>\n",
" <td>756.000000</td>\n",
" <td>4.000000</td>\n",
" <td>797.000000</td>\n",
" <td>4.000000</td>\n",
" <td>662.000000</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 39 columns</p>\n",
"</div>"
],
"text/plain": [
" amount_requested amount_approved request_approve_ratio \\\n",
"count 643.000000 643.000000 643.000000 \n",
"mean 598.678072 525.972006 0.905511 \n",
"std 167.960908 131.497886 0.153976 \n",
"min 250.000000 250.000000 0.625000 \n",
"25% 500.000000 500.000000 0.833333 \n",
"50% 500.000000 500.000000 1.000000 \n",
"75% 800.000000 500.000000 1.000000 \n",
"max 800.000000 800.000000 1.000000 \n",
"\n",
" age_at_application year_of_application month_of_application \\\n",
"count 643.000000 643.000000 643.000000 \n",
"mean 42.121306 2010.758942 4.640747 \n",
"std 12.359399 0.428058 3.951815 \n",
"min 18.000000 2010.000000 1.000000 \n",
"25% 32.000000 2011.000000 2.000000 \n",
"50% 41.000000 2011.000000 3.000000 \n",
"75% 51.000000 2011.000000 4.000000 \n",
"max 79.000000 2011.000000 12.000000 \n",
"\n",
" day_of_application day_of_week residence_rent_or_own \\\n",
"count 643.000000 643.000000 643 \n",
"mean 15.612753 4.049767 0.6656299 \n",
"std 8.532746 1.791468 0.4721371 \n",
"min 1.000000 1.000000 False \n",
"25% 9.000000 3.000000 0 \n",
"50% 16.000000 4.000000 1 \n",
"75% 23.000000 5.000000 1 \n",
"max 31.000000 7.000000 True \n",
"\n",
" monthly_rent_amount ... more_than_2_phones \\\n",
"count 643.000000 ... 643.000000 \n",
"mean 583.097978 ... 0.562986 \n",
"std 437.012338 ... 0.496403 \n",
"min 0.000000 ... 0.000000 \n",
"25% 270.000000 ... 0.000000 \n",
"50% 550.000000 ... 1.000000 \n",
"75% 827.000000 ... 1.000000 \n",
"max 3018.000000 ... 1.000000 \n",
"\n",
" raw_l2c_score raw_FICO_telecom FICO_telecom_gradea_ordered \\\n",
"count 643.000000 643.000000 643.000000 \n",
"mean 594.474339 568.754277 1.076205 \n",
"std 122.853738 42.807749 0.293401 \n",
"min 50.000000 222.000000 1.000000 \n",
"25% 539.500000 537.000000 1.000000 \n",
"50% 589.000000 568.000000 1.000000 \n",
"75% 669.000000 597.000000 1.000000 \n",
"max 808.000000 712.000000 3.000000 \n",
"\n",
" raw_FICO_retail FICO_retail_grade_ordered raw_FICO_bank_card \\\n",
"count 643.000000 643.000000 643.000000 \n",
"mean 596.648523 1.337481 665.046656 \n",
"std 52.119748 0.571611 39.939296 \n",
"min 222.000000 1.000000 222.000000 \n",
"25% 556.000000 1.000000 645.500000 \n",
"50% 594.000000 1.000000 671.000000 \n",
"75% 637.500000 2.000000 680.000000 \n",
"max 756.000000 4.000000 797.000000 \n",
"\n",
" FICO_bank_grade_ordered raw_FICO_money FICO_money_grade_ordered \n",
"count 643.000000 643.000000 643.000000 \n",
"mean 2.097978 603.027994 1.125972 \n",
"std 0.715676 27.774108 0.332076 \n",
"min 1.000000 222.000000 1.000000 \n",
"25% 2.000000 588.000000 1.000000 \n",
"50% 2.000000 605.000000 1.000000 \n",
"75% 2.000000 620.000000 1.000000 \n",
"max 4.000000 662.000000 2.000000 \n",
"\n",
"[8 rows x 39 columns]"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Convert performance code to binary numbers\n",
"full_df['performance']=full_df['performance'].apply(lambda x: 1 if x=='Bad' else 0)"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0\n",
"5 1\n",
"6 0\n",
"7 1\n",
"8 1\n",
"9 1\n",
"10 0\n",
"11 0\n",
"12 0\n",
"13 1\n",
"14 1\n",
"15 0\n",
"16 0\n",
"17 1\n",
"18 1\n",
"19 1\n",
"20 1\n",
"21 0\n",
"22 1\n",
"23 1\n",
"24 1\n",
"25 0\n",
"26 1\n",
"27 1\n",
"28 1\n",
"29 0\n",
" ..\n",
"613 0\n",
"614 0\n",
"615 0\n",
"616 1\n",
"617 1\n",
"618 0\n",
"619 0\n",
"620 1\n",
"621 0\n",
"622 0\n",
"623 1\n",
"624 1\n",
"625 0\n",
"626 1\n",
"627 0\n",
"628 1\n",
"629 0\n",
"630 1\n",
"631 1\n",
"632 1\n",
"633 0\n",
"634 0\n",
"635 0\n",
"636 0\n",
"637 0\n",
"638 0\n",
"639 1\n",
"640 1\n",
"641 1\n",
"642 0\n",
"Name: performance, dtype: int64"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_df['performance']"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Convert date variable to date type\n",
"full_df['birth_date']=pd.to_datetime(full_df['birth_date'],format=\"%Y/%m/%d %H:%M\")\n",
"full_df['application_when']=pd.to_datetime(full_df['application_when'],format=\"%Y/%m/%d %H:%M\")"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"9 563\n",
"8 79\n",
"5 1\n",
"dtype: int64"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Check bank_routing_number\n",
"full_df['bank_routing_number'].apply(lambda x:len(str(x))).value_counts()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"bank_routing_number format:\n",
"\n",
"XXXXYYYYC \n",
"\n",
"XXXX - Federal Reserve Routing Symbol \n",
"YYYY - ABA Institution Identifier \n",
" C - the Check Digit "
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Define Routing Symbol and Institution Number\n",
"full_df['bank_routing_number']=full_df['bank_routing_number'].apply(lambda x:str(x))\n",
"full_df['routing_symbol']=full_df['bank_routing_number'].apply(lambda x:x[:4])\n",
"full_df['institution']=full_df['bank_routing_number'].apply(lambda x:x[4:])"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"cat_cols = ['age_at_application', 'residence_rent_or_own','bank_account_direct_deposit',\n",
" 'year_of_application', 'month_of_application', 'day_of_application', 'day_of_week',\n",
" 'payment_frequency','address_zip','State', 'more_than_2_phones',\n",
" 'routing_symbol','institution','home_phone_type','other_phone_type','how_use_money',\n",
" 'FICO_telecom_grade','FICO_retail_grade','FICO_bank_grade','FICO_money_grade']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Categorical variables 'status', 'paymentach' and 'email_provider' are removed for low variation. \n"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"num_cols = ['amount_requested', 'amount_approved', 'age_at_application', 'request_approve_ratio',\n",
" 'monthly_rent_amount', 'loan_duration', 'payment_amount','payment_amount_approved', \n",
" 'num_payments', 'duration_approved', 'total_payment', \n",
" 'total_payment_principal_ratio', 'monthly_income_amount', 'monthly_pay_ratio', \n",
" 'monthly_payment', 'payment_approve_ratio',\n",
" 'email_duration_ordered', 'residence_duration_ordered', 'bank_account_duration_ordered', \n",
" 'raw_l2c_score','raw_FICO_telecom', 'FICO_telecom_gradea_ordered',\n",
" 'raw_FICO_retail', 'FICO_retail_grade_ordered', 'raw_FICO_bank_card', \n",
" 'FICO_bank_grade_ordered', 'raw_FICO_money', 'FICO_money_grade_ordered']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Numeric varialbe 'income_expense_ratio' is removed after testing."
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"target_col = 'performance'"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Leave-One-Out Encoding age_at_application\n",
"Leave-One-Out Encoding residence_rent_or_own\n",
"Leave-One-Out Encoding bank_account_direct_deposit\n",
"Leave-One-Out Encoding year_of_application\n",
"Leave-One-Out Encoding month_of_application\n",
"Leave-One-Out Encoding day_of_application\n",
"Leave-One-Out Encoding day_of_week\n",
"Leave-One-Out Encoding payment_frequency\n",
"Leave-One-Out Encoding address_zip\n",
"Leave-One-Out Encoding State\n",
"Leave-One-Out Encoding more_than_2_phones\n",
"Leave-One-Out Encoding routing_symbol\n",
"Leave-One-Out Encoding institution\n",
"Leave-One-Out Encoding home_phone_type\n",
"Leave-One-Out Encoding other_phone_type\n",
"Leave-One-Out Encoding how_use_money\n",
"Leave-One-Out Encoding FICO_telecom_grade\n",
"Leave-One-Out Encoding FICO_retail_grade\n",
"Leave-One-Out Encoding FICO_bank_grade\n",
"Leave-One-Out Encoding FICO_money_grade\n"
]
}
],
"source": [
"# Leave-One-Out Encoding\n",
"# Convert Catigorical Features to Numeric\n",
"oneway_cat_aggr_cols = list()\n",
"for col in cat_cols:\n",
" print (\"Leave-One-Out Encoding %s\" % (col))\n",
" # mean label\n",
" aggr = []\n",
" aggr = full_df[[col]+[target_col]].groupby(col)[target_col].agg([np.mean]).join(full_df.groupby(col)[target_col].agg([np.sum,np.size]),how='left') \n",
" # aggr.columns=aggr.columns.droplevel()\n",
" meanTagetAggr = np.mean(aggr['mean'].values)\n",
" #aggr=full_df.join(aggr,how='left', on=col)[list(aggr.columns)+target_col]\n",
" full_df['MEAN_BY_'+col] = full_df[[col]+[target_col]].join(aggr,how='left', on=col)[list(aggr.columns)+[target_col]].apply(lambda row: row['mean'] if math.isnan(row[target_col]) else (row['sum']-row[target_col])/(row['size']-1)*random.uniform(0.95, 1.05) , axis=1)\n",
" oneway_cat_aggr_cols.append('MEAN_BY_'+col)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>MEAN_BY_age_at_application</th>\n",
" <th>MEAN_BY_residence_rent_or_own</th>\n",
" <th>MEAN_BY_bank_account_direct_deposit</th>\n",
" <th>MEAN_BY_year_of_application</th>\n",
" <th>MEAN_BY_month_of_application</th>\n",
" <th>MEAN_BY_day_of_application</th>\n",
" <th>MEAN_BY_day_of_week</th>\n",
" <th>MEAN_BY_payment_frequency</th>\n",
" <th>MEAN_BY_address_zip</th>\n",
" <th>MEAN_BY_State</th>\n",
" <th>MEAN_BY_more_than_2_phones</th>\n",
" <th>MEAN_BY_routing_symbol</th>\n",
" <th>MEAN_BY_institution</th>\n",
" <th>MEAN_BY_home_phone_type</th>\n",
" <th>MEAN_BY_other_phone_type</th>\n",
" <th>MEAN_BY_how_use_money</th>\n",
" <th>MEAN_BY_FICO_telecom_grade</th>\n",
" <th>MEAN_BY_FICO_retail_grade</th>\n",
" <th>MEAN_BY_FICO_bank_grade</th>\n",
" <th>MEAN_BY_FICO_money_grade</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.514408</td>\n",
" <td>0.480607</td>\n",
" <td>0.557439</td>\n",
" <td>0.552045</td>\n",
" <td>0.470198</td>\n",
" <td>0.628789</td>\n",
" <td>0.569511</td>\n",
" <td>0.417236</td>\n",
" <td>NaN</td>\n",
" <td>0.558668</td>\n",
" <td>0.590247</td>\n",
" <td>0.555412</td>\n",
" <td>NaN</td>\n",
" <td>0.565923</td>\n",
" <td>0.589919</td>\n",
" <td>0.552626</td>\n",
" <td>0.567241</td>\n",
" <td>0.618311</td>\n",
" <td>0.539634</td>\n",
" <td>0.592753</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.399595</td>\n",
" <td>0.471629</td>\n",
" <td>0.531704</td>\n",
" <td>0.586804</td>\n",
" <td>0.533293</td>\n",
" <td>0.386507</td>\n",
" <td>0.487430</td>\n",
" <td>0.621101</td>\n",
" <td>0.954736</td>\n",
" <td>0.592603</td>\n",
" <td>0.601562</td>\n",
" <td>0.575870</td>\n",
" <td>0.662334</td>\n",
" <td>0.494463</td>\n",
" <td>0.637252</td>\n",
" <td>0.560027</td>\n",
" <td>0.535656</td>\n",
" <td>0.412217</td>\n",
" <td>0.553798</td>\n",
" <td>0.300362</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.600451</td>\n",
" <td>0.558710</td>\n",
" <td>0.528031</td>\n",
" <td>0.533861</td>\n",
" <td>0.474686</td>\n",
" <td>0.695447</td>\n",
" <td>0.565312</td>\n",
" <td>0.623777</td>\n",
" <td>0.452136</td>\n",
" <td>0.584222</td>\n",
" <td>0.524641</td>\n",
" <td>0.425068</td>\n",
" <td>0.411461</td>\n",
" <td>0.540582</td>\n",
" <td>NaN</td>\n",
" <td>0.478960</td>\n",
" <td>0.555833</td>\n",
" <td>0.450497</td>\n",
" <td>0.527799</td>\n",
" <td>0.613247</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.621116</td>\n",
" <td>0.560215</td>\n",
" <td>0.553142</td>\n",
" <td>0.512958</td>\n",
" <td>0.493165</td>\n",
" <td>0.557131</td>\n",
" <td>0.558673</td>\n",
" <td>0.621989</td>\n",
" <td>0.258950</td>\n",
" <td>0.569920</td>\n",
" <td>0.590752</td>\n",
" <td>0.400652</td>\n",
" <td>NaN</td>\n",
" <td>0.592421</td>\n",
" <td>0.600287</td>\n",
" <td>0.522023</td>\n",
" <td>0.547506</td>\n",
" <td>0.599910</td>\n",
" <td>0.775117</td>\n",
" <td>0.610444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.480745</td>\n",
" <td>0.498787</td>\n",
" <td>0.534709</td>\n",
" <td>0.538767</td>\n",
" <td>0.468859</td>\n",
" <td>0.381862</td>\n",
" <td>0.507195</td>\n",
" <td>0.440845</td>\n",
" <td>NaN</td>\n",
" <td>0.601495</td>\n",
" <td>0.563782</td>\n",
" <td>0.638486</td>\n",
" <td>0.000000</td>\n",
" <td>0.572539</td>\n",
" <td>0.575014</td>\n",
" <td>0.467768</td>\n",
" <td>0.537748</td>\n",
" <td>0.409804</td>\n",
" <td>0.435375</td>\n",
" <td>0.598573</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>0.689922</td>\n",
" <td>0.565133</td>\n",
" <td>0.538604</td>\n",
" <td>0.505781</td>\n",
" <td>0.592780</td>\n",
" <td>0.632656</td>\n",
" <td>0.580893</td>\n",
" <td>0.660443</td>\n",
" <td>0.000000</td>\n",
" <td>0.566118</td>\n",
" <td>0.565535</td>\n",
" <td>0.560903</td>\n",
" <td>0.577396</td>\n",
" <td>0.460358</td>\n",
" <td>0.610350</td>\n",
" <td>0.643994</td>\n",
" <td>0.555780</td>\n",
" <td>0.597999</td>\n",
" <td>0.716393</td>\n",
" <td>0.607516</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>0.596039</td>\n",
" <td>0.481462</td>\n",
" <td>0.556548</td>\n",
" <td>0.527089</td>\n",
" <td>0.479543</td>\n",
" <td>0.748344</td>\n",
" <td>0.522008</td>\n",
" <td>0.450600</td>\n",
" <td>0.588842</td>\n",
" <td>0.572307</td>\n",
" <td>0.563785</td>\n",
" <td>0.420617</td>\n",
" <td>0.393934</td>\n",
" <td>0.538669</td>\n",
" <td>0.550049</td>\n",
" <td>0.597296</td>\n",
" <td>0.569755</td>\n",
" <td>0.641614</td>\n",
" <td>0.538617</td>\n",
" <td>0.573143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>0.341107</td>\n",
" <td>0.478082</td>\n",
" <td>0.596506</td>\n",
" <td>0.605590</td>\n",
" <td>0.529023</td>\n",
" <td>0.412390</td>\n",
" <td>0.540931</td>\n",
" <td>0.435653</td>\n",
" <td>0.722505</td>\n",
" <td>0.576835</td>\n",
" <td>0.607650</td>\n",
" <td>0.560163</td>\n",
" <td>0.629736</td>\n",
" <td>0.564795</td>\n",
" <td>0.539674</td>\n",
" <td>0.575041</td>\n",
" <td>0.560729</td>\n",
" <td>0.616100</td>\n",
" <td>0.530884</td>\n",
" <td>0.311064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>0.567303</td>\n",
" <td>0.567633</td>\n",
" <td>0.521030</td>\n",
" <td>0.526159</td>\n",
" <td>0.444900</td>\n",
" <td>0.616942</td>\n",
" <td>0.576096</td>\n",
" <td>0.405253</td>\n",
" <td>0.500642</td>\n",
" <td>0.585887</td>\n",
" <td>0.516612</td>\n",
" <td>0.399882</td>\n",
" <td>0.359226</td>\n",
" <td>0.551036</td>\n",
" <td>NaN</td>\n",
" <td>0.600635</td>\n",
" <td>0.537139</td>\n",
" <td>0.588834</td>\n",
" <td>0.557432</td>\n",
" <td>0.590692</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>0.446034</td>\n",
" <td>0.562476</td>\n",
" <td>0.635182</td>\n",
" <td>0.551405</td>\n",
" <td>0.590705</td>\n",
" <td>0.411524</td>\n",
" <td>0.555246</td>\n",
" <td>0.436040</td>\n",
" <td>0.751080</td>\n",
" <td>0.582929</td>\n",
" <td>0.577663</td>\n",
" <td>0.597556</td>\n",
" <td>0.474470</td>\n",
" <td>0.544683</td>\n",
" <td>0.532315</td>\n",
" <td>0.529764</td>\n",
" <td>0.588623</td>\n",
" <td>0.412665</td>\n",
" <td>0.560242</td>\n",
" <td>0.610674</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>0.718677</td>\n",
" <td>0.554815</td>\n",
" <td>0.547714</td>\n",
" <td>0.593125</td>\n",
" <td>0.575141</td>\n",
" <td>0.515210</td>\n",
" <td>0.522176</td>\n",
" <td>0.819258</td>\n",
" <td>0.961284</td>\n",
" <td>0.557430</td>\n",
" <td>0.571925</td>\n",
" <td>0.709487</td>\n",
" <td>0.975306</td>\n",
" <td>0.493997</td>\n",
" <td>0.629467</td>\n",
" <td>0.548483</td>\n",
" <td>0.555668</td>\n",
" <td>0.421022</td>\n",
" <td>0.411439</td>\n",
" <td>0.304808</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>0.505640</td>\n",
" <td>0.480934</td>\n",
" <td>0.630337</td>\n",
" <td>0.532345</td>\n",
" <td>0.653475</td>\n",
" <td>0.636792</td>\n",
" <td>0.552102</td>\n",
" <td>0.465760</td>\n",
" <td>0.000000</td>\n",
" <td>0.593045</td>\n",
" <td>0.488866</td>\n",
" <td>0.519531</td>\n",
" <td>0.693718</td>\n",
" <td>0.460013</td>\n",
" <td>NaN</td>\n",
" <td>0.712129</td>\n",
" <td>0.536216</td>\n",
" <td>0.433989</td>\n",
" <td>0.522623</td>\n",
" <td>0.599720</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>0.491589</td>\n",
" <td>0.562877</td>\n",
" <td>0.665407</td>\n",
" <td>0.548860</td>\n",
" <td>0.512891</td>\n",
" <td>0.463449</td>\n",
" <td>0.527565</td>\n",
" <td>0.439071</td>\n",
" <td>NaN</td>\n",
" <td>0.540502</td>\n",
" <td>0.594014</td>\n",
" <td>0.511023</td>\n",
" <td>NaN</td>\n",
" <td>0.459673</td>\n",
" <td>0.611954</td>\n",
" <td>0.477853</td>\n",
" <td>0.587451</td>\n",
" <td>0.622980</td>\n",
" <td>0.572420</td>\n",
" <td>0.570228</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.662177</td>\n",
" <td>0.596797</td>\n",
" <td>0.553217</td>\n",
" <td>0.608458</td>\n",
" <td>0.675574</td>\n",
" <td>0.416718</td>\n",
" <td>0.511088</td>\n",
" <td>0.613518</td>\n",
" <td>0.000000</td>\n",
" <td>0.577043</td>\n",
" <td>0.492403</td>\n",
" <td>0.636578</td>\n",
" <td>0.563900</td>\n",
" <td>0.592165</td>\n",
" <td>NaN</td>\n",
" <td>0.566640</td>\n",
" <td>0.555148</td>\n",
" <td>0.628035</td>\n",
" <td>0.543847</td>\n",
" <td>0.555817</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>0.435939</td>\n",
" <td>0.574792</td>\n",
" <td>0.525004</td>\n",
" <td>0.593447</td>\n",
" <td>0.571646</td>\n",
" <td>0.771404</td>\n",
" <td>0.579338</td>\n",
" <td>0.411885</td>\n",
" <td>0.000000</td>\n",
" <td>0.582626</td>\n",
" <td>0.570385</td>\n",
" <td>0.593333</td>\n",
" <td>0.448368</td>\n",
" <td>0.544599</td>\n",
" <td>0.579861</td>\n",
" <td>0.585532</td>\n",
" <td>0.576668</td>\n",
" <td>0.582027</td>\n",
" <td>0.565928</td>\n",
" <td>0.563533</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>0.385495</td>\n",
" <td>0.562662</td>\n",
" <td>0.635971</td>\n",
" <td>0.525332</td>\n",
" <td>0.592978</td>\n",
" <td>0.378778</td>\n",
" <td>0.556035</td>\n",
" <td>0.639218</td>\n",
" <td>0.598988</td>\n",
" <td>0.576741</td>\n",
" <td>0.574569</td>\n",
" <td>0.344212</td>\n",
" <td>0.320037</td>\n",
" <td>0.553074</td>\n",
" <td>0.608352</td>\n",
" <td>0.566250</td>\n",
" <td>0.584040</td>\n",
" <td>0.445670</td>\n",
" <td>0.529391</td>\n",
" <td>0.594761</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>0.000000</td>\n",
" <td>0.495067</td>\n",
" <td>0.534000</td>\n",
" <td>0.546389</td>\n",
" <td>0.486124</td>\n",
" <td>0.654226</td>\n",
" <td>0.528173</td>\n",
" <td>0.445896</td>\n",
" <td>0.780894</td>\n",
" <td>0.513742</td>\n",
" <td>0.524364</td>\n",
" <td>0.525246</td>\n",
" <td>NaN</td>\n",
" <td>0.559541</td>\n",
" <td>NaN</td>\n",
" <td>0.468477</td>\n",
" <td>0.554427</td>\n",
" <td>0.418101</td>\n",
" <td>0.567481</td>\n",
" <td>0.309794</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>0.544643</td>\n",
" <td>0.590473</td>\n",
" <td>0.551103</td>\n",
" <td>0.538717</td>\n",
" <td>0.629473</td>\n",
" <td>0.353057</td>\n",
" <td>0.510359</td>\n",
" <td>0.457843</td>\n",
" <td>0.504682</td>\n",
" <td>0.501220</td>\n",
" <td>0.602103</td>\n",
" <td>0.475611</td>\n",
" <td>1.021742</td>\n",
" <td>0.471475</td>\n",
" <td>0.617559</td>\n",
" <td>0.554895</td>\n",
" <td>0.548434</td>\n",
" <td>0.633648</td>\n",
" <td>0.538408</td>\n",
" <td>0.594591</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>NaN</td>\n",
" <td>0.570495</td>\n",
" <td>0.551690</td>\n",
" <td>0.526175</td>\n",
" <td>0.585401</td>\n",
" <td>0.669037</td>\n",
" <td>0.517079</td>\n",
" <td>0.430175</td>\n",
" <td>0.499156</td>\n",
" <td>0.542228</td>\n",
" <td>0.497908</td>\n",
" <td>0.552060</td>\n",
" <td>0.574923</td>\n",
" <td>0.480287</td>\n",
" <td>NaN</td>\n",
" <td>0.601119</td>\n",
" <td>0.562567</td>\n",
" <td>0.605356</td>\n",
" <td>0.760283</td>\n",
" <td>0.586534</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>0.442071</td>\n",
" <td>0.552913</td>\n",
" <td>0.537860</td>\n",
" <td>0.509681</td>\n",
" <td>0.564043</td>\n",
" <td>0.562240</td>\n",
" <td>0.521888</td>\n",
" <td>0.441127</td>\n",
" <td>0.482127</td>\n",
" <td>0.463432</td>\n",
" <td>0.495090</td>\n",
" <td>0.457132</td>\n",
" <td>0.425526</td>\n",
" <td>0.542427</td>\n",
" <td>NaN</td>\n",
" <td>0.546786</td>\n",
" <td>0.570758</td>\n",
" <td>0.410279</td>\n",
" <td>0.390257</td>\n",
" <td>0.600965</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>0.705710</td>\n",
" <td>0.583758</td>\n",
" <td>0.535857</td>\n",
" <td>0.602523</td>\n",
" <td>0.542614</td>\n",
" <td>0.403363</td>\n",
" <td>0.569330</td>\n",
" <td>0.602155</td>\n",
" <td>0.504850</td>\n",
" <td>0.577456</td>\n",
" <td>0.577150</td>\n",
" <td>0.566408</td>\n",
" <td>0.623221</td>\n",
" <td>0.544153</td>\n",
" <td>0.535300</td>\n",
" <td>0.570590</td>\n",
" <td>0.564524</td>\n",
" <td>0.440377</td>\n",
" <td>0.407099</td>\n",
" <td>0.595441</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>0.492529</td>\n",
" <td>0.574036</td>\n",
" <td>0.557160</td>\n",
" <td>0.583704</td>\n",
" <td>0.691635</td>\n",
" <td>0.740104</td>\n",
" <td>0.532544</td>\n",
" <td>0.465642</td>\n",
" <td>0.440931</td>\n",
" <td>0.551528</td>\n",
" <td>0.608329</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.492788</td>\n",
" <td>0.613752</td>\n",
" <td>0.567690</td>\n",
" <td>0.572480</td>\n",
" <td>0.640679</td>\n",
" <td>0.540953</td>\n",
" <td>0.558285</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>0.594290</td>\n",
" <td>0.576684</td>\n",
" <td>0.513610</td>\n",
" <td>0.507039</td>\n",
" <td>0.445850</td>\n",
" <td>0.519270</td>\n",
" <td>0.538449</td>\n",
" <td>0.601308</td>\n",
" <td>0.651846</td>\n",
" <td>0.557393</td>\n",
" <td>0.586974</td>\n",
" <td>0.689011</td>\n",
" <td>0.825342</td>\n",
" <td>0.491692</td>\n",
" <td>0.601148</td>\n",
" <td>0.550004</td>\n",
" <td>0.384344</td>\n",
" <td>0.433631</td>\n",
" <td>0.377150</td>\n",
" <td>0.557978</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>0.479365</td>\n",
" <td>0.558846</td>\n",
" <td>0.549372</td>\n",
" <td>0.632456</td>\n",
" <td>0.549878</td>\n",
" <td>0.628234</td>\n",
" <td>0.503599</td>\n",
" <td>0.447530</td>\n",
" <td>0.638196</td>\n",
" <td>0.583670</td>\n",
" <td>0.499517</td>\n",
" <td>0.489131</td>\n",
" <td>0.466024</td>\n",
" <td>0.540318</td>\n",
" <td>NaN</td>\n",
" <td>0.548223</td>\n",
" <td>0.551038</td>\n",
" <td>0.631408</td>\n",
" <td>0.523355</td>\n",
" <td>0.599886</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>0.380163</td>\n",
" <td>0.593435</td>\n",
" <td>0.549068</td>\n",
" <td>0.550198</td>\n",
" <td>0.629128</td>\n",
" <td>0.583356</td>\n",
" <td>0.534258</td>\n",
" <td>0.638170</td>\n",
" <td>1.007131</td>\n",
" <td>0.476231</td>\n",
" <td>0.570582</td>\n",
" <td>0.560521</td>\n",
" <td>0.579371</td>\n",
" <td>0.591506</td>\n",
" <td>0.592371</td>\n",
" <td>0.655137</td>\n",
" <td>0.586518</td>\n",
" <td>0.583810</td>\n",
" <td>0.557405</td>\n",
" <td>0.561100</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>0.488809</td>\n",
" <td>0.586816</td>\n",
" <td>0.532223</td>\n",
" <td>0.618709</td>\n",
" <td>0.566640</td>\n",
" <td>0.419143</td>\n",
" <td>0.590778</td>\n",
" <td>0.817721</td>\n",
" <td>0.482605</td>\n",
" <td>0.555521</td>\n",
" <td>0.589153</td>\n",
" <td>0.497214</td>\n",
" <td>0.421516</td>\n",
" <td>0.585126</td>\n",
" <td>0.559047</td>\n",
" <td>0.590415</td>\n",
" <td>0.550432</td>\n",
" <td>0.614652</td>\n",
" <td>0.567158</td>\n",
" <td>0.594082</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>0.566315</td>\n",
" <td>0.585008</td>\n",
" <td>0.525041</td>\n",
" <td>0.528135</td>\n",
" <td>0.607601</td>\n",
" <td>0.416148</td>\n",
" <td>0.541798</td>\n",
" <td>0.649801</td>\n",
" <td>0.392664</td>\n",
" <td>0.487621</td>\n",
" <td>0.579807</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>0.571235</td>\n",
" <td>0.595780</td>\n",
" <td>0.547550</td>\n",
" <td>0.574565</td>\n",
" <td>0.587794</td>\n",
" <td>0.539674</td>\n",
" <td>0.584159</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>0.715942</td>\n",
" <td>0.580126</td>\n",
" <td>0.619733</td>\n",
" <td>0.614030</td>\n",
" <td>0.640734</td>\n",
" <td>0.503685</td>\n",
" <td>0.623574</td>\n",
" <td>0.434004</td>\n",
" <td>0.900800</td>\n",
" <td>0.537346</td>\n",
" <td>0.507690</td>\n",
" <td>1.043889</td>\n",
" <td>0.966189</td>\n",
" <td>0.537135</td>\n",
" <td>NaN</td>\n",
" <td>0.550534</td>\n",
" <td>0.579043</td>\n",
" <td>0.606582</td>\n",
" <td>0.532812</td>\n",
" <td>0.613500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>0.857092</td>\n",
" <td>0.556828</td>\n",
" <td>0.517209</td>\n",
" <td>0.541466</td>\n",
" <td>0.492155</td>\n",
" <td>0.483721</td>\n",
" <td>0.525214</td>\n",
" <td>0.422937</td>\n",
" <td>0.744138</td>\n",
" <td>0.586310</td>\n",
" <td>0.595786</td>\n",
" <td>0.604487</td>\n",
" <td>0.607578</td>\n",
" <td>0.581054</td>\n",
" <td>0.576121</td>\n",
" <td>0.593059</td>\n",
" <td>0.537032</td>\n",
" <td>0.414923</td>\n",
" <td>0.544408</td>\n",
" <td>0.567776</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>0.604164</td>\n",
" <td>0.492078</td>\n",
" <td>0.613843</td>\n",
" <td>0.542200</td>\n",
" <td>0.458047</td>\n",
" <td>0.414791</td>\n",
" <td>0.574064</td>\n",
" <td>0.431095</td>\n",
" <td>NaN</td>\n",
" <td>0.483030</td>\n",
" <td>0.535320</td>\n",
" <td>0.509116</td>\n",
" <td>NaN</td>\n",
" <td>0.545880</td>\n",
" <td>NaN</td>\n",
" <td>0.526714</td>\n",
" <td>0.547180</td>\n",
" <td>0.415401</td>\n",
" <td>0.529267</td>\n",
" <td>0.575914</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>613</th>\n",
" <td>0.489738</td>\n",
" <td>0.607306</td>\n",
" <td>0.526408</td>\n",
" <td>0.510586</td>\n",
" <td>0.486561</td>\n",
" <td>0.466992</td>\n",
" <td>0.532574</td>\n",
" <td>0.465821</td>\n",
" <td>0.658530</td>\n",
" <td>0.516283</td>\n",
" <td>0.531519</td>\n",
" <td>0.333474</td>\n",
" <td>0.318690</td>\n",
" <td>0.484521</td>\n",
" <td>NaN</td>\n",
" <td>0.476779</td>\n",
" <td>0.591249</td>\n",
" <td>0.412393</td>\n",
" <td>0.565121</td>\n",
" <td>0.559084</td>\n",
" </tr>\n",
" <tr>\n",
" <th>614</th>\n",
" <td>0.499932</td>\n",
" <td>0.480113</td>\n",
" <td>0.521480</td>\n",
" <td>0.545987</td>\n",
" <td>0.562860</td>\n",
" <td>0.673133</td>\n",
" <td>0.609090</td>\n",
" <td>0.428061</td>\n",
" <td>0.451645</td>\n",
" <td>0.540044</td>\n",
" <td>0.565831</td>\n",
" <td>0.568201</td>\n",
" <td>0.597024</td>\n",
" <td>0.548924</td>\n",
" <td>0.560376</td>\n",
" <td>0.198304</td>\n",
" <td>0.441306</td>\n",
" <td>0.452562</td>\n",
" <td>0.408135</td>\n",
" <td>0.305424</td>\n",
" </tr>\n",
" <tr>\n",
" <th>615</th>\n",
" <td>0.639261</td>\n",
" <td>0.569018</td>\n",
" <td>0.523072</td>\n",
" <td>0.518019</td>\n",
" <td>0.481627</td>\n",
" <td>0.477079</td>\n",
" <td>0.652049</td>\n",
" <td>0.647790</td>\n",
" <td>0.967413</td>\n",
" <td>0.571714</td>\n",
" <td>0.561509</td>\n",
" <td>0.558305</td>\n",
" <td>0.676916</td>\n",
" <td>0.540087</td>\n",
" <td>0.582379</td>\n",
" <td>0.586912</td>\n",
" <td>0.540434</td>\n",
" <td>0.422486</td>\n",
" <td>0.529921</td>\n",
" <td>0.561793</td>\n",
" </tr>\n",
" <tr>\n",
" <th>616</th>\n",
" <td>0.647552</td>\n",
" <td>0.606052</td>\n",
" <td>0.522578</td>\n",
" <td>0.514015</td>\n",
" <td>0.451750</td>\n",
" <td>0.644271</td>\n",
" <td>0.544792</td>\n",
" <td>0.619109</td>\n",
" <td>NaN</td>\n",
" <td>0.585580</td>\n",
" <td>0.580609</td>\n",
" <td>0.573894</td>\n",
" <td>0.611168</td>\n",
" <td>0.588212</td>\n",
" <td>0.586592</td>\n",
" <td>0.444821</td>\n",
" <td>0.549915</td>\n",
" <td>0.599909</td>\n",
" <td>0.729253</td>\n",
" <td>0.601741</td>\n",
" </tr>\n",
" <tr>\n",
" <th>617</th>\n",
" <td>0.396613</td>\n",
" <td>0.495291</td>\n",
" <td>0.552300</td>\n",
" <td>0.538635</td>\n",
" <td>0.476567</td>\n",
" <td>0.489383</td>\n",
" <td>0.496252</td>\n",
" <td>0.608552</td>\n",
" <td>0.558447</td>\n",
" <td>0.555392</td>\n",
" <td>0.559328</td>\n",
" <td>0.548135</td>\n",
" <td>0.448466</td>\n",
" <td>0.541644</td>\n",
" <td>0.535462</td>\n",
" <td>0.574870</td>\n",
" <td>0.565676</td>\n",
" <td>0.604189</td>\n",
" <td>0.544326</td>\n",
" <td>0.601249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>618</th>\n",
" <td>0.442142</td>\n",
" <td>0.510878</td>\n",
" <td>0.559499</td>\n",
" <td>0.553444</td>\n",
" <td>0.464544</td>\n",
" <td>0.405325</td>\n",
" <td>0.495378</td>\n",
" <td>0.441151</td>\n",
" <td>NaN</td>\n",
" <td>0.572182</td>\n",
" <td>0.569837</td>\n",
" <td>0.523172</td>\n",
" <td>NaN</td>\n",
" <td>0.544301</td>\n",
" <td>0.600027</td>\n",
" <td>0.586354</td>\n",
" <td>0.563971</td>\n",
" <td>0.593425</td>\n",
" <td>0.555855</td>\n",
" <td>0.582305</td>\n",
" </tr>\n",
" <tr>\n",
" <th>619</th>\n",
" <td>0.560929</td>\n",
" <td>0.482807</td>\n",
" <td>0.514957</td>\n",
" <td>0.528409</td>\n",
" <td>0.451145</td>\n",
" <td>0.476352</td>\n",
" <td>0.528477</td>\n",
" <td>0.424144</td>\n",
" <td>0.261048</td>\n",
" <td>0.554442</td>\n",
" <td>0.501135</td>\n",
" <td>0.415267</td>\n",
" <td>0.000000</td>\n",
" <td>0.556455</td>\n",
" <td>NaN</td>\n",
" <td>0.492099</td>\n",
" <td>0.580763</td>\n",
" <td>0.621656</td>\n",
" <td>0.523891</td>\n",
" <td>0.615307</td>\n",
" </tr>\n",
" <tr>\n",
" <th>620</th>\n",
" <td>0.524681</td>\n",
" <td>0.588951</td>\n",
" <td>0.525971</td>\n",
" <td>0.599641</td>\n",
" <td>0.636821</td>\n",
" <td>0.630957</td>\n",
" <td>0.483778</td>\n",
" <td>0.609505</td>\n",
" <td>0.706871</td>\n",
" <td>0.569021</td>\n",
" <td>0.554770</td>\n",
" <td>0.388871</td>\n",
" <td>NaN</td>\n",
" <td>0.546556</td>\n",
" <td>0.520093</td>\n",
" <td>0.601826</td>\n",
" <td>0.559367</td>\n",
" <td>0.613836</td>\n",
" <td>0.746185</td>\n",
" <td>0.587612</td>\n",
" </tr>\n",
" <tr>\n",
" <th>621</th>\n",
" <td>0.409775</td>\n",
" <td>0.493831</td>\n",
" <td>0.665191</td>\n",
" <td>0.634709</td>\n",
" <td>0.564583</td>\n",
" <td>0.510501</td>\n",
" <td>0.582646</td>\n",
" <td>0.435809</td>\n",
" <td>0.496392</td>\n",
" <td>0.555776</td>\n",
" <td>0.528116</td>\n",
" <td>0.576457</td>\n",
" <td>0.475315</td>\n",
" <td>0.539152</td>\n",
" <td>NaN</td>\n",
" <td>0.604688</td>\n",
" <td>0.588426</td>\n",
" <td>0.417169</td>\n",
" <td>0.415797</td>\n",
" <td>0.300263</td>\n",
" </tr>\n",
" <tr>\n",
" <th>622</th>\n",
" <td>0.581404</td>\n",
" <td>0.474697</td>\n",
" <td>0.524079</td>\n",
" <td>0.533822</td>\n",
" <td>0.635228</td>\n",
" <td>0.591886</td>\n",
" <td>0.603514</td>\n",
" <td>0.420264</td>\n",
" <td>NaN</td>\n",
" <td>0.551805</td>\n",
" <td>0.504692</td>\n",
" <td>0.558201</td>\n",
" <td>0.638536</td>\n",
" <td>0.546526</td>\n",
" <td>NaN</td>\n",
" <td>0.598287</td>\n",
" <td>0.537669</td>\n",
" <td>0.410538</td>\n",
" <td>0.403266</td>\n",
" <td>0.614773</td>\n",
" </tr>\n",
" <tr>\n",
" <th>623</th>\n",
" <td>0.638957</td>\n",
" <td>0.579473</td>\n",
" <td>0.510750</td>\n",
" <td>0.558146</td>\n",
" <td>0.443033</td>\n",
" <td>0.638644</td>\n",
" <td>0.578666</td>\n",
" <td>0.400220</td>\n",
" <td>0.338235</td>\n",
" <td>0.501817</td>\n",
" <td>0.595843</td>\n",
" <td>0.462059</td>\n",
" <td>0.414028</td>\n",
" <td>0.591599</td>\n",
" <td>0.600706</td>\n",
" <td>0.462593</td>\n",
" <td>0.557431</td>\n",
" <td>0.415880</td>\n",
" <td>0.410764</td>\n",
" <td>0.577759</td>\n",
" </tr>\n",
" <tr>\n",
" <th>624</th>\n",
" <td>0.617777</td>\n",
" <td>0.567094</td>\n",
" <td>0.557383</td>\n",
" <td>0.550777</td>\n",
" <td>0.434485</td>\n",
" <td>0.512607</td>\n",
" <td>0.494634</td>\n",
" <td>0.441296</td>\n",
" <td>0.334417</td>\n",
" <td>0.557469</td>\n",
" <td>0.605213</td>\n",
" <td>0.586361</td>\n",
" <td>0.617611</td>\n",
" <td>0.453971</td>\n",
" <td>0.552165</td>\n",
" <td>0.573536</td>\n",
" <td>0.536661</td>\n",
" <td>0.445189</td>\n",
" <td>0.518879</td>\n",
" <td>0.589612</td>\n",
" </tr>\n",
" <tr>\n",
" <th>625</th>\n",
" <td>0.726766</td>\n",
" <td>0.599458</td>\n",
" <td>0.520104</td>\n",
" <td>0.635877</td>\n",
" <td>0.667738</td>\n",
" <td>0.696782</td>\n",
" <td>0.555221</td>\n",
" <td>0.625752</td>\n",
" <td>0.672060</td>\n",
" <td>0.552562</td>\n",
" <td>0.578641</td>\n",
" <td>0.857838</td>\n",
" <td>0.852483</td>\n",
" <td>0.545230</td>\n",
" <td>0.598777</td>\n",
" <td>0.564248</td>\n",
" <td>0.553893</td>\n",
" <td>0.631958</td>\n",
" <td>0.562895</td>\n",
" <td>0.569576</td>\n",
" </tr>\n",
" <tr>\n",
" <th>626</th>\n",
" <td>0.368907</td>\n",
" <td>0.600838</td>\n",
" <td>0.549067</td>\n",
" <td>0.519785</td>\n",
" <td>0.455055</td>\n",
" <td>0.583306</td>\n",
" <td>0.491012</td>\n",
" <td>0.644441</td>\n",
" <td>0.854962</td>\n",
" <td>0.544977</td>\n",
" <td>0.553244</td>\n",
" <td>0.586986</td>\n",
" <td>0.649442</td>\n",
" <td>0.588366</td>\n",
" <td>0.606696</td>\n",
" <td>0.439303</td>\n",
" <td>0.588955</td>\n",
" <td>0.629191</td>\n",
" <td>0.550540</td>\n",
" <td>0.588179</td>\n",
" </tr>\n",
" <tr>\n",
" <th>627</th>\n",
" <td>0.393864</td>\n",
" <td>0.577731</td>\n",
" <td>0.619000</td>\n",
" <td>0.528406</td>\n",
" <td>0.637928</td>\n",
" <td>0.728211</td>\n",
" <td>0.500405</td>\n",
" <td>0.412765</td>\n",
" <td>0.684010</td>\n",
" <td>0.566216</td>\n",
" <td>0.560937</td>\n",
" <td>0.414482</td>\n",
" <td>0.418930</td>\n",
" <td>0.544376</td>\n",
" <td>0.577874</td>\n",
" <td>0.656223</td>\n",
" <td>0.439602</td>\n",
" <td>0.449674</td>\n",
" <td>0.396117</td>\n",
" <td>0.320492</td>\n",
" </tr>\n",
" <tr>\n",
" <th>628</th>\n",
" <td>0.733891</td>\n",
" <td>0.569683</td>\n",
" <td>0.633168</td>\n",
" <td>0.539526</td>\n",
" <td>0.503580</td>\n",
" <td>0.634131</td>\n",
" <td>0.488279</td>\n",
" <td>0.454340</td>\n",
" <td>0.623472</td>\n",
" <td>0.489291</td>\n",
" <td>0.573058</td>\n",
" <td>0.525830</td>\n",
" <td>0.361336</td>\n",
" <td>0.578805</td>\n",
" <td>0.564222</td>\n",
" <td>0.572273</td>\n",
" <td>0.564077</td>\n",
" <td>0.630056</td>\n",
" <td>0.750471</td>\n",
" <td>0.586600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>629</th>\n",
" <td>0.429432</td>\n",
" <td>0.507270</td>\n",
" <td>0.508477</td>\n",
" <td>0.535929</td>\n",
" <td>0.606834</td>\n",
" <td>0.537523</td>\n",
" <td>0.540221</td>\n",
" <td>0.428355</td>\n",
" <td>0.963309</td>\n",
" <td>0.573690</td>\n",
" <td>0.590146</td>\n",
" <td>0.397022</td>\n",
" <td>0.394955</td>\n",
" <td>0.636669</td>\n",
" <td>0.615851</td>\n",
" <td>0.499661</td>\n",
" <td>0.590612</td>\n",
" <td>0.603043</td>\n",
" <td>0.528987</td>\n",
" <td>0.576071</td>\n",
" </tr>\n",
" <tr>\n",
" <th>630</th>\n",
" <td>0.643677</td>\n",
" <td>0.600904</td>\n",
" <td>0.552133</td>\n",
" <td>0.552062</td>\n",
" <td>0.562155</td>\n",
" <td>0.636712</td>\n",
" <td>0.577490</td>\n",
" <td>0.636691</td>\n",
" <td>NaN</td>\n",
" <td>0.539733</td>\n",
" <td>0.590087</td>\n",
" <td>0.998130</td>\n",
" <td>0.977529</td>\n",
" <td>0.579997</td>\n",
" <td>0.588206</td>\n",
" <td>0.575473</td>\n",
" <td>0.550830</td>\n",
" <td>0.425518</td>\n",
" <td>0.567338</td>\n",
" <td>0.579465</td>\n",
" </tr>\n",
" <tr>\n",
" <th>631</th>\n",
" <td>0.685566</td>\n",
" <td>0.564104</td>\n",
" <td>0.525149</td>\n",
" <td>0.532051</td>\n",
" <td>0.448672</td>\n",
" <td>0.729569</td>\n",
" <td>0.608351</td>\n",
" <td>0.409222</td>\n",
" <td>0.824516</td>\n",
" <td>0.567209</td>\n",
" <td>0.527055</td>\n",
" <td>0.675554</td>\n",
" <td>0.534375</td>\n",
" <td>0.577602</td>\n",
" <td>NaN</td>\n",
" <td>0.598330</td>\n",
" <td>0.548583</td>\n",
" <td>0.428353</td>\n",
" <td>0.553263</td>\n",
" <td>0.311792</td>\n",
" </tr>\n",
" <tr>\n",
" <th>632</th>\n",
" <td>0.835372</td>\n",
" <td>0.484628</td>\n",
" <td>0.548178</td>\n",
" <td>0.508186</td>\n",
" <td>0.553379</td>\n",
" <td>0.544059</td>\n",
" <td>0.592746</td>\n",
" <td>0.652745</td>\n",
" <td>0.480330</td>\n",
" <td>0.578384</td>\n",
" <td>0.579015</td>\n",
" <td>0.548860</td>\n",
" <td>0.458966</td>\n",
" <td>0.581298</td>\n",
" <td>0.593308</td>\n",
" <td>0.560393</td>\n",
" <td>0.567801</td>\n",
" <td>0.611434</td>\n",
" <td>0.537901</td>\n",
" <td>0.599544</td>\n",
" </tr>\n",
" <tr>\n",
" <th>633</th>\n",
" <td>0.444968</td>\n",
" <td>0.503748</td>\n",
" <td>0.664022</td>\n",
" <td>0.528182</td>\n",
" <td>0.510456</td>\n",
" <td>0.511410</td>\n",
" <td>0.575047</td>\n",
" <td>0.429100</td>\n",
" <td>NaN</td>\n",
" <td>0.516718</td>\n",
" <td>0.598426</td>\n",
" <td>0.501549</td>\n",
" <td>0.500581</td>\n",
" <td>0.575729</td>\n",
" <td>0.561384</td>\n",
" <td>0.569385</td>\n",
" <td>0.551586</td>\n",
" <td>0.605934</td>\n",
" <td>0.565047</td>\n",
" <td>0.612339</td>\n",
" </tr>\n",
" <tr>\n",
" <th>634</th>\n",
" <td>0.566937</td>\n",
" <td>0.569804</td>\n",
" <td>0.550783</td>\n",
" <td>0.552824</td>\n",
" <td>0.445611</td>\n",
" <td>0.636642</td>\n",
" <td>0.522260</td>\n",
" <td>0.426933</td>\n",
" <td>0.600997</td>\n",
" <td>0.484959</td>\n",
" <td>0.537635</td>\n",
" <td>0.550702</td>\n",
" <td>0.445528</td>\n",
" <td>0.502752</td>\n",
" <td>NaN</td>\n",
" <td>0.470227</td>\n",
" <td>0.550655</td>\n",
" <td>0.628123</td>\n",
" <td>0.565608</td>\n",
" <td>0.558727</td>\n",
" </tr>\n",
" <tr>\n",
" <th>635</th>\n",
" <td>0.418039</td>\n",
" <td>0.473949</td>\n",
" <td>0.556652</td>\n",
" <td>0.512724</td>\n",
" <td>0.479377</td>\n",
" <td>0.572998</td>\n",
" <td>0.540253</td>\n",
" <td>0.422829</td>\n",
" <td>0.396142</td>\n",
" <td>0.563431</td>\n",
" <td>0.580206</td>\n",
" <td>0.487207</td>\n",
" <td>0.487191</td>\n",
" <td>0.545337</td>\n",
" <td>0.539735</td>\n",
" <td>0.584241</td>\n",
" <td>0.567713</td>\n",
" <td>0.609148</td>\n",
" <td>0.549375</td>\n",
" <td>0.604597</td>\n",
" </tr>\n",
" <tr>\n",
" <th>636</th>\n",
" <td>0.945199</td>\n",
" <td>0.499975</td>\n",
" <td>0.520603</td>\n",
" <td>0.540285</td>\n",
" <td>0.471651</td>\n",
" <td>0.494162</td>\n",
" <td>0.560343</td>\n",
" <td>0.607832</td>\n",
" <td>NaN</td>\n",
" <td>0.555692</td>\n",
" <td>0.511512</td>\n",
" <td>0.555793</td>\n",
" <td>0.000000</td>\n",
" <td>0.549386</td>\n",
" <td>NaN</td>\n",
" <td>0.527056</td>\n",
" <td>0.418480</td>\n",
" <td>0.275811</td>\n",
" <td>0.427600</td>\n",
" <td>0.323582</td>\n",
" </tr>\n",
" <tr>\n",
" <th>637</th>\n",
" <td>0.409891</td>\n",
" <td>0.466968</td>\n",
" <td>0.521661</td>\n",
" <td>0.538057</td>\n",
" <td>0.457353</td>\n",
" <td>0.379463</td>\n",
" <td>0.531872</td>\n",
" <td>0.426646</td>\n",
" <td>NaN</td>\n",
" <td>0.565371</td>\n",
" <td>0.590817</td>\n",
" <td>0.694911</td>\n",
" <td>1.000346</td>\n",
" <td>0.490250</td>\n",
" <td>0.588388</td>\n",
" <td>0.195205</td>\n",
" <td>0.564184</td>\n",
" <td>0.586099</td>\n",
" <td>0.535352</td>\n",
" <td>0.582300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>638</th>\n",
" <td>0.412502</td>\n",
" <td>0.469243</td>\n",
" <td>0.515885</td>\n",
" <td>0.520676</td>\n",
" <td>0.599277</td>\n",
" <td>0.486696</td>\n",
" <td>0.527526</td>\n",
" <td>0.659744</td>\n",
" <td>0.604016</td>\n",
" <td>0.471002</td>\n",
" <td>0.556931</td>\n",
" <td>0.517694</td>\n",
" <td>NaN</td>\n",
" <td>0.542737</td>\n",
" <td>0.571940</td>\n",
" <td>0.603734</td>\n",
" <td>0.581519</td>\n",
" <td>0.585005</td>\n",
" <td>0.535068</td>\n",
" <td>0.577468</td>\n",
" </tr>\n",
" <tr>\n",
" <th>639</th>\n",
" <td>0.607184</td>\n",
" <td>0.508483</td>\n",
" <td>0.535171</td>\n",
" <td>0.521519</td>\n",
" <td>0.598296</td>\n",
" <td>0.434427</td>\n",
" <td>0.490729</td>\n",
" <td>0.659464</td>\n",
" <td>1.047342</td>\n",
" <td>0.540837</td>\n",
" <td>0.512710</td>\n",
" <td>0.587504</td>\n",
" <td>0.615949</td>\n",
" <td>0.563711</td>\n",
" <td>NaN</td>\n",
" <td>0.564084</td>\n",
" <td>0.567587</td>\n",
" <td>0.605438</td>\n",
" <td>0.535939</td>\n",
" <td>0.285601</td>\n",
" </tr>\n",
" <tr>\n",
" <th>640</th>\n",
" <td>0.543586</td>\n",
" <td>0.468032</td>\n",
" <td>0.513752</td>\n",
" <td>0.543056</td>\n",
" <td>0.434559</td>\n",
" <td>0.610012</td>\n",
" <td>0.619537</td>\n",
" <td>0.400926</td>\n",
" <td>0.000000</td>\n",
" <td>0.571274</td>\n",
" <td>0.568197</td>\n",
" <td>0.664080</td>\n",
" <td>0.819278</td>\n",
" <td>0.578948</td>\n",
" <td>0.583322</td>\n",
" <td>0.542119</td>\n",
" <td>0.573814</td>\n",
" <td>0.604734</td>\n",
" <td>0.546835</td>\n",
" <td>0.599933</td>\n",
" </tr>\n",
" <tr>\n",
" <th>641</th>\n",
" <td>0.686936</td>\n",
" <td>0.499588</td>\n",
" <td>0.606866</td>\n",
" <td>0.522949</td>\n",
" <td>0.485452</td>\n",
" <td>0.746731</td>\n",
" <td>0.513659</td>\n",
" <td>0.654159</td>\n",
" <td>0.336987</td>\n",
" <td>0.586772</td>\n",
" <td>0.523272</td>\n",
" <td>0.592889</td>\n",
" <td>0.572729</td>\n",
" <td>0.566553</td>\n",
" <td>NaN</td>\n",
" <td>0.433131</td>\n",
" <td>0.549640</td>\n",
" <td>0.414026</td>\n",
" <td>0.518199</td>\n",
" <td>0.587879</td>\n",
" </tr>\n",
" <tr>\n",
" <th>642</th>\n",
" <td>0.460198</td>\n",
" <td>0.592736</td>\n",
" <td>0.534289</td>\n",
" <td>0.592368</td>\n",
" <td>0.582349</td>\n",
" <td>0.686744</td>\n",
" <td>0.551060</td>\n",
" <td>0.425988</td>\n",
" <td>0.000000</td>\n",
" <td>0.546650</td>\n",
" <td>0.518340</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.544594</td>\n",
" <td>NaN</td>\n",
" <td>0.579511</td>\n",
" <td>0.563272</td>\n",
" <td>0.580991</td>\n",
" <td>0.532227</td>\n",
" <td>0.564708</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>643 rows × 20 columns</p>\n",
"</div>"
],
"text/plain": [
" MEAN_BY_age_at_application MEAN_BY_residence_rent_or_own \\\n",
"0 0.514408 0.480607 \n",
"1 0.399595 0.471629 \n",
"2 0.600451 0.558710 \n",
"3 0.621116 0.560215 \n",
"4 0.480745 0.498787 \n",
"5 0.689922 0.565133 \n",
"6 0.596039 0.481462 \n",
"7 0.341107 0.478082 \n",
"8 0.567303 0.567633 \n",
"9 0.446034 0.562476 \n",
"10 0.718677 0.554815 \n",
"11 0.505640 0.480934 \n",
"12 0.491589 0.562877 \n",
"13 0.662177 0.596797 \n",
"14 0.435939 0.574792 \n",
"15 0.385495 0.562662 \n",
"16 0.000000 0.495067 \n",
"17 0.544643 0.590473 \n",
"18 NaN 0.570495 \n",
"19 0.442071 0.552913 \n",
"20 0.705710 0.583758 \n",
"21 0.492529 0.574036 \n",
"22 0.594290 0.576684 \n",
"23 0.479365 0.558846 \n",
"24 0.380163 0.593435 \n",
"25 0.488809 0.586816 \n",
"26 0.566315 0.585008 \n",
"27 0.715942 0.580126 \n",
"28 0.857092 0.556828 \n",
"29 0.604164 0.492078 \n",
".. ... ... \n",
"613 0.489738 0.607306 \n",
"614 0.499932 0.480113 \n",
"615 0.639261 0.569018 \n",
"616 0.647552 0.606052 \n",
"617 0.396613 0.495291 \n",
"618 0.442142 0.510878 \n",
"619 0.560929 0.482807 \n",
"620 0.524681 0.588951 \n",
"621 0.409775 0.493831 \n",
"622 0.581404 0.474697 \n",
"623 0.638957 0.579473 \n",
"624 0.617777 0.567094 \n",
"625 0.726766 0.599458 \n",
"626 0.368907 0.600838 \n",
"627 0.393864 0.577731 \n",
"628 0.733891 0.569683 \n",
"629 0.429432 0.507270 \n",
"630 0.643677 0.600904 \n",
"631 0.685566 0.564104 \n",
"632 0.835372 0.484628 \n",
"633 0.444968 0.503748 \n",
"634 0.566937 0.569804 \n",
"635 0.418039 0.473949 \n",
"636 0.945199 0.499975 \n",
"637 0.409891 0.466968 \n",
"638 0.412502 0.469243 \n",
"639 0.607184 0.508483 \n",
"640 0.543586 0.468032 \n",
"641 0.686936 0.499588 \n",
"642 0.460198 0.592736 \n",
"\n",
" MEAN_BY_bank_account_direct_deposit MEAN_BY_year_of_application \\\n",
"0 0.557439 0.552045 \n",
"1 0.531704 0.586804 \n",
"2 0.528031 0.533861 \n",
"3 0.553142 0.512958 \n",
"4 0.534709 0.538767 \n",
"5 0.538604 0.505781 \n",
"6 0.556548 0.527089 \n",
"7 0.596506 0.605590 \n",
"8 0.521030 0.526159 \n",
"9 0.635182 0.551405 \n",
"10 0.547714 0.593125 \n",
"11 0.630337 0.532345 \n",
"12 0.665407 0.548860 \n",
"13 0.553217 0.608458 \n",
"14 0.525004 0.593447 \n",
"15 0.635971 0.525332 \n",
"16 0.534000 0.546389 \n",
"17 0.551103 0.538717 \n",
"18 0.551690 0.526175 \n",
"19 0.537860 0.509681 \n",
"20 0.535857 0.602523 \n",
"21 0.557160 0.583704 \n",
"22 0.513610 0.507039 \n",
"23 0.549372 0.632456 \n",
"24 0.549068 0.550198 \n",
"25 0.532223 0.618709 \n",
"26 0.525041 0.528135 \n",
"27 0.619733 0.614030 \n",
"28 0.517209 0.541466 \n",
"29 0.613843 0.542200 \n",
".. ... ... \n",
"613 0.526408 0.510586 \n",
"614 0.521480 0.545987 \n",
"615 0.523072 0.518019 \n",
"616 0.522578 0.514015 \n",
"617 0.552300 0.538635 \n",
"618 0.559499 0.553444 \n",
"619 0.514957 0.528409 \n",
"620 0.525971 0.599641 \n",
"621 0.665191 0.634709 \n",
"622 0.524079 0.533822 \n",
"623 0.510750 0.558146 \n",
"624 0.557383 0.550777 \n",
"625 0.520104 0.635877 \n",
"626 0.549067 0.519785 \n",
"627 0.619000 0.528406 \n",
"628 0.633168 0.539526 \n",
"629 0.508477 0.535929 \n",
"630 0.552133 0.552062 \n",
"631 0.525149 0.532051 \n",
"632 0.548178 0.508186 \n",
"633 0.664022 0.528182 \n",
"634 0.550783 0.552824 \n",
"635 0.556652 0.512724 \n",
"636 0.520603 0.540285 \n",
"637 0.521661 0.538057 \n",
"638 0.515885 0.520676 \n",
"639 0.535171 0.521519 \n",
"640 0.513752 0.543056 \n",
"641 0.606866 0.522949 \n",
"642 0.534289 0.592368 \n",
"\n",
" MEAN_BY_month_of_application MEAN_BY_day_of_application \\\n",
"0 0.470198 0.628789 \n",
"1 0.533293 0.386507 \n",
"2 0.474686 0.695447 \n",
"3 0.493165 0.557131 \n",
"4 0.468859 0.381862 \n",
"5 0.592780 0.632656 \n",
"6 0.479543 0.748344 \n",
"7 0.529023 0.412390 \n",
"8 0.444900 0.616942 \n",
"9 0.590705 0.411524 \n",
"10 0.575141 0.515210 \n",
"11 0.653475 0.636792 \n",
"12 0.512891 0.463449 \n",
"13 0.675574 0.416718 \n",
"14 0.571646 0.771404 \n",
"15 0.592978 0.378778 \n",
"16 0.486124 0.654226 \n",
"17 0.629473 0.353057 \n",
"18 0.585401 0.669037 \n",
"19 0.564043 0.562240 \n",
"20 0.542614 0.403363 \n",
"21 0.691635 0.740104 \n",
"22 0.445850 0.519270 \n",
"23 0.549878 0.628234 \n",
"24 0.629128 0.583356 \n",
"25 0.566640 0.419143 \n",
"26 0.607601 0.416148 \n",
"27 0.640734 0.503685 \n",
"28 0.492155 0.483721 \n",
"29 0.458047 0.414791 \n",
".. ... ... \n",
"613 0.486561 0.466992 \n",
"614 0.562860 0.673133 \n",
"615 0.481627 0.477079 \n",
"616 0.451750 0.644271 \n",
"617 0.476567 0.489383 \n",
"618 0.464544 0.405325 \n",
"619 0.451145 0.476352 \n",
"620 0.636821 0.630957 \n",
"621 0.564583 0.510501 \n",
"622 0.635228 0.591886 \n",
"623 0.443033 0.638644 \n",
"624 0.434485 0.512607 \n",
"625 0.667738 0.696782 \n",
"626 0.455055 0.583306 \n",
"627 0.637928 0.728211 \n",
"628 0.503580 0.634131 \n",
"629 0.606834 0.537523 \n",
"630 0.562155 0.636712 \n",
"631 0.448672 0.729569 \n",
"632 0.553379 0.544059 \n",
"633 0.510456 0.511410 \n",
"634 0.445611 0.636642 \n",
"635 0.479377 0.572998 \n",
"636 0.471651 0.494162 \n",
"637 0.457353 0.379463 \n",
"638 0.599277 0.486696 \n",
"639 0.598296 0.434427 \n",
"640 0.434559 0.610012 \n",
"641 0.485452 0.746731 \n",
"642 0.582349 0.686744 \n",
"\n",
" MEAN_BY_day_of_week MEAN_BY_payment_frequency MEAN_BY_address_zip \\\n",
"0 0.569511 0.417236 NaN \n",
"1 0.487430 0.621101 0.954736 \n",
"2 0.565312 0.623777 0.452136 \n",
"3 0.558673 0.621989 0.258950 \n",
"4 0.507195 0.440845 NaN \n",
"5 0.580893 0.660443 0.000000 \n",
"6 0.522008 0.450600 0.588842 \n",
"7 0.540931 0.435653 0.722505 \n",
"8 0.576096 0.405253 0.500642 \n",
"9 0.555246 0.436040 0.751080 \n",
"10 0.522176 0.819258 0.961284 \n",
"11 0.552102 0.465760 0.000000 \n",
"12 0.527565 0.439071 NaN \n",
"13 0.511088 0.613518 0.000000 \n",
"14 0.579338 0.411885 0.000000 \n",
"15 0.556035 0.639218 0.598988 \n",
"16 0.528173 0.445896 0.780894 \n",
"17 0.510359 0.457843 0.504682 \n",
"18 0.517079 0.430175 0.499156 \n",
"19 0.521888 0.441127 0.482127 \n",
"20 0.569330 0.602155 0.504850 \n",
"21 0.532544 0.465642 0.440931 \n",
"22 0.538449 0.601308 0.651846 \n",
"23 0.503599 0.447530 0.638196 \n",
"24 0.534258 0.638170 1.007131 \n",
"25 0.590778 0.817721 0.482605 \n",
"26 0.541798 0.649801 0.392664 \n",
"27 0.623574 0.434004 0.900800 \n",
"28 0.525214 0.422937 0.744138 \n",
"29 0.574064 0.431095 NaN \n",
".. ... ... ... \n",
"613 0.532574 0.465821 0.658530 \n",
"614 0.609090 0.428061 0.451645 \n",
"615 0.652049 0.647790 0.967413 \n",
"616 0.544792 0.619109 NaN \n",
"617 0.496252 0.608552 0.558447 \n",
"618 0.495378 0.441151 NaN \n",
"619 0.528477 0.424144 0.261048 \n",
"620 0.483778 0.609505 0.706871 \n",
"621 0.582646 0.435809 0.496392 \n",
"622 0.603514 0.420264 NaN \n",
"623 0.578666 0.400220 0.338235 \n",
"624 0.494634 0.441296 0.334417 \n",
"625 0.555221 0.625752 0.672060 \n",
"626 0.491012 0.644441 0.854962 \n",
"627 0.500405 0.412765 0.684010 \n",
"628 0.488279 0.454340 0.623472 \n",
"629 0.540221 0.428355 0.963309 \n",
"630 0.577490 0.636691 NaN \n",
"631 0.608351 0.409222 0.824516 \n",
"632 0.592746 0.652745 0.480330 \n",
"633 0.575047 0.429100 NaN \n",
"634 0.522260 0.426933 0.600997 \n",
"635 0.540253 0.422829 0.396142 \n",
"636 0.560343 0.607832 NaN \n",
"637 0.531872 0.426646 NaN \n",
"638 0.527526 0.659744 0.604016 \n",
"639 0.490729 0.659464 1.047342 \n",
"640 0.619537 0.400926 0.000000 \n",
"641 0.513659 0.654159 0.336987 \n",
"642 0.551060 0.425988 0.000000 \n",
"\n",
" MEAN_BY_State MEAN_BY_more_than_2_phones MEAN_BY_routing_symbol \\\n",
"0 0.558668 0.590247 0.555412 \n",
"1 0.592603 0.601562 0.575870 \n",
"2 0.584222 0.524641 0.425068 \n",
"3 0.569920 0.590752 0.400652 \n",
"4 0.601495 0.563782 0.638486 \n",
"5 0.566118 0.565535 0.560903 \n",
"6 0.572307 0.563785 0.420617 \n",
"7 0.576835 0.607650 0.560163 \n",
"8 0.585887 0.516612 0.399882 \n",
"9 0.582929 0.577663 0.597556 \n",
"10 0.557430 0.571925 0.709487 \n",
"11 0.593045 0.488866 0.519531 \n",
"12 0.540502 0.594014 0.511023 \n",
"13 0.577043 0.492403 0.636578 \n",
"14 0.582626 0.570385 0.593333 \n",
"15 0.576741 0.574569 0.344212 \n",
"16 0.513742 0.524364 0.525246 \n",
"17 0.501220 0.602103 0.475611 \n",
"18 0.542228 0.497908 0.552060 \n",
"19 0.463432 0.495090 0.457132 \n",
"20 0.577456 0.577150 0.566408 \n",
"21 0.551528 0.608329 0.000000 \n",
"22 0.557393 0.586974 0.689011 \n",
"23 0.583670 0.499517 0.489131 \n",
"24 0.476231 0.570582 0.560521 \n",
"25 0.555521 0.589153 0.497214 \n",
"26 0.487621 0.579807 NaN \n",
"27 0.537346 0.507690 1.043889 \n",
"28 0.586310 0.595786 0.604487 \n",
"29 0.483030 0.535320 0.509116 \n",
".. ... ... ... \n",
"613 0.516283 0.531519 0.333474 \n",
"614 0.540044 0.565831 0.568201 \n",
"615 0.571714 0.561509 0.558305 \n",
"616 0.585580 0.580609 0.573894 \n",
"617 0.555392 0.559328 0.548135 \n",
"618 0.572182 0.569837 0.523172 \n",
"619 0.554442 0.501135 0.415267 \n",
"620 0.569021 0.554770 0.388871 \n",
"621 0.555776 0.528116 0.576457 \n",
"622 0.551805 0.504692 0.558201 \n",
"623 0.501817 0.595843 0.462059 \n",
"624 0.557469 0.605213 0.586361 \n",
"625 0.552562 0.578641 0.857838 \n",
"626 0.544977 0.553244 0.586986 \n",
"627 0.566216 0.560937 0.414482 \n",
"628 0.489291 0.573058 0.525830 \n",
"629 0.573690 0.590146 0.397022 \n",
"630 0.539733 0.590087 0.998130 \n",
"631 0.567209 0.527055 0.675554 \n",
"632 0.578384 0.579015 0.548860 \n",
"633 0.516718 0.598426 0.501549 \n",
"634 0.484959 0.537635 0.550702 \n",
"635 0.563431 0.580206 0.487207 \n",
"636 0.555692 0.511512 0.555793 \n",
"637 0.565371 0.590817 0.694911 \n",
"638 0.471002 0.556931 0.517694 \n",
"639 0.540837 0.512710 0.587504 \n",
"640 0.571274 0.568197 0.664080 \n",
"641 0.586772 0.523272 0.592889 \n",
"642 0.546650 0.518340 0.000000 \n",
"\n",
" MEAN_BY_institution MEAN_BY_home_phone_type MEAN_BY_other_phone_type \\\n",
"0 NaN 0.565923 0.589919 \n",
"1 0.662334 0.494463 0.637252 \n",
"2 0.411461 0.540582 NaN \n",
"3 NaN 0.592421 0.600287 \n",
"4 0.000000 0.572539 0.575014 \n",
"5 0.577396 0.460358 0.610350 \n",
"6 0.393934 0.538669 0.550049 \n",
"7 0.629736 0.564795 0.539674 \n",
"8 0.359226 0.551036 NaN \n",
"9 0.474470 0.544683 0.532315 \n",
"10 0.975306 0.493997 0.629467 \n",
"11 0.693718 0.460013 NaN \n",
"12 NaN 0.459673 0.611954 \n",
"13 0.563900 0.592165 NaN \n",
"14 0.448368 0.544599 0.579861 \n",
"15 0.320037 0.553074 0.608352 \n",
"16 NaN 0.559541 NaN \n",
"17 1.021742 0.471475 0.617559 \n",
"18 0.574923 0.480287 NaN \n",
"19 0.425526 0.542427 NaN \n",
"20 0.623221 0.544153 0.535300 \n",
"21 0.000000 0.492788 0.613752 \n",
"22 0.825342 0.491692 0.601148 \n",
"23 0.466024 0.540318 NaN \n",
"24 0.579371 0.591506 0.592371 \n",
"25 0.421516 0.585126 0.559047 \n",
"26 NaN 0.571235 0.595780 \n",
"27 0.966189 0.537135 NaN \n",
"28 0.607578 0.581054 0.576121 \n",
"29 NaN 0.545880 NaN \n",
".. ... ... ... \n",
"613 0.318690 0.484521 NaN \n",
"614 0.597024 0.548924 0.560376 \n",
"615 0.676916 0.540087 0.582379 \n",
"616 0.611168 0.588212 0.586592 \n",
"617 0.448466 0.541644 0.535462 \n",
"618 NaN 0.544301 0.600027 \n",
"619 0.000000 0.556455 NaN \n",
"620 NaN 0.546556 0.520093 \n",
"621 0.475315 0.539152 NaN \n",
"622 0.638536 0.546526 NaN \n",
"623 0.414028 0.591599 0.600706 \n",
"624 0.617611 0.453971 0.552165 \n",
"625 0.852483 0.545230 0.598777 \n",
"626 0.649442 0.588366 0.606696 \n",
"627 0.418930 0.544376 0.577874 \n",
"628 0.361336 0.578805 0.564222 \n",
"629 0.394955 0.636669 0.615851 \n",
"630 0.977529 0.579997 0.588206 \n",
"631 0.534375 0.577602 NaN \n",
"632 0.458966 0.581298 0.593308 \n",
"633 0.500581 0.575729 0.561384 \n",
"634 0.445528 0.502752 NaN \n",
"635 0.487191 0.545337 0.539735 \n",
"636 0.000000 0.549386 NaN \n",
"637 1.000346 0.490250 0.588388 \n",
"638 NaN 0.542737 0.571940 \n",
"639 0.615949 0.563711 NaN \n",
"640 0.819278 0.578948 0.583322 \n",
"641 0.572729 0.566553 NaN \n",
"642 0.000000 0.544594 NaN \n",
"\n",
" MEAN_BY_how_use_money MEAN_BY_FICO_telecom_grade \\\n",
"0 0.552626 0.567241 \n",
"1 0.560027 0.535656 \n",
"2 0.478960 0.555833 \n",
"3 0.522023 0.547506 \n",
"4 0.467768 0.537748 \n",
"5 0.643994 0.555780 \n",
"6 0.597296 0.569755 \n",
"7 0.575041 0.560729 \n",
"8 0.600635 0.537139 \n",
"9 0.529764 0.588623 \n",
"10 0.548483 0.555668 \n",
"11 0.712129 0.536216 \n",
"12 0.477853 0.587451 \n",
"13 0.566640 0.555148 \n",
"14 0.585532 0.576668 \n",
"15 0.566250 0.584040 \n",
"16 0.468477 0.554427 \n",
"17 0.554895 0.548434 \n",
"18 0.601119 0.562567 \n",
"19 0.546786 0.570758 \n",
"20 0.570590 0.564524 \n",
"21 0.567690 0.572480 \n",
"22 0.550004 0.384344 \n",
"23 0.548223 0.551038 \n",
"24 0.655137 0.586518 \n",
"25 0.590415 0.550432 \n",
"26 0.547550 0.574565 \n",
"27 0.550534 0.579043 \n",
"28 0.593059 0.537032 \n",
"29 0.526714 0.547180 \n",
".. ... ... \n",
"613 0.476779 0.591249 \n",
"614 0.198304 0.441306 \n",
"615 0.586912 0.540434 \n",
"616 0.444821 0.549915 \n",
"617 0.574870 0.565676 \n",
"618 0.586354 0.563971 \n",
"619 0.492099 0.580763 \n",
"620 0.601826 0.559367 \n",
"621 0.604688 0.588426 \n",
"622 0.598287 0.537669 \n",
"623 0.462593 0.557431 \n",
"624 0.573536 0.536661 \n",
"625 0.564248 0.553893 \n",
"626 0.439303 0.588955 \n",
"627 0.656223 0.439602 \n",
"628 0.572273 0.564077 \n",
"629 0.499661 0.590612 \n",
"630 0.575473 0.550830 \n",
"631 0.598330 0.548583 \n",
"632 0.560393 0.567801 \n",
"633 0.569385 0.551586 \n",
"634 0.470227 0.550655 \n",
"635 0.584241 0.567713 \n",
"636 0.527056 0.418480 \n",
"637 0.195205 0.564184 \n",
"638 0.603734 0.581519 \n",
"639 0.564084 0.567587 \n",
"640 0.542119 0.573814 \n",
"641 0.433131 0.549640 \n",
"642 0.579511 0.563272 \n",
"\n",
" MEAN_BY_FICO_retail_grade MEAN_BY_FICO_bank_grade \\\n",
"0 0.618311 0.539634 \n",
"1 0.412217 0.553798 \n",
"2 0.450497 0.527799 \n",
"3 0.599910 0.775117 \n",
"4 0.409804 0.435375 \n",
"5 0.597999 0.716393 \n",
"6 0.641614 0.538617 \n",
"7 0.616100 0.530884 \n",
"8 0.588834 0.557432 \n",
"9 0.412665 0.560242 \n",
"10 0.421022 0.411439 \n",
"11 0.433989 0.522623 \n",
"12 0.622980 0.572420 \n",
"13 0.628035 0.543847 \n",
"14 0.582027 0.565928 \n",
"15 0.445670 0.529391 \n",
"16 0.418101 0.567481 \n",
"17 0.633648 0.538408 \n",
"18 0.605356 0.760283 \n",
"19 0.410279 0.390257 \n",
"20 0.440377 0.407099 \n",
"21 0.640679 0.540953 \n",
"22 0.433631 0.377150 \n",
"23 0.631408 0.523355 \n",
"24 0.583810 0.557405 \n",
"25 0.614652 0.567158 \n",
"26 0.587794 0.539674 \n",
"27 0.606582 0.532812 \n",
"28 0.414923 0.544408 \n",
"29 0.415401 0.529267 \n",
".. ... ... \n",
"613 0.412393 0.565121 \n",
"614 0.452562 0.408135 \n",
"615 0.422486 0.529921 \n",
"616 0.599909 0.729253 \n",
"617 0.604189 0.544326 \n",
"618 0.593425 0.555855 \n",
"619 0.621656 0.523891 \n",
"620 0.613836 0.746185 \n",
"621 0.417169 0.415797 \n",
"622 0.410538 0.403266 \n",
"623 0.415880 0.410764 \n",
"624 0.445189 0.518879 \n",
"625 0.631958 0.562895 \n",
"626 0.629191 0.550540 \n",
"627 0.449674 0.396117 \n",
"628 0.630056 0.750471 \n",
"629 0.603043 0.528987 \n",
"630 0.425518 0.567338 \n",
"631 0.428353 0.553263 \n",
"632 0.611434 0.537901 \n",
"633 0.605934 0.565047 \n",
"634 0.628123 0.565608 \n",
"635 0.609148 0.549375 \n",
"636 0.275811 0.427600 \n",
"637 0.586099 0.535352 \n",
"638 0.585005 0.535068 \n",
"639 0.605438 0.535939 \n",
"640 0.604734 0.546835 \n",
"641 0.414026 0.518199 \n",
"642 0.580991 0.532227 \n",
"\n",
" MEAN_BY_FICO_money_grade \n",
"0 0.592753 \n",
"1 0.300362 \n",
"2 0.613247 \n",
"3 0.610444 \n",
"4 0.598573 \n",
"5 0.607516 \n",
"6 0.573143 \n",
"7 0.311064 \n",
"8 0.590692 \n",
"9 0.610674 \n",
"10 0.304808 \n",
"11 0.599720 \n",
"12 0.570228 \n",
"13 0.555817 \n",
"14 0.563533 \n",
"15 0.594761 \n",
"16 0.309794 \n",
"17 0.594591 \n",
"18 0.586534 \n",
"19 0.600965 \n",
"20 0.595441 \n",
"21 0.558285 \n",
"22 0.557978 \n",
"23 0.599886 \n",
"24 0.561100 \n",
"25 0.594082 \n",
"26 0.584159 \n",
"27 0.613500 \n",
"28 0.567776 \n",
"29 0.575914 \n",
".. ... \n",
"613 0.559084 \n",
"614 0.305424 \n",
"615 0.561793 \n",
"616 0.601741 \n",
"617 0.601249 \n",
"618 0.582305 \n",
"619 0.615307 \n",
"620 0.587612 \n",
"621 0.300263 \n",
"622 0.614773 \n",
"623 0.577759 \n",
"624 0.589612 \n",
"625 0.569576 \n",
"626 0.588179 \n",
"627 0.320492 \n",
"628 0.586600 \n",
"629 0.576071 \n",
"630 0.579465 \n",
"631 0.311792 \n",
"632 0.599544 \n",
"633 0.612339 \n",
"634 0.558727 \n",
"635 0.604597 \n",
"636 0.323582 \n",
"637 0.582300 \n",
"638 0.577468 \n",
"639 0.285601 \n",
"640 0.599933 \n",
"641 0.587879 \n",
"642 0.564708 \n",
"\n",
"[643 rows x 20 columns]"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_df[oneway_cat_aggr_cols]"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Full list of training features\n",
"full_cols=num_cols+oneway_cat_aggr_cols"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"trainx=full_df[full_cols].fillna(-1).values\n",
"trainy=full_df[target_col].fillna(-1).values.reshape(len(full_df))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Part III. Modeling & Model Selection"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Regularized Logistic Regression"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Searching.... \n",
"Fitting 10 folds for each of 6 candidates, totalling 60 fits\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.709770 - 0.2s\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.617816 - 0.2s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 0.2s\n",
"[Parallel(n_jobs=1)]: Done 2 jobs | elapsed: 0.4s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.593870 - 0.2s\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.665709 - 0.2s\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.533005 - 0.2s\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.594089 - 0.2s\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.480788 - 0.2s\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.584236 - 0.2s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 jobs | elapsed: 1.0s\n",
"[Parallel(n_jobs=1)]: Done 8 jobs | elapsed: 1.7s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.667980 - 0.2s\n",
"[CV] penalty=l1, C=0.045 .............................................\n",
"[CV] .................... penalty=l1, C=0.045, score=0.624490 - 0.2s\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.711686 - 0.1s\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.655172 - 0.1s\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.585249 - 0.1s\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.616858 - 0.1s\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.552709 - 0.1s\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.594089 - 0.1s\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.500493 - 0.0s\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.627586 - 0.1s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 13 jobs | elapsed: 2.4s\n",
"[Parallel(n_jobs=1)]: Done 18 jobs | elapsed: 2.7s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.646305 - 0.1s\n",
"[CV] penalty=l2, C=0.045 .............................................\n",
"[CV] .................... penalty=l2, C=0.045, score=0.665306 - 0.1s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.712644 - 0.2s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.619732 - 0.3s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.594828 - 0.2s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.657088 - 0.2s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.532020 - 0.2s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.589163 - 0.2s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.479803 - 0.2s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.586207 - 0.2s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.664039 - 0.2s\n",
"[CV] penalty=l1, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.05, score=0.627551 - 0.2s\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.712644 - 0.1s\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.658046 - 0.1s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 25 jobs | elapsed: 4.1s\n",
"[Parallel(n_jobs=1)]: Done 32 jobs | elapsed: 5.3s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.583333 - 0.1s\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.613985 - 0.1s\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.555665 - 0.1s\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.596059 - 0.0s\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.509360 - 0.0s\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.629557 - 0.1s\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.644335 - 0.1s\n",
"[CV] penalty=l2, C=0.05 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.05, score=0.667347 - 0.1s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.711686 - 0.2s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.616858 - 0.2s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.593870 - 0.2s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.652299 - 0.2s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.533990 - 0.2s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.586207 - 0.2s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.483744 - 0.2s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.587192 - 0.2s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.665025 - 0.2s\n",
"[CV] penalty=l1, C=0.055 .............................................\n",
"[CV] .................... penalty=l1, C=0.055, score=0.626531 - 0.2s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 41 jobs | elapsed: 6.2s\n",
"[Parallel(n_jobs=1)]: Done 50 jobs | elapsed: 8.2s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.712644 - 0.1s\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.656130 - 0.1s\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.583333 - 0.1s\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.614943 - 0.0s\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.557635 - 0.0s\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.594089 - 0.0s\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.505419 - 0.0s\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.634483 - 0.1s\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.644335 - 0.0s\n",
"[CV] penalty=l2, C=0.055 .............................................\n",
"[CV] .................... penalty=l2, C=0.055, score=0.671429 - 0.0s\n",
"Best score: 0.618\n",
"('Best parameters set:', {'penalty': 'l2', 'C': 0.055})\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 60 out of 60 | elapsed: 8.8s finished\n"
]
}
],
"source": [
"from sklearn.linear_model import LogisticRegression,ElasticNet\n",
"print (\"Searching.... \")\n",
"param_grid = {'C': [0.045,0.05,0.055]\n",
" , 'penalty': ['l1','l2']\n",
" }\n",
"(best_score, best_params) = search_model(trainx\n",
" , trainy\n",
" , LogisticRegression()\n",
" , param_grid\n",
" , n_jobs=1\n",
" , cv=10)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.617511221431 {'penalty': 'l2', 'C': 0.055}\n"
]
}
],
"source": [
"print(best_score, best_params)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Elastic Net"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Searching.... \n",
"Fitting 10 folds for each of 6 candidates, totalling 60 fits\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.641098 - 0.0s\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.653475 - 0.1s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 0.0s\n",
"[Parallel(n_jobs=1)]: Done 2 jobs | elapsed: 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.695402 - 0.0s\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.564706 - 0.0s\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.546537 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.655208 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.595897 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.641026 - 0.1s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 jobs | elapsed: 0.2s\n",
"[Parallel(n_jobs=1)]: Done 8 jobs | elapsed: 0.5s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.758789 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0 .............................................\n",
"[CV] .................... alpha=0, l1_ratio=0, score=0.731373 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.641098 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.653475 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.695402 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.564706 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.546537 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.655208 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.595897 - 0.1s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 13 jobs | elapsed: 1.0s\n",
"[Parallel(n_jobs=1)]: Done 18 jobs | elapsed: 1.4s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.641026 - 0.0s\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.758789 - 0.1s\n",
"[CV] alpha=0, l1_ratio=0.1 ...........................................\n",
"[CV] .................. alpha=0, l1_ratio=0.1, score=0.731373 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.714962 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.666023 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.632184 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.517647 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.610390 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.583333 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.536410 - 0.0s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.608205 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.604492 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.1, l1_ratio=0, score=0.728431 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.725379 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.658301 - 0.1s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 25 jobs | elapsed: 2.1s\n",
"[Parallel(n_jobs=1)]: Done 32 jobs | elapsed: 2.6s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.643678 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.548039 - 0.1s\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.601732 - 0.0s\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.584375 - 0.0s\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.527179 - 0.0s\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.591795 - 0.0s\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.601562 - 0.0s\n",
"[CV] alpha=0.1, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.1, l1_ratio=0.1, score=0.741176 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.717803 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.658301 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.644636 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.550980 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.610390 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.586458 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.522051 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.593846 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.595703 - 0.0s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 41 jobs | elapsed: 2.8s\n",
"[Parallel(n_jobs=1)]: Done 50 jobs | elapsed: 3.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] alpha=0.5, l1_ratio=0 ...........................................\n",
"[CV] .................. alpha=0.5, l1_ratio=0, score=0.735294 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.714015 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.639961 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.650383 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.556863 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.581169 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.617708 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.526154 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.586667 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.572266 - 0.0s\n",
"[CV] alpha=0.5, l1_ratio=0.1 .........................................\n",
"[CV] ................ alpha=0.5, l1_ratio=0.1, score=0.711765 - 0.0s\n",
"Best score: 0.648\n",
"('Best parameters set:', {'alpha': 0, 'l1_ratio': 0})\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 60 out of 60 | elapsed: 3.2s finished\n"
]
}
],
"source": [
"print (\"Searching.... \")\n",
"param_grid = {'l1_ratio': [0,0.1],\n",
" 'alpha': [0,0.1,0.5]\n",
" }\n",
"(best_score, best_params) = search_model(trainx\n",
" , trainy\n",
" , ElasticNet()\n",
" , param_grid\n",
" , n_jobs=1\n",
" , cv=10)"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.648421002347 {'alpha': 0, 'l1_ratio': 0}\n"
]
}
],
"source": [
"print(best_score, best_params)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Random Forest"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Searching.... \n",
"Fitting 10 folds for each of 1 candidates, totalling 10 fits\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.822797 - 13.5s\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.733716 - 13.9s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 13.5s\n",
"[Parallel(n_jobs=1)]: Done 2 jobs | elapsed: 27.4s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.773946 - 14.6s\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.778736 - 15.7s\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.641379 - 14.3s\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.760591 - 13.6s\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.816749 - 14.4s\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.750739 - 13.5s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 jobs | elapsed: 1.2min\n",
"[Parallel(n_jobs=1)]: Done 8 jobs | elapsed: 1.9min\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.785222 - 13.6s\n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500 \n",
"[CV] max_features=sqrt, min_samples_split=1, criterion=entropy, max_depth=20, n_estimators=3500, score=0.771429 - 13.4s\n",
"Best score: 0.764\n",
"('Best parameters set:', {'max_features': 'sqrt', 'min_samples_split': 1, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 3500})\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 10 out of 10 | elapsed: 2.3min finished\n"
]
}
],
"source": [
"print (\"Searching.... \")\n",
"param_grid = { 'n_estimators':[3500],\n",
" 'criterion': ['entropy'],\n",
" 'max_features': ['sqrt'], \n",
" 'max_depth':[20],\n",
" 'min_samples_split':[1] \n",
" }\n",
" \n",
"(best_score, best_params) = search_model(trainx\n",
" , trainy\n",
" , RandomForestClassifier()\n",
" , param_grid\n",
" , n_jobs=1\n",
" , cv=10)"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.763603745783 {'max_features': 'sqrt', 'min_samples_split': 1, 'criterion': 'entropy', 'max_depth': 20, 'n_estimators': 3500}\n"
]
}
],
"source": [
"print(best_score, best_params)"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"forest = RandomForestClassifier(max_features = 'sqrt', \n",
" min_samples_split = 1, \n",
" criterion = 'entropy', \n",
" max_depth = 20, \n",
"                                n_estimators = 3500)\n",
"\n",
"forest.fit(trainx, trainy)\n",
"importances = forest.feature_importances_\n",
"\n",
"def get_forest_importances():\n",
" indices = np.argsort(importances)[::-1]\n",
" print(\"Feature ranking:\")\n",
" for f in range(len(full_cols)):\n",
" print(\"%d. %s (%f)\" % (f + 1, full_cols[indices[f]], importances[indices[f]]))"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Feature ranking:\n",
"1. MEAN_BY_FICO_bank_grade (0.049853)\n",
"2. MEAN_BY_payment_frequency (0.040009)\n",
"3. MEAN_BY_age_at_application (0.037141)\n",
"4. MEAN_BY_FICO_money_grade (0.036729)\n",
"5. raw_FICO_retail (0.035689)\n",
"6. MEAN_BY_State (0.035307)\n",
"7. MEAN_BY_bank_account_direct_deposit (0.032788)\n",
"8. MEAN_BY_day_of_application (0.032621)\n",
"9. MEAN_BY_FICO_telecom_grade (0.031983)\n",
"10. MEAN_BY_day_of_week (0.031650)\n",
"11. raw_FICO_bank_card (0.031436)\n",
"12. MEAN_BY_month_of_application (0.028042)\n",
"13. MEAN_BY_FICO_retail_grade (0.027844)\n",
"14. age_at_application (0.027753)\n",
"15. MEAN_BY_home_phone_type (0.027196)\n",
"16. MEAN_BY_how_use_money (0.027075)\n",
"17. MEAN_BY_year_of_application (0.027041)\n",
"18. MEAN_BY_residence_rent_or_own (0.026985)\n",
"19. raw_FICO_telecom (0.026439)\n",
"20. raw_FICO_money (0.026095)\n",
"21. raw_l2c_score (0.025029)\n",
"22. MEAN_BY_more_than_2_phones (0.024848)\n",
"23. MEAN_BY_institution (0.024217)\n",
"24. MEAN_BY_routing_symbol (0.023279)\n",
"25. monthly_pay_ratio (0.022396)\n",
"26. monthly_income_amount (0.021499)\n",
"27. MEAN_BY_address_zip (0.020642)\n",
"28. monthly_rent_amount (0.019230)\n",
"29. total_payment_principal_ratio (0.018885)\n",
"30. payment_amount_approved (0.016925)\n",
"31. monthly_payment (0.015874)\n",
"32. total_payment (0.015322)\n",
"33. MEAN_BY_other_phone_type (0.014855)\n",
"34. payment_amount (0.014716)\n",
"35. num_payments (0.011727)\n",
"36. payment_approve_ratio (0.008909)\n",
"37. amount_requested (0.008375)\n",
"38. bank_account_duration_ordered (0.007140)\n",
"39. residence_duration_ordered (0.006664)\n",
"40. amount_approved (0.006433)\n",
"41. FICO_bank_grade_ordered (0.006056)\n",
"42. loan_duration (0.005805)\n",
"43. duration_approved (0.005217)\n",
"44. FICO_money_grade_ordered (0.004530)\n",
"45. request_approve_ratio (0.004379)\n",
"46. FICO_retail_grade_ordered (0.004348)\n",
"47. email_duration_ordered (0.002055)\n",
"48. FICO_telecom_gradea_ordered (0.000971)\n"
]
}
],
"source": [
"# Display the ranked random-forest feature importances computed above.\n",
"get_forest_importances()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### XGboost"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Searching.... \n",
"Fitting 10 folds for each of 4 candidates, totalling 40 fits\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.851533 - 1.9s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.833333 - 2.6s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 1.9s\n",
"[Parallel(n_jobs=1)]: Done 2 jobs | elapsed: 4.5s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.840038 - 1.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.880268 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.728079 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.814778 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.910345 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.771429 - 1.4s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 jobs | elapsed: 8.9s\n",
"[Parallel(n_jobs=1)]: Done 8 jobs | elapsed: 13.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.868966 - 1.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.897959 - 1.6s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.860153 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.828544 - 1.3s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.847701 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.877395 - 1.6s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.681773 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.814778 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.905419 - 1.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.790148 - 1.4s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 13 jobs | elapsed: 20.3s\n",
"[Parallel(n_jobs=1)]: Done 18 jobs | elapsed: 27.6s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.881773 - 1.3s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.5, score=0.889796 - 1.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.834291 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.816092 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.846743 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.866858 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.708374 - 1.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.809852 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.908374 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.797044 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.865025 - 1.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.18, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.891837 - 1.3s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.873563 - 2.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.820881 - 1.6s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 25 jobs | elapsed: 37.6s\n",
"[Parallel(n_jobs=1)]: Done 32 jobs | elapsed: 48.7s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.853448 - 2.2s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.867816 - 2.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.738916 - 1.6s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.821675 - 2.7s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.909360 - 1.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.795074 - 1.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.885714 - 1.6s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.185, min_child_weight=1, n_estimators=2500, subsample=1, max_depth=2, gamma=0.55, score=0.897959 - 1.4s\n",
"Best score: 0.846\n",
"('Best parameters set:', {'colsample_bytree': 0.8, 'learning_rate': 0.185, 'min_child_weight': 1, 'n_estimators': 2500, 'subsample': 1, 'max_depth': 2, 'gamma': 0.55})\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 40 out of 40 | elapsed: 1.1min finished\n"
]
}
],
"source": [
"print (\"Searching.... \")\n",
"# Candidate xgboost hyper-parameters: 2 learning rates x 2 gammas = 4\n",
"# combinations, each evaluated with 10-fold CV (40 fits total).\n",
"param_grid = {\n",
"    'learning_rate': [0.18, 0.185],\n",
"    'max_depth': [2],\n",
"    'n_estimators': [2500],\n",
"    'min_child_weight': [1],\n",
"    'subsample': [1],\n",
"    'colsample_bytree': [0.8],\n",
"    'gamma': [0.5, 0.55],\n",
"}\n",
"(best_score, best_params) = search_model(trainx,\n",
"                                         trainy,\n",
"                                         xgb.XGBClassifier(missing=-1, nthread=-1),\n",
"                                         param_grid,\n",
"                                         n_jobs=1,\n",
"                                         cv=10)"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.846407141538 {'colsample_bytree': 0.8, 'learning_rate': 0.185, 'min_child_weight': 1, 'n_estimators': 2500, 'subsample': 1, 'max_depth': 2, 'gamma': 0.55}\n"
]
}
],
"source": [
"# Best CV AUC and parameter set found by the xgboost grid search (Python 2 print).\n",
"print best_score, best_params"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Final xgboost training parameters, taken from the grid search above.\n",
"# NOTE(review): the search selected gamma=0.55 (with this eta=0.185),\n",
"# but gamma is set to 0.5 here -- confirm the deviation is intentional.\n",
"params = { \"objective\": \"binary:logistic\"\n",
"         , \"eval_metric\": \"auc\"\n",
"         , \"eta\": 0.185\n",
"         , \"min_child_weight\": 1\n",
"         , \"subsample\": 1\n",
"         , \"colsample_bytree\": 0.8\n",
"         , \"max_depth\": 2\n",
"         , \"gamma\": 0.5\n",
"         }"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_importance(model,feature_names):\n",
"    # Return a DataFrame of xgboost feature importances, sorted descending.\n",
"    # Fscore keys look like 'f12'; strip the leading 'f' to index into the\n",
"    # supplied feature-name list. Fix: the feature_names parameter was\n",
"    # previously ignored in favour of the global full_cols; the existing\n",
"    # call site passes full_cols, so its behaviour is unchanged.\n",
"    importance = model.get_fscore()\n",
"    tuples = [(feature_names[int(k[1:])], importance[k]) for k in importance]\n",
"    tuples = sorted(tuples, key=lambda x: x[1], reverse=True)\n",
"    return pd.DataFrame(tuples, columns=['name','importance'])"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Train the final booster on the full training set with the tuned settings.\n",
"xgtrain = xgb.DMatrix(trainx, label=trainy, missing=-1)\n",
"plst = list(params.items())\n",
"model = xgb.train(params=plst, dtrain=xgtrain, num_boost_round=2500)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" name importance\n",
"0 MEAN_BY_age_at_application 52\n",
"1 MEAN_BY_FICO_telecom_grade 49\n",
"2 MEAN_BY_institution 48\n",
"3 MEAN_BY_how_use_money 42\n",
"4 MEAN_BY_routing_symbol 42\n",
"5 MEAN_BY_State 39\n",
"6 age_at_application 38\n",
"7 MEAN_BY_day_of_week 37\n",
"8 MEAN_BY_payment_frequency 37\n",
"9 MEAN_BY_FICO_bank_grade 36\n",
"10 MEAN_BY_residence_rent_or_own 33\n",
"11 MEAN_BY_month_of_application 32\n",
"12 MEAN_BY_day_of_application 32\n",
"13 MEAN_BY_bank_account_direct_deposit 29\n",
"14 MEAN_BY_year_of_application 27\n",
"15 MEAN_BY_FICO_money_grade 26\n",
"16 raw_FICO_retail 26\n",
"17 raw_l2c_score 25\n",
"18 raw_FICO_bank_card 25\n",
"19 MEAN_BY_address_zip 25\n",
"20 MEAN_BY_home_phone_type 24\n",
"21 MEAN_BY_other_phone_type 19\n",
"22 raw_FICO_telecom 15\n",
"23 raw_FICO_money 14\n",
"24 payment_approve_ratio 13\n",
"25 total_payment_principal_ratio 12\n",
"26 MEAN_BY_FICO_retail_grade 11\n",
"27 MEAN_BY_more_than_2_phones 11\n",
"28 amount_requested 7\n",
"29 total_payment 7\n",
"30 monthly_rent_amount 6\n",
"31 payment_amount 6\n",
"32 num_payments 6\n",
"33 monthly_income_amount 6\n",
"34 payment_amount_approved 4\n",
"35 loan_duration 3\n",
"36 monthly_pay_ratio 3\n",
"37 monthly_payment 3\n",
"38 amount_approved 2\n",
"39 bank_account_duration_ordered 2\n",
"40 FICO_money_grade_ordered 2\n",
"41 FICO_telecom_gradea_ordered 1\n"
]
}
],
"source": [
"# Show xgboost importances mapped back to column names (Python 2 print).\n",
"print get_importance(model,full_cols)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment