Skip to content

Instantly share code, notes, and snippets.

@bbzzzz
Created November 1, 2015 17:28
Show Gist options
  • Save bbzzzz/3ce7348a036b2b1a82db to your computer and use it in GitHub Desktop.
Save bbzzzz/3ce7348a036b2b1a82db to your computer and use it in GitHub Desktop.
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import sys\n",
"sys.path.append('C:\\\\Python27\\\\Lib\\\\xgboost\\\\wrapper')\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import xgboost as xgb\n",
"from sklearn.ensemble import RandomForestRegressor\n",
"from sklearn import pipeline, metrics, grid_search, cross_validation,preprocessing, feature_extraction\n",
"import pandas as pd\n",
"import numpy as np \n",
"import scipy as sc\n",
"import time\n",
"from datetime import datetime\n",
"import math\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def search_model(train_x, train_y, est, param_grid, n_jobs, cv):\n",
"    \"\"\"Grid-search `est` over `param_grid`, scored by ROC AUC.\n",
"\n",
"    Prints and returns the best cross-validated score and the parameter\n",
"    combination that produced it, as a (best_score, best_params) tuple.\n",
"    The search is created with refit=False.\n",
"    \"\"\"\n",
"    model = grid_search.GridSearchCV(estimator=est,\n",
"                                     param_grid=param_grid,\n",
"                                     scoring='roc_auc',\n",
"                                     verbose=10,\n",
"                                     n_jobs=n_jobs,\n",
"                                     iid=True,\n",
"                                     refit=False,\n",
"                                     cv=cv)\n",
"    # Fit Grid Search Model\n",
"    model.fit(train_x, train_y)\n",
"    print(\"Best score: %0.3f\" % model.best_score_)\n",
"    # Single %-formatted argument so the line reads the same under Python 2\n",
"    # (where print with two arguments shows a tuple) and Python 3.\n",
"    print(\"Best parameters set: %s\" % (model.best_params_,))\n",
"    return model.best_score_, model.best_params_\n",
"\n",
"\n",
"def xgb_fit(train_x,train_y,val_x,val_y,params,num_boost_round,early_stopping_rounds,missing):\n",
"    \"\"\"Train a booster on (train_x, train_y) with early stopping on (val_x, val_y).\n",
"\n",
"    `params` is a dict of xgboost parameters and `missing` is the value that\n",
"    DMatrix treats as missing. Prints the best score and best iteration and\n",
"    returns the trained booster.\n",
"    \"\"\"\n",
"    plst = list(params.items())\n",
"    xgtrain = xgb.DMatrix(train_x, label=train_y, missing=missing)\n",
"    xgval = xgb.DMatrix(val_x, label=val_y, missing=missing)\n",
"    # train using early stopping; xgboost monitors the last watchlist entry ('val')\n",
"    watchlist = [(xgtrain, 'train'), (xgval, 'val')]\n",
"    model = xgb.train(params=plst,\n",
"                      dtrain=xgtrain,\n",
"                      num_boost_round=num_boost_round,\n",
"                      evals=watchlist,\n",
"                      early_stopping_rounds=early_stopping_rounds\n",
"                      # , feval = AUC\n",
"                      )\n",
"    # Single %-formatted argument so the output is plain text under Python 2 as well.\n",
"    print(\"Best score: %s\" % (model.best_score,))\n",
"    print(\"Best iteration: %s\" % (model.best_iteration,))\n",
"    return model\n",
"\n",
"def xgb_predict(model,testX,params,missing):\n",
"    \"\"\"Predict with `model` on `testX`, limited to the best early-stopping round.\n",
"\n",
"    `missing` is the value DMatrix treats as missing. `params` is unused and\n",
"    kept only so existing callers keep working.\n",
"    \"\"\"\n",
"    xgtest = xgb.DMatrix(testX, missing=missing)\n",
"    best_iteration = getattr(model, 'best_iteration', None)\n",
"    if best_iteration is None:\n",
"        # No early-stopping information on the booster: 0 means use every tree.\n",
"        ntree_limit = 0\n",
"    else:\n",
"        # best_iteration is a 0-based round index while ntree_limit is a tree\n",
"        # count, so passing it directly would drop the best round's tree.\n",
"        # Prefer best_ntree_limit when this xgboost version provides it.\n",
"        ntree_limit = getattr(model, 'best_ntree_limit', best_iteration + 1)\n",
"    preds = model.predict(xgtest, ntree_limit=ntree_limit)\n",
"    return preds\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading data ...... 2015-10-31 23:09:19\n"
]
}
],
"source": [
" print (\"Loading data ...... %s\" % (datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M:%S')))\n",
" start = time.time() \n",
" full_df = pd.read_csv('revised_data.csv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>amount_requested</th>\n",
" <th>amount_approved</th>\n",
" <th>request_approve_ratio</th>\n",
" <th>age_at_application</th>\n",
" <th>year_of_application</th>\n",
" <th>month_of_application</th>\n",
" <th>day_of_application</th>\n",
" <th>day_of_week</th>\n",
" <th>residence_rent_or_own</th>\n",
" <th>monthly_rent_amount</th>\n",
" <th>...</th>\n",
" <th>more_than_2_phones</th>\n",
" <th>raw_l2c_score</th>\n",
" <th>raw_FICO_telecom</th>\n",
" <th>FICO_telecom_gradea_ordered</th>\n",
" <th>raw_FICO_retail</th>\n",
" <th>FICO_retail_grade_ordered</th>\n",
" <th>raw_FICO_bank_card</th>\n",
" <th>FICO_bank_grade_ordered</th>\n",
" <th>raw_FICO_money</th>\n",
" <th>FICO_money_grade_ordered</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643</td>\n",
" <td> 643.000000</td>\n",
" <td>...</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" <td> 643.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td> 598.678072</td>\n",
" <td> 525.972006</td>\n",
" <td> 0.905511</td>\n",
" <td> 42.121306</td>\n",
" <td> 2010.758942</td>\n",
" <td> 4.640747</td>\n",
" <td> 15.612753</td>\n",
" <td> 4.049767</td>\n",
" <td> 0.6656299</td>\n",
" <td> 583.097978</td>\n",
" <td>...</td>\n",
" <td> 0.562986</td>\n",
" <td> 594.474339</td>\n",
" <td> 568.754277</td>\n",
" <td> 1.076205</td>\n",
" <td> 596.648523</td>\n",
" <td> 1.337481</td>\n",
" <td> 665.046656</td>\n",
" <td> 2.097978</td>\n",
" <td> 603.027994</td>\n",
" <td> 1.125972</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td> 167.960908</td>\n",
" <td> 131.497886</td>\n",
" <td> 0.153976</td>\n",
" <td> 12.359399</td>\n",
" <td> 0.428058</td>\n",
" <td> 3.951815</td>\n",
" <td> 8.532746</td>\n",
" <td> 1.791468</td>\n",
" <td> 0.4721371</td>\n",
" <td> 437.012338</td>\n",
" <td>...</td>\n",
" <td> 0.496403</td>\n",
" <td> 122.853738</td>\n",
" <td> 42.807749</td>\n",
" <td> 0.293401</td>\n",
" <td> 52.119748</td>\n",
" <td> 0.571611</td>\n",
" <td> 39.939296</td>\n",
" <td> 0.715676</td>\n",
" <td> 27.774108</td>\n",
" <td> 0.332076</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td> 250.000000</td>\n",
" <td> 250.000000</td>\n",
" <td> 0.625000</td>\n",
" <td> 18.000000</td>\n",
" <td> 2010.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> False</td>\n",
" <td> 0.000000</td>\n",
" <td>...</td>\n",
" <td> 0.000000</td>\n",
" <td> 50.000000</td>\n",
" <td> 222.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 222.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 222.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 222.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td> 500.000000</td>\n",
" <td> 500.000000</td>\n",
" <td> 0.833333</td>\n",
" <td> 32.000000</td>\n",
" <td> 2011.000000</td>\n",
" <td> 2.000000</td>\n",
" <td> 9.000000</td>\n",
" <td> 3.000000</td>\n",
" <td> 0</td>\n",
" <td> 270.000000</td>\n",
" <td>...</td>\n",
" <td> 0.000000</td>\n",
" <td> 539.500000</td>\n",
" <td> 537.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 556.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 645.500000</td>\n",
" <td> 2.000000</td>\n",
" <td> 588.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td> 500.000000</td>\n",
" <td> 500.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 41.000000</td>\n",
" <td> 2011.000000</td>\n",
" <td> 3.000000</td>\n",
" <td> 16.000000</td>\n",
" <td> 4.000000</td>\n",
" <td> 1</td>\n",
" <td> 550.000000</td>\n",
" <td>...</td>\n",
" <td> 1.000000</td>\n",
" <td> 589.000000</td>\n",
" <td> 568.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 594.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 671.000000</td>\n",
" <td> 2.000000</td>\n",
" <td> 605.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td> 800.000000</td>\n",
" <td> 500.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 51.000000</td>\n",
" <td> 2011.000000</td>\n",
" <td> 4.000000</td>\n",
" <td> 23.000000</td>\n",
" <td> 5.000000</td>\n",
" <td> 1</td>\n",
" <td> 827.000000</td>\n",
" <td>...</td>\n",
" <td> 1.000000</td>\n",
" <td> 669.000000</td>\n",
" <td> 597.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 637.500000</td>\n",
" <td> 2.000000</td>\n",
" <td> 680.000000</td>\n",
" <td> 2.000000</td>\n",
" <td> 620.000000</td>\n",
" <td> 1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td> 800.000000</td>\n",
" <td> 800.000000</td>\n",
" <td> 1.000000</td>\n",
" <td> 79.000000</td>\n",
" <td> 2011.000000</td>\n",
" <td> 12.000000</td>\n",
" <td> 31.000000</td>\n",
" <td> 7.000000</td>\n",
" <td> True</td>\n",
" <td> 3018.000000</td>\n",
" <td>...</td>\n",
" <td> 1.000000</td>\n",
" <td> 808.000000</td>\n",
" <td> 712.000000</td>\n",
" <td> 3.000000</td>\n",
" <td> 756.000000</td>\n",
" <td> 4.000000</td>\n",
" <td> 797.000000</td>\n",
" <td> 4.000000</td>\n",
" <td> 662.000000</td>\n",
" <td> 2.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 37 columns</p>\n",
"</div>"
],
"text/plain": [
" amount_requested amount_approved request_approve_ratio \\\n",
"count 643.000000 643.000000 643.000000 \n",
"mean 598.678072 525.972006 0.905511 \n",
"std 167.960908 131.497886 0.153976 \n",
"min 250.000000 250.000000 0.625000 \n",
"25% 500.000000 500.000000 0.833333 \n",
"50% 500.000000 500.000000 1.000000 \n",
"75% 800.000000 500.000000 1.000000 \n",
"max 800.000000 800.000000 1.000000 \n",
"\n",
" age_at_application year_of_application month_of_application \\\n",
"count 643.000000 643.000000 643.000000 \n",
"mean 42.121306 2010.758942 4.640747 \n",
"std 12.359399 0.428058 3.951815 \n",
"min 18.000000 2010.000000 1.000000 \n",
"25% 32.000000 2011.000000 2.000000 \n",
"50% 41.000000 2011.000000 3.000000 \n",
"75% 51.000000 2011.000000 4.000000 \n",
"max 79.000000 2011.000000 12.000000 \n",
"\n",
" day_of_application day_of_week residence_rent_or_own \\\n",
"count 643.000000 643.000000 643 \n",
"mean 15.612753 4.049767 0.6656299 \n",
"std 8.532746 1.791468 0.4721371 \n",
"min 1.000000 1.000000 False \n",
"25% 9.000000 3.000000 0 \n",
"50% 16.000000 4.000000 1 \n",
"75% 23.000000 5.000000 1 \n",
"max 31.000000 7.000000 True \n",
"\n",
" monthly_rent_amount ... more_than_2_phones \\\n",
"count 643.000000 ... 643.000000 \n",
"mean 583.097978 ... 0.562986 \n",
"std 437.012338 ... 0.496403 \n",
"min 0.000000 ... 0.000000 \n",
"25% 270.000000 ... 0.000000 \n",
"50% 550.000000 ... 1.000000 \n",
"75% 827.000000 ... 1.000000 \n",
"max 3018.000000 ... 1.000000 \n",
"\n",
" raw_l2c_score raw_FICO_telecom FICO_telecom_gradea_ordered \\\n",
"count 643.000000 643.000000 643.000000 \n",
"mean 594.474339 568.754277 1.076205 \n",
"std 122.853738 42.807749 0.293401 \n",
"min 50.000000 222.000000 1.000000 \n",
"25% 539.500000 537.000000 1.000000 \n",
"50% 589.000000 568.000000 1.000000 \n",
"75% 669.000000 597.000000 1.000000 \n",
"max 808.000000 712.000000 3.000000 \n",
"\n",
" raw_FICO_retail FICO_retail_grade_ordered raw_FICO_bank_card \\\n",
"count 643.000000 643.000000 643.000000 \n",
"mean 596.648523 1.337481 665.046656 \n",
"std 52.119748 0.571611 39.939296 \n",
"min 222.000000 1.000000 222.000000 \n",
"25% 556.000000 1.000000 645.500000 \n",
"50% 594.000000 1.000000 671.000000 \n",
"75% 637.500000 2.000000 680.000000 \n",
"max 756.000000 4.000000 797.000000 \n",
"\n",
" FICO_bank_grade_ordered raw_FICO_money FICO_money_grade_ordered \n",
"count 643.000000 643.000000 643.000000 \n",
"mean 2.097978 603.027994 1.125972 \n",
"std 0.715676 27.774108 0.332076 \n",
"min 1.000000 222.000000 1.000000 \n",
"25% 2.000000 588.000000 1.000000 \n",
"50% 2.000000 605.000000 1.000000 \n",
"75% 2.000000 620.000000 1.000000 \n",
"max 4.000000 662.000000 2.000000 \n",
"\n",
"[8 rows x 37 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Encode the label only while it still holds the raw strings, so that re-running\n",
"# this cell does not turn every already-encoded label into 0.\n",
"if full_df['performance'].dtype == object:\n",
"    full_df['performance']=full_df['performance'].apply(lambda x: 1 if x=='Bad' else 0)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 1\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0\n",
"5 1\n",
"6 0\n",
"7 1\n",
"8 1\n",
"9 1\n",
"10 0\n",
"11 0\n",
"12 0\n",
"13 1\n",
"14 1\n",
"...\n",
"628 1\n",
"629 0\n",
"630 1\n",
"631 1\n",
"632 1\n",
"633 0\n",
"634 0\n",
"635 0\n",
"636 0\n",
"637 0\n",
"638 0\n",
"639 1\n",
"640 1\n",
"641 1\n",
"642 0\n",
"Name: performance, Length: 643, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_df['performance']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"full_df['birth_date']=pd.to_datetime(full_df['birth_date'],format=\"%m/%d/%Y %H:%M\")\n",
"full_df['application_when']=pd.to_datetime(full_df['application_when'],format=\"%m/%d/%Y %H:%M\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# full_df['birth_date_month']=full_df['birth_date'].dt.month\n",
"# full_df['birth_date_year']=full_df['birth_date'].dt.year\n",
"# full_df['birth_date_day']=full_df['birth_date'].dt.day\n",
"# full_df['birth_date_day_of_week']=full_df['birth_date'].dt.dayofweek\n",
"# full_df['birth_date_week_of_year']=full_df['birth_date'].dt.weekofyear\n",
"\n",
"# full_df['application_when_month']=full_df['application_when'].dt.month\n",
"# full_df['application_when_year']=full_df['application_when'].dt.year\n",
"# full_df['application_when_day']=full_df['application_when'].dt.day\n",
"# full_df['application_when_day_of_week']=full_df['application_when'].dt.dayofweek\n",
"# full_df['application_when_week_of_year']=full_df['application_when'].dt.weekofyear\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"9 563\n",
"8 79\n",
"5 1\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_df['bank_routing_number'].apply(lambda x:len(str(x))).value_counts()\n",
"# 8 XXXXYYYYC\n",
"# 9 YYYY/XXXX\n",
"# 5"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# The 8-character values counted in the previous cell are presumably 9-digit\n",
"# routing numbers whose leading zero was lost when the column was parsed as a\n",
"# number. Pad back to 9 characters so the first four characters line up for\n",
"# every row before slicing.\n",
"# NOTE(review): the single 5-character value is padded too - confirm it is valid.\n",
"full_df['bank_routing_number']=full_df['bank_routing_number'].apply(lambda x:str(x).zfill(9))\n",
"full_df['routing_symbol']=full_df['bank_routing_number'].apply(lambda x:x[:4])\n",
"full_df['institution']=full_df['bank_routing_number'].apply(lambda x:x[4:])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"cat_cols = ['age_at_application', 'status','residence_rent_or_own','bank_account_direct_deposit'\n",
" ,'payment_ach','payment_frequency','address_zip','State','email_provider','routing_symbol','institution'\n",
" ,'home_phone_type','other_phone_type','how_use_money','FICO_telecom_grade','FICO_retail_grade','FICO_bank_grade'\n",
" ,'FICO_money_grade']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"num_cols = ['amount_requested', 'amount_approved', 'request_approve_ratio', 'age_at_application', 'year_of_application', \n",
" 'month_of_application', 'day_of_application', 'day_of_week', 'monthly_rent_amount', 'loan_duration', \n",
" 'payment_amount','payment_amount_approved', 'payment_approve_ratio', 'num_payments', 'duration_approved', \n",
" 'total_payment', 'total_payment_principal_ratio', 'monthly_income_amount', 'monthly_pay_ratio', \n",
" 'email_duration_ordered', 'residence_duration_ordered', 'bank_account_duration_ordered', 'more_than_2_phones'\n",
" ,'raw_l2c_score','raw_FICO_telecom', 'FICO_telecom_gradea_ordered', 'raw_FICO_retail', 'FICO_retail_grade_ordered'\n",
" , 'raw_FICO_bank_card', 'FICO_bank_grade_ordered', 'raw_FICO_money', 'FICO_money_grade_ordered']"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"target_col = 'performance'"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Leave-One-Out Encoding age_at_application\n",
"Leave-One-Out Encoding status\n",
"Leave-One-Out Encoding residence_rent_or_own\n",
"Leave-One-Out Encoding bank_account_direct_deposit\n",
"Leave-One-Out Encoding payment_ach\n",
"Leave-One-Out Encoding payment_frequency\n",
"Leave-One-Out Encoding address_zip\n",
"Leave-One-Out Encoding State\n",
"Leave-One-Out Encoding email_provider\n",
"Leave-One-Out Encoding routing_symbol\n",
"Leave-One-Out Encoding institution\n",
"Leave-One-Out Encoding home_phone_type\n",
"Leave-One-Out Encoding other_phone_type\n",
"Leave-One-Out Encoding how_use_money\n",
"Leave-One-Out Encoding FICO_telecom_grade\n",
"Leave-One-Out Encoding FICO_retail_grade\n",
"Leave-One-Out Encoding FICO_bank_grade\n",
"Leave-One-Out Encoding FICO_money_grade\n"
]
}
],
"source": [
"def _leave_one_out_mean(row):\n",
"    \"\"\"Target mean of the row's category, computed without the row itself.\"\"\"\n",
"    if math.isnan(row[target_col]):\n",
"        # Unlabelled row: nothing to leave out, use the plain category mean.\n",
"        return row['mean']\n",
"    if row['size'] <= 1:\n",
"        # The row is the only member of its category, so there is no other\n",
"        # label to average; return NaN explicitly instead of dividing 0 by 0.\n",
"        return float('nan')\n",
"    # Multiplicative noise in [0.95, 1.05] applied to the leave-one-out mean.\n",
"    return (row['sum']-row[target_col])/(row['size']-1)*random.uniform(0.95, 1.05)\n",
"\n",
"\n",
"oneway_cat_aggr_cols = list()\n",
"for col in cat_cols:\n",
"    print (\"Leave-One-Out Encoding %s\" % (col))\n",
"    # Per-category mean, sum and count of the target in a single aggregation.\n",
"    aggr = full_df.groupby(col)[target_col].agg([np.mean, np.sum, np.size])\n",
"    joined = full_df[[col, target_col]].join(aggr, how='left', on=col)\n",
"    # Keep only the numeric columns the encoder reads.\n",
"    joined = joined[list(aggr.columns)+[target_col]]\n",
"    full_df['MEAN_BY_'+col] = joined.apply(_leave_one_out_mean, axis=1)\n",
"    oneway_cat_aggr_cols.append('MEAN_BY_'+col)\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>MEAN_BY_age_at_application</th>\n",
" <th>MEAN_BY_status</th>\n",
" <th>MEAN_BY_residence_rent_or_own</th>\n",
" <th>MEAN_BY_bank_account_direct_deposit</th>\n",
" <th>MEAN_BY_payment_ach</th>\n",
" <th>MEAN_BY_payment_frequency</th>\n",
" <th>MEAN_BY_address_zip</th>\n",
" <th>MEAN_BY_State</th>\n",
" <th>MEAN_BY_email_provider</th>\n",
" <th>MEAN_BY_routing_symbol</th>\n",
" <th>MEAN_BY_institution</th>\n",
" <th>MEAN_BY_home_phone_type</th>\n",
" <th>MEAN_BY_other_phone_type</th>\n",
" <th>MEAN_BY_how_use_money</th>\n",
" <th>MEAN_BY_FICO_telecom_grade</th>\n",
" <th>MEAN_BY_FICO_retail_grade</th>\n",
" <th>MEAN_BY_FICO_bank_grade</th>\n",
" <th>MEAN_BY_FICO_money_grade</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0 </th>\n",
" <td> 0.517542</td>\n",
" <td> 0.571829</td>\n",
" <td> 0.494244</td>\n",
" <td> 0.520542</td>\n",
" <td> 0.556772</td>\n",
" <td> 0.451417</td>\n",
" <td> NaN</td>\n",
" <td> 0.594170</td>\n",
" <td> 0.537133</td>\n",
" <td> 0.558437</td>\n",
" <td> NaN</td>\n",
" <td> 0.586874</td>\n",
" <td> 0.603728</td>\n",
" <td> 0.556030</td>\n",
" <td> 0.535966</td>\n",
" <td> 0.606974</td>\n",
" <td> 0.543644</td>\n",
" <td> 0.560367</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 </th>\n",
" <td> 0.399429</td>\n",
" <td> 0.554419</td>\n",
" <td> 0.469449</td>\n",
" <td> 0.544038</td>\n",
" <td> 0.566894</td>\n",
" <td> 0.658350</td>\n",
" <td> 1.003726</td>\n",
" <td> 0.571771</td>\n",
" <td> 0.521474</td>\n",
" <td> 0.604892</td>\n",
" <td> 0.632316</td>\n",
" <td> 0.459916</td>\n",
" <td> 0.600047</td>\n",
" <td> 0.548491</td>\n",
" <td> 0.582148</td>\n",
" <td> 0.446776</td>\n",
" <td> 0.559864</td>\n",
" <td> 0.310969</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2 </th>\n",
" <td> 0.627790</td>\n",
" <td> 0.536272</td>\n",
" <td> 0.608097</td>\n",
" <td> 0.514617</td>\n",
" <td> 0.524484</td>\n",
" <td> 0.646540</td>\n",
" <td> 0.436093</td>\n",
" <td> 0.571640</td>\n",
" <td> 0.540709</td>\n",
" <td> 0.403309</td>\n",
" <td> 0.396041</td>\n",
" <td> 0.546133</td>\n",
" <td> NaN</td>\n",
" <td> 0.466287</td>\n",
" <td> 0.539096</td>\n",
" <td> 0.432630</td>\n",
" <td> 0.562527</td>\n",
" <td> 0.588560</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3 </th>\n",
" <td> 0.652807</td>\n",
" <td> 0.540285</td>\n",
" <td> 0.589860</td>\n",
" <td> 0.540995</td>\n",
" <td> 0.552187</td>\n",
" <td> 0.648897</td>\n",
" <td> 0.260163</td>\n",
" <td> 0.564072</td>\n",
" <td> 0.507995</td>\n",
" <td> 0.423049</td>\n",
" <td> NaN</td>\n",
" <td> 0.544627</td>\n",
" <td> 0.556392</td>\n",
" <td> 0.534865</td>\n",
" <td> 0.538689</td>\n",
" <td> 0.611847</td>\n",
" <td> 0.769204</td>\n",
" <td> 0.572985</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4 </th>\n",
" <td> 0.475493</td>\n",
" <td> 0.570873</td>\n",
" <td> 0.473126</td>\n",
" <td> 0.508780</td>\n",
" <td> 0.531369</td>\n",
" <td> 0.444453</td>\n",
" <td> NaN</td>\n",
" <td> 0.559163</td>\n",
" <td> 0.546935</td>\n",
" <td> 0.668395</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.538942</td>\n",
" <td> 0.591167</td>\n",
" <td> 0.456890</td>\n",
" <td> 0.572069</td>\n",
" <td> 0.432686</td>\n",
" <td> 0.429499</td>\n",
" <td> 0.605833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5 </th>\n",
" <td> 0.688976</td>\n",
" <td> 0.550255</td>\n",
" <td> 0.585853</td>\n",
" <td> 0.515863</td>\n",
" <td> 0.541797</td>\n",
" <td> 0.603874</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.570112</td>\n",
" <td> 0.505125</td>\n",
" <td> 0.548543</td>\n",
" <td> 0.597531</td>\n",
" <td> 0.461261</td>\n",
" <td> 0.606653</td>\n",
" <td> 0.639479</td>\n",
" <td> 0.556825</td>\n",
" <td> 0.588563</td>\n",
" <td> 0.737572</td>\n",
" <td> 0.592148</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6 </th>\n",
" <td> 0.580870</td>\n",
" <td> 0.552593</td>\n",
" <td> 0.504688</td>\n",
" <td> 0.547754</td>\n",
" <td> 0.570103</td>\n",
" <td> 0.437464</td>\n",
" <td> 0.572063</td>\n",
" <td> 0.588385</td>\n",
" <td> 0.531421</td>\n",
" <td> 0.412530</td>\n",
" <td> 0.407121</td>\n",
" <td> 0.585024</td>\n",
" <td> 0.547060</td>\n",
" <td> 0.584413</td>\n",
" <td> 0.581124</td>\n",
" <td> 0.608334</td>\n",
" <td> 0.562271</td>\n",
" <td> 0.585503</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7 </th>\n",
" <td> 0.350655</td>\n",
" <td> 0.541036</td>\n",
" <td> 0.496740</td>\n",
" <td> 0.613502</td>\n",
" <td> 0.537937</td>\n",
" <td> 0.420623</td>\n",
" <td> 0.691819</td>\n",
" <td> 0.551646</td>\n",
" <td> 0.607223</td>\n",
" <td> 0.598742</td>\n",
" <td> 0.585584</td>\n",
" <td> 0.562681</td>\n",
" <td> 0.558898</td>\n",
" <td> 0.564348</td>\n",
" <td> 0.567462</td>\n",
" <td> 0.627346</td>\n",
" <td> 0.555130</td>\n",
" <td> 0.287170</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8 </th>\n",
" <td> 0.565863</td>\n",
" <td> 0.531753</td>\n",
" <td> 0.590477</td>\n",
" <td> 0.513816</td>\n",
" <td> 0.523466</td>\n",
" <td> 0.407679</td>\n",
" <td> 0.475578</td>\n",
" <td> 0.549040</td>\n",
" <td> 0.552063</td>\n",
" <td> 0.380950</td>\n",
" <td> 0.363018</td>\n",
" <td> 0.568953</td>\n",
" <td> NaN</td>\n",
" <td> 0.584258</td>\n",
" <td> 0.536501</td>\n",
" <td> 0.585919</td>\n",
" <td> 0.539063</td>\n",
" <td> 0.556564</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9 </th>\n",
" <td> 0.443644</td>\n",
" <td> 0.530240</td>\n",
" <td> 0.585058</td>\n",
" <td> 0.648754</td>\n",
" <td> 0.571370</td>\n",
" <td> 0.438148</td>\n",
" <td> 0.742308</td>\n",
" <td> 0.546638</td>\n",
" <td> 0.609696</td>\n",
" <td> 0.592947</td>\n",
" <td> 0.474031</td>\n",
" <td> 0.584009</td>\n",
" <td> 0.528087</td>\n",
" <td> 0.494747</td>\n",
" <td> 0.536497</td>\n",
" <td> 0.404297</td>\n",
" <td> 0.560831</td>\n",
" <td> 0.598067</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10 </th>\n",
" <td> 0.684632</td>\n",
" <td> 0.546995</td>\n",
" <td> 0.585351</td>\n",
" <td> 0.520896</td>\n",
" <td> 0.563226</td>\n",
" <td> 0.864418</td>\n",
" <td> 1.034278</td>\n",
" <td> 0.563414</td>\n",
" <td> 0.498338</td>\n",
" <td> 0.678406</td>\n",
" <td> 1.019597</td>\n",
" <td> 0.490938</td>\n",
" <td> 0.623438</td>\n",
" <td> 0.570159</td>\n",
" <td> 0.559526</td>\n",
" <td> 0.452262</td>\n",
" <td> 0.435517</td>\n",
" <td> 0.309912</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11 </th>\n",
" <td> 0.522674</td>\n",
" <td> 0.552124</td>\n",
" <td> 0.481081</td>\n",
" <td> 0.650700</td>\n",
" <td> 0.540793</td>\n",
" <td> 0.424293</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.560486</td>\n",
" <td> 0.524117</td>\n",
" <td> 0.493098</td>\n",
" <td> 0.646970</td>\n",
" <td> 0.498474</td>\n",
" <td> NaN</td>\n",
" <td> 0.670699</td>\n",
" <td> 0.549569</td>\n",
" <td> 0.410849</td>\n",
" <td> 0.571156</td>\n",
" <td> 0.567746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12 </th>\n",
" <td> 0.500464</td>\n",
" <td> 0.537395</td>\n",
" <td> 0.591601</td>\n",
" <td> 0.663655</td>\n",
" <td> 0.554640</td>\n",
" <td> 0.425866</td>\n",
" <td> NaN</td>\n",
" <td> 0.586036</td>\n",
" <td> 0.560867</td>\n",
" <td> 0.507782</td>\n",
" <td> NaN</td>\n",
" <td> 0.471243</td>\n",
" <td> 0.569160</td>\n",
" <td> 0.480000</td>\n",
" <td> 0.566595</td>\n",
" <td> 0.638426</td>\n",
" <td> 0.535304</td>\n",
" <td> 0.581685</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13 </th>\n",
" <td> 0.694648</td>\n",
" <td> 0.528359</td>\n",
" <td> 0.600319</td>\n",
" <td> 0.546374</td>\n",
" <td> 0.534522</td>\n",
" <td> 0.627331</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.538476</td>\n",
" <td> 0.498785</td>\n",
" <td> 0.666678</td>\n",
" <td> 0.523545</td>\n",
" <td> 0.579877</td>\n",
" <td> NaN</td>\n",
" <td> 0.578266</td>\n",
" <td> 0.537784</td>\n",
" <td> 0.589888</td>\n",
" <td> 0.519568</td>\n",
" <td> 0.601833</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14 </th>\n",
" <td> 0.435828</td>\n",
" <td> 0.528209</td>\n",
" <td> 0.589725</td>\n",
" <td> 0.530802</td>\n",
" <td> 0.558356</td>\n",
" <td> 0.401063</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.577762</td>\n",
" <td> 0.528266</td>\n",
" <td> 0.551812</td>\n",
" <td> 0.475112</td>\n",
" <td> 0.561278</td>\n",
" <td> 0.575929</td>\n",
" <td> 0.588612</td>\n",
" <td> 0.566749</td>\n",
" <td> 0.588757</td>\n",
" <td> 0.552344</td>\n",
" <td> 0.601179</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15 </th>\n",
" <td> 0.397929</td>\n",
" <td> 0.526489</td>\n",
" <td> 0.554701</td>\n",
" <td> 0.648695</td>\n",
" <td> 0.524565</td>\n",
" <td> 0.660230</td>\n",
" <td> 0.579327</td>\n",
" <td> 0.571377</td>\n",
" <td> 0.490050</td>\n",
" <td> 0.335807</td>\n",
" <td> 0.347081</td>\n",
" <td> 0.592872</td>\n",
" <td> 0.590621</td>\n",
" <td> 0.551785</td>\n",
" <td> 0.580684</td>\n",
" <td> 0.414970</td>\n",
" <td> 0.524187</td>\n",
" <td> 0.584980</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16 </th>\n",
" <td> 0.000000</td>\n",
" <td> 0.553516</td>\n",
" <td> 0.467175</td>\n",
" <td> 0.552598</td>\n",
" <td> 0.575798</td>\n",
" <td> 0.438324</td>\n",
" <td> 0.775670</td>\n",
" <td> 0.469365</td>\n",
" <td> 0.517239</td>\n",
" <td> 0.508184</td>\n",
" <td> NaN</td>\n",
" <td> 0.553595</td>\n",
" <td> NaN</td>\n",
" <td> 0.482620</td>\n",
" <td> 0.536571</td>\n",
" <td> 0.412741</td>\n",
" <td> 0.546004</td>\n",
" <td> 0.317705</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17 </th>\n",
" <td> 0.560746</td>\n",
" <td> 0.550512</td>\n",
" <td> 0.601912</td>\n",
" <td> 0.525263</td>\n",
" <td> 0.522458</td>\n",
" <td> 0.422459</td>\n",
" <td> 0.481897</td>\n",
" <td> 0.466428</td>\n",
" <td> 0.559929</td>\n",
" <td> 0.507928</td>\n",
" <td> 0.967398</td>\n",
" <td> 0.452353</td>\n",
" <td> 0.617179</td>\n",
" <td> 0.546840</td>\n",
" <td> 0.580085</td>\n",
" <td> 0.620475</td>\n",
" <td> 0.530691</td>\n",
" <td> 0.588687</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18 </th>\n",
" <td> NaN</td>\n",
" <td> 0.527371</td>\n",
" <td> 0.586456</td>\n",
" <td> 0.511822</td>\n",
" <td> 0.522281</td>\n",
" <td> 0.439009</td>\n",
" <td> 0.479763</td>\n",
" <td> 0.568433</td>\n",
" <td> 0.513343</td>\n",
" <td> 0.549666</td>\n",
" <td> 0.558839</td>\n",
" <td> 0.476717</td>\n",
" <td> NaN</td>\n",
" <td> 0.607441</td>\n",
" <td> 0.574126</td>\n",
" <td> 0.582103</td>\n",
" <td> 0.739016</td>\n",
" <td> 0.598383</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19 </th>\n",
" <td> 0.410384</td>\n",
" <td> 0.551518</td>\n",
" <td> 0.555333</td>\n",
" <td> 0.543057</td>\n",
" <td> 0.569918</td>\n",
" <td> 0.436761</td>\n",
" <td> 0.524987</td>\n",
" <td> 0.492089</td>\n",
" <td> 0.618675</td>\n",
" <td> 0.488143</td>\n",
" <td> 0.443218</td>\n",
" <td> 0.551421</td>\n",
" <td> NaN</td>\n",
" <td> 0.581054</td>\n",
" <td> 0.555595</td>\n",
" <td> 0.405775</td>\n",
" <td> 0.404795</td>\n",
" <td> 0.585047</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20 </th>\n",
" <td> 0.739656</td>\n",
" <td> 0.551889</td>\n",
" <td> 0.582182</td>\n",
" <td> 0.554088</td>\n",
" <td> 0.541690</td>\n",
" <td> 0.607340</td>\n",
" <td> 0.480076</td>\n",
" <td> 0.572875</td>\n",
" <td> 0.559797</td>\n",
" <td> 0.548827</td>\n",
" <td> 0.628056</td>\n",
" <td> 0.582939</td>\n",
" <td> 0.531679</td>\n",
" <td> 0.554225</td>\n",
" <td> 0.581215</td>\n",
" <td> 0.430140</td>\n",
" <td> 0.387715</td>\n",
" <td> 0.558890</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21 </th>\n",
" <td> 0.507572</td>\n",
" <td> 0.561915</td>\n",
" <td> 0.574720</td>\n",
" <td> 0.515846</td>\n",
" <td> 0.524367</td>\n",
" <td> 0.441451</td>\n",
" <td> 0.445840</td>\n",
" <td> 0.548882</td>\n",
" <td> 0.535417</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.503453</td>\n",
" <td> 0.588748</td>\n",
" <td> 0.599674</td>\n",
" <td> 0.588519</td>\n",
" <td> 0.623244</td>\n",
" <td> 0.557579</td>\n",
" <td> 0.585748</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22 </th>\n",
" <td> 0.642662</td>\n",
" <td> 0.566429</td>\n",
" <td> 0.595979</td>\n",
" <td> 0.534712</td>\n",
" <td> 0.552531</td>\n",
" <td> 0.657346</td>\n",
" <td> 0.650982</td>\n",
" <td> 0.568618</td>\n",
" <td> 0.550232</td>\n",
" <td> 0.630204</td>\n",
" <td> 0.779326</td>\n",
" <td> 0.464207</td>\n",
" <td> 0.581757</td>\n",
" <td> 0.576969</td>\n",
" <td> 0.410455</td>\n",
" <td> 0.439073</td>\n",
" <td> 0.389809</td>\n",
" <td> 0.602730</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23 </th>\n",
" <td> 0.496317</td>\n",
" <td> 0.541307</td>\n",
" <td> 0.599506</td>\n",
" <td> 0.551721</td>\n",
" <td> 0.548648</td>\n",
" <td> 0.450843</td>\n",
" <td> 0.628880</td>\n",
" <td> 0.567121</td>\n",
" <td> 0.600018</td>\n",
" <td> 0.506350</td>\n",
" <td> 0.496482</td>\n",
" <td> 0.575000</td>\n",
" <td> NaN</td>\n",
" <td> 0.558388</td>\n",
" <td> 0.583921</td>\n",
" <td> 0.600051</td>\n",
" <td> 0.517963</td>\n",
" <td> 0.557051</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24 </th>\n",
" <td> 0.389090</td>\n",
" <td> 0.569904</td>\n",
" <td> 0.599323</td>\n",
" <td> 0.535003</td>\n",
" <td> 0.555871</td>\n",
" <td> 0.656791</td>\n",
" <td> 0.956358</td>\n",
" <td> 0.489777</td>\n",
" <td> 0.607980</td>\n",
" <td> 0.600240</td>\n",
" <td> 0.613187</td>\n",
" <td> 0.536199</td>\n",
" <td> 0.572101</td>\n",
" <td> 0.650682</td>\n",
" <td> 0.574680</td>\n",
" <td> 0.591002</td>\n",
" <td> 0.547826</td>\n",
" <td> 0.560444</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25 </th>\n",
" <td> 0.496467</td>\n",
" <td> 0.541628</td>\n",
" <td> 0.582262</td>\n",
" <td> 0.559240</td>\n",
" <td> 0.539800</td>\n",
" <td> 0.886367</td>\n",
" <td> 0.489787</td>\n",
" <td> 0.567950</td>\n",
" <td> 0.518568</td>\n",
" <td> 0.527640</td>\n",
" <td> 0.424441</td>\n",
" <td> 0.573632</td>\n",
" <td> 0.562991</td>\n",
" <td> 0.569694</td>\n",
" <td> 0.590968</td>\n",
" <td> 0.595584</td>\n",
" <td> 0.560051</td>\n",
" <td> 0.611709</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26 </th>\n",
" <td> 0.567400</td>\n",
" <td> 0.548106</td>\n",
" <td> 0.559774</td>\n",
" <td> 0.522028</td>\n",
" <td> 0.561494</td>\n",
" <td> 0.661338</td>\n",
" <td> 0.403418</td>\n",
" <td> 0.473217</td>\n",
" <td> 0.542250</td>\n",
" <td> NaN</td>\n",
" <td> NaN</td>\n",
" <td> 0.558707</td>\n",
" <td> 0.607349</td>\n",
" <td> 0.599586</td>\n",
" <td> 0.548574</td>\n",
" <td> 0.629289</td>\n",
" <td> 0.566470</td>\n",
" <td> 0.611868</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27 </th>\n",
" <td> 0.729398</td>\n",
" <td> 0.537536</td>\n",
" <td> 0.609720</td>\n",
" <td> 0.640785</td>\n",
" <td> 0.557616</td>\n",
" <td> 0.397256</td>\n",
" <td> 0.876264</td>\n",
" <td> 0.590294</td>\n",
" <td> 0.560552</td>\n",
" <td> 1.018300</td>\n",
" <td> 0.990338</td>\n",
" <td> 0.591778</td>\n",
" <td> NaN</td>\n",
" <td> 0.595892</td>\n",
" <td> 0.569365</td>\n",
" <td> 0.638073</td>\n",
" <td> 0.561568</td>\n",
" <td> 0.607779</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28 </th>\n",
" <td> 0.853946</td>\n",
" <td> 0.557129</td>\n",
" <td> 0.591020</td>\n",
" <td> 0.518054</td>\n",
" <td> 0.567708</td>\n",
" <td> 0.437493</td>\n",
" <td> 0.726672</td>\n",
" <td> 0.588861</td>\n",
" <td> 0.561366</td>\n",
" <td> 0.564045</td>\n",
" <td> 0.607998</td>\n",
" <td> 0.567359</td>\n",
" <td> 0.605553</td>\n",
" <td> 0.554384</td>\n",
" <td> 0.565209</td>\n",
" <td> 0.416839</td>\n",
" <td> 0.550760</td>\n",
" <td> 0.608390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29 </th>\n",
" <td> 0.628606</td>\n",
" <td> 0.577731</td>\n",
" <td> 0.482355</td>\n",
" <td> 0.604700</td>\n",
" <td> 0.548282</td>\n",
" <td> 0.435476</td>\n",
" <td> NaN</td>\n",
" <td> 0.470625</td>\n",
" <td> 0.561767</td>\n",
" <td> 0.558091</td>\n",
" <td> NaN</td>\n",
" <td> 0.590543</td>\n",
" <td> NaN</td>\n",
" <td> 0.547113</td>\n",
" <td> 0.549539</td>\n",
" <td> 0.421872</td>\n",
" <td> 0.525814</td>\n",
" <td> 0.579806</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>613</th>\n",
" <td> 0.444902</td>\n",
" <td> 0.531450</td>\n",
" <td> 0.581533</td>\n",
" <td> 0.543362</td>\n",
" <td> 0.576319</td>\n",
" <td> 0.460006</td>\n",
" <td> 0.686549</td>\n",
" <td> 0.514527</td>\n",
" <td> 0.628586</td>\n",
" <td> 0.339397</td>\n",
" <td> 0.335029</td>\n",
" <td> 0.483332</td>\n",
" <td> NaN</td>\n",
" <td> 0.453893</td>\n",
" <td> 0.568827</td>\n",
" <td> 0.445912</td>\n",
" <td> 0.527159</td>\n",
" <td> 0.611500</td>\n",
" </tr>\n",
" <tr>\n",
" <th>614</th>\n",
" <td> 0.512887</td>\n",
" <td> 0.573706</td>\n",
" <td> 0.500176</td>\n",
" <td> 0.510136</td>\n",
" <td> 0.577442</td>\n",
" <td> 0.447369</td>\n",
" <td> 0.422781</td>\n",
" <td> 0.556519</td>\n",
" <td> 0.603405</td>\n",
" <td> 0.566129</td>\n",
" <td> 0.653260</td>\n",
" <td> 0.584228</td>\n",
" <td> 0.604080</td>\n",
" <td> 0.191959</td>\n",
" <td> 0.404393</td>\n",
" <td> 0.427156</td>\n",
" <td> 0.417121</td>\n",
" <td> 0.312969</td>\n",
" </tr>\n",
" <tr>\n",
" <th>615</th>\n",
" <td> 0.636723</td>\n",
" <td> 0.566364</td>\n",
" <td> 0.579426</td>\n",
" <td> 0.540522</td>\n",
" <td> 0.576262</td>\n",
" <td> 0.654664</td>\n",
" <td> 0.974715</td>\n",
" <td> 0.555919</td>\n",
" <td> 0.569009</td>\n",
" <td> 0.596871</td>\n",
" <td> 0.649658</td>\n",
" <td> 0.548259</td>\n",
" <td> 0.602617</td>\n",
" <td> 0.607272</td>\n",
" <td> 0.588661</td>\n",
" <td> 0.430991</td>\n",
" <td> 0.524210</td>\n",
" <td> 0.570576</td>\n",
" </tr>\n",
" <tr>\n",
" <th>616</th>\n",
" <td> 0.655397</td>\n",
" <td> 0.568012</td>\n",
" <td> 0.558486</td>\n",
" <td> 0.548196</td>\n",
" <td> 0.525779</td>\n",
" <td> 0.628419</td>\n",
" <td> NaN</td>\n",
" <td> 0.570018</td>\n",
" <td> 0.527417</td>\n",
" <td> 0.580270</td>\n",
" <td> 0.644592</td>\n",
" <td> 0.579050</td>\n",
" <td> 0.564567</td>\n",
" <td> 0.474552</td>\n",
" <td> 0.585677</td>\n",
" <td> 0.580035</td>\n",
" <td> 0.714946</td>\n",
" <td> 0.599790</td>\n",
" </tr>\n",
" <tr>\n",
" <th>617</th>\n",
" <td> 0.380226</td>\n",
" <td> 0.569853</td>\n",
" <td> 0.486284</td>\n",
" <td> 0.556094</td>\n",
" <td> 0.560889</td>\n",
" <td> 0.631773</td>\n",
" <td> 0.592032</td>\n",
" <td> 0.577998</td>\n",
" <td> 0.535080</td>\n",
" <td> 0.575168</td>\n",
" <td> 0.450583</td>\n",
" <td> 0.577126</td>\n",
" <td> 0.547663</td>\n",
" <td> 0.587791</td>\n",
" <td> 0.552982</td>\n",
" <td> 0.606649</td>\n",
" <td> 0.531885</td>\n",
" <td> 0.597014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>618</th>\n",
" <td> 0.465282</td>\n",
" <td> 0.526812</td>\n",
" <td> 0.470117</td>\n",
" <td> 0.510534</td>\n",
" <td> 0.535306</td>\n",
" <td> 0.408610</td>\n",
" <td> NaN</td>\n",
" <td> 0.562159</td>\n",
" <td> 0.517129</td>\n",
" <td> 0.486302</td>\n",
" <td> NaN</td>\n",
" <td> 0.574824</td>\n",
" <td> 0.611634</td>\n",
" <td> 0.598125</td>\n",
" <td> 0.582344</td>\n",
" <td> 0.641199</td>\n",
" <td> 0.564118</td>\n",
" <td> 0.571125</td>\n",
" </tr>\n",
" <tr>\n",
" <th>619</th>\n",
" <td> 0.562686</td>\n",
" <td> 0.530329</td>\n",
" <td> 0.474786</td>\n",
" <td> 0.541422</td>\n",
" <td> 0.539314</td>\n",
" <td> 0.438840</td>\n",
" <td> 0.242610</td>\n",
" <td> 0.581739</td>\n",
" <td> 0.511096</td>\n",
" <td> 0.423126</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.575919</td>\n",
" <td> NaN</td>\n",
" <td> 0.489592</td>\n",
" <td> 0.579661</td>\n",
" <td> 0.590426</td>\n",
" <td> 0.531310</td>\n",
" <td> 0.558183</td>\n",
" </tr>\n",
" <tr>\n",
" <th>620</th>\n",
" <td> 0.540569</td>\n",
" <td> 0.564725</td>\n",
" <td> 0.600143</td>\n",
" <td> 0.515467</td>\n",
" <td> 0.552378</td>\n",
" <td> 0.651670</td>\n",
" <td> 0.723968</td>\n",
" <td> 0.585175</td>\n",
" <td> 0.507145</td>\n",
" <td> 0.380488</td>\n",
" <td> NaN</td>\n",
" <td> 0.547961</td>\n",
" <td> 0.555082</td>\n",
" <td> 0.574413</td>\n",
" <td> 0.541090</td>\n",
" <td> 0.607958</td>\n",
" <td> 0.769137</td>\n",
" <td> 0.570468</td>\n",
" </tr>\n",
" <tr>\n",
" <th>621</th>\n",
" <td> 0.417533</td>\n",
" <td> 0.534088</td>\n",
" <td> 0.501683</td>\n",
" <td> 0.651355</td>\n",
" <td> 0.546980</td>\n",
" <td> 0.456955</td>\n",
" <td> 0.514782</td>\n",
" <td> 0.583888</td>\n",
" <td> 0.562920</td>\n",
" <td> 0.570569</td>\n",
" <td> 0.491702</td>\n",
" <td> 0.574086</td>\n",
" <td> NaN</td>\n",
" <td> 0.573237</td>\n",
" <td> 0.545520</td>\n",
" <td> 0.441048</td>\n",
" <td> 0.418729</td>\n",
" <td> 0.315301</td>\n",
" </tr>\n",
" <tr>\n",
" <th>622</th>\n",
" <td> 0.565731</td>\n",
" <td> 0.570258</td>\n",
" <td> 0.489100</td>\n",
" <td> 0.526691</td>\n",
" <td> 0.537444</td>\n",
" <td> 0.431932</td>\n",
" <td> NaN</td>\n",
" <td> 0.550015</td>\n",
" <td> 0.508142</td>\n",
" <td> 0.576704</td>\n",
" <td> 0.618419</td>\n",
" <td> 0.565152</td>\n",
" <td> NaN</td>\n",
" <td> 0.588185</td>\n",
" <td> 0.545734</td>\n",
" <td> 0.444992</td>\n",
" <td> 0.411203</td>\n",
" <td> 0.611549</td>\n",
" </tr>\n",
" <tr>\n",
" <th>623</th>\n",
" <td> 0.690731</td>\n",
" <td> 0.547814</td>\n",
" <td> 0.604246</td>\n",
" <td> 0.506853</td>\n",
" <td> 0.558314</td>\n",
" <td> 0.420731</td>\n",
" <td> 0.327148</td>\n",
" <td> 0.490053</td>\n",
" <td> 0.494646</td>\n",
" <td> 0.477760</td>\n",
" <td> 0.422598</td>\n",
" <td> 0.538062</td>\n",
" <td> 0.589025</td>\n",
" <td> 0.438469</td>\n",
" <td> 0.588725</td>\n",
" <td> 0.418612</td>\n",
" <td> 0.407487</td>\n",
" <td> 0.565994</td>\n",
" </tr>\n",
" <tr>\n",
" <th>624</th>\n",
" <td> 0.631071</td>\n",
" <td> 0.563347</td>\n",
" <td> 0.566045</td>\n",
" <td> 0.540059</td>\n",
" <td> 0.527046</td>\n",
" <td> 0.419409</td>\n",
" <td> 0.349922</td>\n",
" <td> 0.557259</td>\n",
" <td> 0.542784</td>\n",
" <td> 0.595225</td>\n",
" <td> 0.597731</td>\n",
" <td> 0.482756</td>\n",
" <td> 0.563948</td>\n",
" <td> 0.559595</td>\n",
" <td> 0.566472</td>\n",
" <td> 0.417482</td>\n",
" <td> 0.546066</td>\n",
" <td> 0.560571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>625</th>\n",
" <td> 0.776412</td>\n",
" <td> 0.528702</td>\n",
" <td> 0.565955</td>\n",
" <td> 0.507513</td>\n",
" <td> 0.524036</td>\n",
" <td> 0.624092</td>\n",
" <td> 0.635204</td>\n",
" <td> 0.551823</td>\n",
" <td> 0.499075</td>\n",
" <td> 0.821823</td>\n",
" <td> 0.820567</td>\n",
" <td> 0.551897</td>\n",
" <td> 0.614297</td>\n",
" <td> 0.574307</td>\n",
" <td> 0.583396</td>\n",
" <td> 0.627400</td>\n",
" <td> 0.553903</td>\n",
" <td> 0.581571</td>\n",
" </tr>\n",
" <tr>\n",
" <th>626</th>\n",
" <td> 0.344323</td>\n",
" <td> 0.548091</td>\n",
" <td> 0.568676</td>\n",
" <td> 0.522844</td>\n",
" <td> 0.548815</td>\n",
" <td> 0.646992</td>\n",
" <td> 0.839429</td>\n",
" <td> 0.593204</td>\n",
" <td> 0.584476</td>\n",
" <td> 0.556678</td>\n",
" <td> 0.626239</td>\n",
" <td> 0.569192</td>\n",
" <td> 0.601440</td>\n",
" <td> 0.463204</td>\n",
" <td> 0.551428</td>\n",
" <td> 0.619576</td>\n",
" <td> 0.557485</td>\n",
" <td> 0.572047</td>\n",
" </tr>\n",
" <tr>\n",
" <th>627</th>\n",
" <td> 0.429596</td>\n",
" <td> 0.557114</td>\n",
" <td> 0.582951</td>\n",
" <td> 0.605076</td>\n",
" <td> 0.568536</td>\n",
" <td> 0.420578</td>\n",
" <td> 0.633580</td>\n",
" <td> 0.567919</td>\n",
" <td> 0.597918</td>\n",
" <td> 0.435602</td>\n",
" <td> 0.407960</td>\n",
" <td> 0.575341</td>\n",
" <td> 0.556210</td>\n",
" <td> 0.656053</td>\n",
" <td> 0.435138</td>\n",
" <td> 0.425251</td>\n",
" <td> 0.419312</td>\n",
" <td> 0.321988</td>\n",
" </tr>\n",
" <tr>\n",
" <th>628</th>\n",
" <td> 0.762505</td>\n",
" <td> 0.543978</td>\n",
" <td> 0.584458</td>\n",
" <td> 0.603054</td>\n",
" <td> 0.538317</td>\n",
" <td> 0.417545</td>\n",
" <td> 0.577393</td>\n",
" <td> 0.461703</td>\n",
" <td> 0.608771</td>\n",
" <td> 0.502688</td>\n",
" <td> 0.369492</td>\n",
" <td> 0.558571</td>\n",
" <td> 0.543619</td>\n",
" <td> 0.560189</td>\n",
" <td> 0.550537</td>\n",
" <td> 0.620434</td>\n",
" <td> 0.719316</td>\n",
" <td> 0.577551</td>\n",
" </tr>\n",
" <tr>\n",
" <th>629</th>\n",
" <td> 0.445562</td>\n",
" <td> 0.563655</td>\n",
" <td> 0.514475</td>\n",
" <td> 0.523519</td>\n",
" <td> 0.529758</td>\n",
" <td> 0.408874</td>\n",
" <td> 0.976592</td>\n",
" <td> 0.545068</td>\n",
" <td> 0.497158</td>\n",
" <td> 0.415081</td>\n",
" <td> 0.393474</td>\n",
" <td> 0.636485</td>\n",
" <td> 0.591303</td>\n",
" <td> 0.483776</td>\n",
" <td> 0.560353</td>\n",
" <td> 0.638526</td>\n",
" <td> 0.533047</td>\n",
" <td> 0.598957</td>\n",
" </tr>\n",
" <tr>\n",
" <th>630</th>\n",
" <td> 0.640560</td>\n",
" <td> 0.562640</td>\n",
" <td> 0.598153</td>\n",
" <td> 0.518875</td>\n",
" <td> 0.532951</td>\n",
" <td> 0.606742</td>\n",
" <td> NaN</td>\n",
" <td> 0.592160</td>\n",
" <td> 0.586916</td>\n",
" <td> 0.952168</td>\n",
" <td> 0.955881</td>\n",
" <td> 0.582155</td>\n",
" <td> 0.552566</td>\n",
" <td> 0.572556</td>\n",
" <td> 0.537578</td>\n",
" <td> 0.438868</td>\n",
" <td> 0.552459</td>\n",
" <td> 0.609035</td>\n",
" </tr>\n",
" <tr>\n",
" <th>631</th>\n",
" <td> 0.680349</td>\n",
" <td> 0.524804</td>\n",
" <td> 0.557883</td>\n",
" <td> 0.534760</td>\n",
" <td> 0.574604</td>\n",
" <td> 0.417850</td>\n",
" <td> 0.850933</td>\n",
" <td> 0.551493</td>\n",
" <td> 0.538034</td>\n",
" <td> 0.636333</td>\n",
" <td> 0.562319</td>\n",
" <td> 0.554036</td>\n",
" <td> NaN</td>\n",
" <td> 0.596780</td>\n",
" <td> 0.587236</td>\n",
" <td> 0.429805</td>\n",
" <td> 0.526942</td>\n",
" <td> 0.286885</td>\n",
" </tr>\n",
" <tr>\n",
" <th>632</th>\n",
" <td> 0.837227</td>\n",
" <td> 0.546371</td>\n",
" <td> 0.508416</td>\n",
" <td> 0.538026</td>\n",
" <td> 0.554396</td>\n",
" <td> 0.652385</td>\n",
" <td> 0.502946</td>\n",
" <td> 0.589734</td>\n",
" <td> 0.556813</td>\n",
" <td> 0.532489</td>\n",
" <td> 0.465608</td>\n",
" <td> 0.576131</td>\n",
" <td> 0.573285</td>\n",
" <td> 0.580295</td>\n",
" <td> 0.549315</td>\n",
" <td> 0.621053</td>\n",
" <td> 0.571288</td>\n",
" <td> 0.571411</td>\n",
" </tr>\n",
" <tr>\n",
" <th>633</th>\n",
" <td> 0.460225</td>\n",
" <td> 0.524803</td>\n",
" <td> 0.502367</td>\n",
" <td> 0.638403</td>\n",
" <td> 0.561060</td>\n",
" <td> 0.428623</td>\n",
" <td> NaN</td>\n",
" <td> 0.484999</td>\n",
" <td> 0.562022</td>\n",
" <td> 0.514335</td>\n",
" <td> 0.501090</td>\n",
" <td> 0.590612</td>\n",
" <td> 0.597956</td>\n",
" <td> 0.531187</td>\n",
" <td> 0.549403</td>\n",
" <td> 0.629409</td>\n",
" <td> 0.533709</td>\n",
" <td> 0.602249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>634</th>\n",
" <td> 0.579329</td>\n",
" <td> 0.539698</td>\n",
" <td> 0.564684</td>\n",
" <td> 0.517523</td>\n",
" <td> 0.577578</td>\n",
" <td> 0.439764</td>\n",
" <td> 0.585275</td>\n",
" <td> 0.516644</td>\n",
" <td> 0.623826</td>\n",
" <td> 0.537769</td>\n",
" <td> 0.425577</td>\n",
" <td> 0.502194</td>\n",
" <td> NaN</td>\n",
" <td> 0.471640</td>\n",
" <td> 0.578967</td>\n",
" <td> 0.582719</td>\n",
" <td> 0.573971</td>\n",
" <td> 0.609699</td>\n",
" </tr>\n",
" <tr>\n",
" <th>635</th>\n",
" <td> 0.434860</td>\n",
" <td> 0.559430</td>\n",
" <td> 0.514521</td>\n",
" <td> 0.537231</td>\n",
" <td> 0.550310</td>\n",
" <td> 0.455899</td>\n",
" <td> 0.421738</td>\n",
" <td> 0.580443</td>\n",
" <td> 0.596361</td>\n",
" <td> 0.506327</td>\n",
" <td> 0.475655</td>\n",
" <td> 0.560208</td>\n",
" <td> 0.545443</td>\n",
" <td> 0.591031</td>\n",
" <td> 0.548442</td>\n",
" <td> 0.637050</td>\n",
" <td> 0.524159</td>\n",
" <td> 0.601434</td>\n",
" </tr>\n",
" <tr>\n",
" <th>636</th>\n",
" <td> 0.873377</td>\n",
" <td> 0.548054</td>\n",
" <td> 0.484027</td>\n",
" <td> 0.548508</td>\n",
" <td> 0.553252</td>\n",
" <td> 0.614687</td>\n",
" <td> NaN</td>\n",
" <td> 0.577929</td>\n",
" <td> 0.583823</td>\n",
" <td> 0.578824</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.549345</td>\n",
" <td> NaN</td>\n",
" <td> 0.564518</td>\n",
" <td> 0.414137</td>\n",
" <td> 0.291104</td>\n",
" <td> 0.408630</td>\n",
" <td> 0.300661</td>\n",
" </tr>\n",
" <tr>\n",
" <th>637</th>\n",
" <td> 0.415263</td>\n",
" <td> 0.566009</td>\n",
" <td> 0.480186</td>\n",
" <td> 0.527417</td>\n",
" <td> 0.564625</td>\n",
" <td> 0.445517</td>\n",
" <td> NaN</td>\n",
" <td> 0.587302</td>\n",
" <td> 0.558257</td>\n",
" <td> 0.641007</td>\n",
" <td> 0.970785</td>\n",
" <td> 0.476230</td>\n",
" <td> 0.606129</td>\n",
" <td> 0.196598</td>\n",
" <td> 0.555116</td>\n",
" <td> 0.604278</td>\n",
" <td> 0.570199</td>\n",
" <td> 0.608236</td>\n",
" </tr>\n",
" <tr>\n",
" <th>638</th>\n",
" <td> 0.443461</td>\n",
" <td> 0.574391</td>\n",
" <td> 0.495399</td>\n",
" <td> 0.508874</td>\n",
" <td> 0.561229</td>\n",
" <td> 0.659089</td>\n",
" <td> 0.571962</td>\n",
" <td> 0.493200</td>\n",
" <td> 0.524268</td>\n",
" <td> 0.546449</td>\n",
" <td> NaN</td>\n",
" <td> 0.551826</td>\n",
" <td> 0.611695</td>\n",
" <td> 0.582295</td>\n",
" <td> 0.560410</td>\n",
" <td> 0.633905</td>\n",
" <td> 0.555364</td>\n",
" <td> 0.567880</td>\n",
" </tr>\n",
" <tr>\n",
" <th>639</th>\n",
" <td> 0.586547</td>\n",
" <td> 0.562467</td>\n",
" <td> 0.496146</td>\n",
" <td> 0.550786</td>\n",
" <td> 0.563976</td>\n",
" <td> 0.647858</td>\n",
" <td> 1.038201</td>\n",
" <td> 0.593491</td>\n",
" <td> 0.574272</td>\n",
" <td> 0.599196</td>\n",
" <td> 0.638875</td>\n",
" <td> 0.544753</td>\n",
" <td> NaN</td>\n",
" <td> 0.580982</td>\n",
" <td> 0.551629</td>\n",
" <td> 0.622126</td>\n",
" <td> 0.547607</td>\n",
" <td> 0.313568</td>\n",
" </tr>\n",
" <tr>\n",
" <th>640</th>\n",
" <td> 0.550512</td>\n",
" <td> 0.565067</td>\n",
" <td> 0.486812</td>\n",
" <td> 0.514343</td>\n",
" <td> 0.570745</td>\n",
" <td> 0.407640</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.552401</td>\n",
" <td> 0.605474</td>\n",
" <td> 0.633116</td>\n",
" <td> 0.765301</td>\n",
" <td> 0.540236</td>\n",
" <td> 0.554094</td>\n",
" <td> 0.571043</td>\n",
" <td> 0.539960</td>\n",
" <td> 0.582944</td>\n",
" <td> 0.524065</td>\n",
" <td> 0.589000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>641</th>\n",
" <td> 0.652594</td>\n",
" <td> 0.535039</td>\n",
" <td> 0.467216</td>\n",
" <td> 0.637407</td>\n",
" <td> 0.560277</td>\n",
" <td> 0.637464</td>\n",
" <td> 0.318583</td>\n",
" <td> 0.574252</td>\n",
" <td> 0.606945</td>\n",
" <td> 0.557932</td>\n",
" <td> 0.610804</td>\n",
" <td> 0.543056</td>\n",
" <td> NaN</td>\n",
" <td> 0.432869</td>\n",
" <td> 0.549532</td>\n",
" <td> 0.421781</td>\n",
" <td> 0.561814</td>\n",
" <td> 0.564976</td>\n",
" </tr>\n",
" <tr>\n",
" <th>642</th>\n",
" <td> 0.473347</td>\n",
" <td> 0.571456</td>\n",
" <td> 0.606330</td>\n",
" <td> 0.549138</td>\n",
" <td> 0.568798</td>\n",
" <td> 0.424797</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.553750</td>\n",
" <td> 0.557346</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.000000</td>\n",
" <td> 0.554808</td>\n",
" <td> NaN</td>\n",
" <td> 0.566687</td>\n",
" <td> 0.577856</td>\n",
" <td> 0.640890</td>\n",
" <td> 0.566148</td>\n",
" <td> 0.580042</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>643 rows × 18 columns</p>\n",
"</div>"
],
"text/plain": [
" MEAN_BY_age_at_application MEAN_BY_status \\\n",
"0 0.517542 0.571829 \n",
"1 0.399429 0.554419 \n",
"2 0.627790 0.536272 \n",
"3 0.652807 0.540285 \n",
"4 0.475493 0.570873 \n",
"5 0.688976 0.550255 \n",
"6 0.580870 0.552593 \n",
"7 0.350655 0.541036 \n",
"8 0.565863 0.531753 \n",
"9 0.443644 0.530240 \n",
"10 0.684632 0.546995 \n",
"11 0.522674 0.552124 \n",
"12 0.500464 0.537395 \n",
"13 0.694648 0.528359 \n",
"14 0.435828 0.528209 \n",
"15 0.397929 0.526489 \n",
"16 0.000000 0.553516 \n",
"17 0.560746 0.550512 \n",
"18 NaN 0.527371 \n",
"19 0.410384 0.551518 \n",
"20 0.739656 0.551889 \n",
"21 0.507572 0.561915 \n",
"22 0.642662 0.566429 \n",
"23 0.496317 0.541307 \n",
"24 0.389090 0.569904 \n",
"25 0.496467 0.541628 \n",
"26 0.567400 0.548106 \n",
"27 0.729398 0.537536 \n",
"28 0.853946 0.557129 \n",
"29 0.628606 0.577731 \n",
".. ... ... \n",
"613 0.444902 0.531450 \n",
"614 0.512887 0.573706 \n",
"615 0.636723 0.566364 \n",
"616 0.655397 0.568012 \n",
"617 0.380226 0.569853 \n",
"618 0.465282 0.526812 \n",
"619 0.562686 0.530329 \n",
"620 0.540569 0.564725 \n",
"621 0.417533 0.534088 \n",
"622 0.565731 0.570258 \n",
"623 0.690731 0.547814 \n",
"624 0.631071 0.563347 \n",
"625 0.776412 0.528702 \n",
"626 0.344323 0.548091 \n",
"627 0.429596 0.557114 \n",
"628 0.762505 0.543978 \n",
"629 0.445562 0.563655 \n",
"630 0.640560 0.562640 \n",
"631 0.680349 0.524804 \n",
"632 0.837227 0.546371 \n",
"633 0.460225 0.524803 \n",
"634 0.579329 0.539698 \n",
"635 0.434860 0.559430 \n",
"636 0.873377 0.548054 \n",
"637 0.415263 0.566009 \n",
"638 0.443461 0.574391 \n",
"639 0.586547 0.562467 \n",
"640 0.550512 0.565067 \n",
"641 0.652594 0.535039 \n",
"642 0.473347 0.571456 \n",
"\n",
" MEAN_BY_residence_rent_or_own MEAN_BY_bank_account_direct_deposit \\\n",
"0 0.494244 0.520542 \n",
"1 0.469449 0.544038 \n",
"2 0.608097 0.514617 \n",
"3 0.589860 0.540995 \n",
"4 0.473126 0.508780 \n",
"5 0.585853 0.515863 \n",
"6 0.504688 0.547754 \n",
"7 0.496740 0.613502 \n",
"8 0.590477 0.513816 \n",
"9 0.585058 0.648754 \n",
"10 0.585351 0.520896 \n",
"11 0.481081 0.650700 \n",
"12 0.591601 0.663655 \n",
"13 0.600319 0.546374 \n",
"14 0.589725 0.530802 \n",
"15 0.554701 0.648695 \n",
"16 0.467175 0.552598 \n",
"17 0.601912 0.525263 \n",
"18 0.586456 0.511822 \n",
"19 0.555333 0.543057 \n",
"20 0.582182 0.554088 \n",
"21 0.574720 0.515846 \n",
"22 0.595979 0.534712 \n",
"23 0.599506 0.551721 \n",
"24 0.599323 0.535003 \n",
"25 0.582262 0.559240 \n",
"26 0.559774 0.522028 \n",
"27 0.609720 0.640785 \n",
"28 0.591020 0.518054 \n",
"29 0.482355 0.604700 \n",
".. ... ... \n",
"613 0.581533 0.543362 \n",
"614 0.500176 0.510136 \n",
"615 0.579426 0.540522 \n",
"616 0.558486 0.548196 \n",
"617 0.486284 0.556094 \n",
"618 0.470117 0.510534 \n",
"619 0.474786 0.541422 \n",
"620 0.600143 0.515467 \n",
"621 0.501683 0.651355 \n",
"622 0.489100 0.526691 \n",
"623 0.604246 0.506853 \n",
"624 0.566045 0.540059 \n",
"625 0.565955 0.507513 \n",
"626 0.568676 0.522844 \n",
"627 0.582951 0.605076 \n",
"628 0.584458 0.603054 \n",
"629 0.514475 0.523519 \n",
"630 0.598153 0.518875 \n",
"631 0.557883 0.534760 \n",
"632 0.508416 0.538026 \n",
"633 0.502367 0.638403 \n",
"634 0.564684 0.517523 \n",
"635 0.514521 0.537231 \n",
"636 0.484027 0.548508 \n",
"637 0.480186 0.527417 \n",
"638 0.495399 0.508874 \n",
"639 0.496146 0.550786 \n",
"640 0.486812 0.514343 \n",
"641 0.467216 0.637407 \n",
"642 0.606330 0.549138 \n",
"\n",
" MEAN_BY_payment_ach MEAN_BY_payment_frequency MEAN_BY_address_zip \\\n",
"0 0.556772 0.451417 NaN \n",
"1 0.566894 0.658350 1.003726 \n",
"2 0.524484 0.646540 0.436093 \n",
"3 0.552187 0.648897 0.260163 \n",
"4 0.531369 0.444453 NaN \n",
"5 0.541797 0.603874 0.000000 \n",
"6 0.570103 0.437464 0.572063 \n",
"7 0.537937 0.420623 0.691819 \n",
"8 0.523466 0.407679 0.475578 \n",
"9 0.571370 0.438148 0.742308 \n",
"10 0.563226 0.864418 1.034278 \n",
"11 0.540793 0.424293 0.000000 \n",
"12 0.554640 0.425866 NaN \n",
"13 0.534522 0.627331 0.000000 \n",
"14 0.558356 0.401063 0.000000 \n",
"15 0.524565 0.660230 0.579327 \n",
"16 0.575798 0.438324 0.775670 \n",
"17 0.522458 0.422459 0.481897 \n",
"18 0.522281 0.439009 0.479763 \n",
"19 0.569918 0.436761 0.524987 \n",
"20 0.541690 0.607340 0.480076 \n",
"21 0.524367 0.441451 0.445840 \n",
"22 0.552531 0.657346 0.650982 \n",
"23 0.548648 0.450843 0.628880 \n",
"24 0.555871 0.656791 0.956358 \n",
"25 0.539800 0.886367 0.489787 \n",
"26 0.561494 0.661338 0.403418 \n",
"27 0.557616 0.397256 0.876264 \n",
"28 0.567708 0.437493 0.726672 \n",
"29 0.548282 0.435476 NaN \n",
".. ... ... ... \n",
"613 0.576319 0.460006 0.686549 \n",
"614 0.577442 0.447369 0.422781 \n",
"615 0.576262 0.654664 0.974715 \n",
"616 0.525779 0.628419 NaN \n",
"617 0.560889 0.631773 0.592032 \n",
"618 0.535306 0.408610 NaN \n",
"619 0.539314 0.438840 0.242610 \n",
"620 0.552378 0.651670 0.723968 \n",
"621 0.546980 0.456955 0.514782 \n",
"622 0.537444 0.431932 NaN \n",
"623 0.558314 0.420731 0.327148 \n",
"624 0.527046 0.419409 0.349922 \n",
"625 0.524036 0.624092 0.635204 \n",
"626 0.548815 0.646992 0.839429 \n",
"627 0.568536 0.420578 0.633580 \n",
"628 0.538317 0.417545 0.577393 \n",
"629 0.529758 0.408874 0.976592 \n",
"630 0.532951 0.606742 NaN \n",
"631 0.574604 0.417850 0.850933 \n",
"632 0.554396 0.652385 0.502946 \n",
"633 0.561060 0.428623 NaN \n",
"634 0.577578 0.439764 0.585275 \n",
"635 0.550310 0.455899 0.421738 \n",
"636 0.553252 0.614687 NaN \n",
"637 0.564625 0.445517 NaN \n",
"638 0.561229 0.659089 0.571962 \n",
"639 0.563976 0.647858 1.038201 \n",
"640 0.570745 0.407640 0.000000 \n",
"641 0.560277 0.637464 0.318583 \n",
"642 0.568798 0.424797 0.000000 \n",
"\n",
" MEAN_BY_State MEAN_BY_email_provider MEAN_BY_routing_symbol \\\n",
"0 0.594170 0.537133 0.558437 \n",
"1 0.571771 0.521474 0.604892 \n",
"2 0.571640 0.540709 0.403309 \n",
"3 0.564072 0.507995 0.423049 \n",
"4 0.559163 0.546935 0.668395 \n",
"5 0.570112 0.505125 0.548543 \n",
"6 0.588385 0.531421 0.412530 \n",
"7 0.551646 0.607223 0.598742 \n",
"8 0.549040 0.552063 0.380950 \n",
"9 0.546638 0.609696 0.592947 \n",
"10 0.563414 0.498338 0.678406 \n",
"11 0.560486 0.524117 0.493098 \n",
"12 0.586036 0.560867 0.507782 \n",
"13 0.538476 0.498785 0.666678 \n",
"14 0.577762 0.528266 0.551812 \n",
"15 0.571377 0.490050 0.335807 \n",
"16 0.469365 0.517239 0.508184 \n",
"17 0.466428 0.559929 0.507928 \n",
"18 0.568433 0.513343 0.549666 \n",
"19 0.492089 0.618675 0.488143 \n",
"20 0.572875 0.559797 0.548827 \n",
"21 0.548882 0.535417 0.000000 \n",
"22 0.568618 0.550232 0.630204 \n",
"23 0.567121 0.600018 0.506350 \n",
"24 0.489777 0.607980 0.600240 \n",
"25 0.567950 0.518568 0.527640 \n",
"26 0.473217 0.542250 NaN \n",
"27 0.590294 0.560552 1.018300 \n",
"28 0.588861 0.561366 0.564045 \n",
"29 0.470625 0.561767 0.558091 \n",
".. ... ... ... \n",
"613 0.514527 0.628586 0.339397 \n",
"614 0.556519 0.603405 0.566129 \n",
"615 0.555919 0.569009 0.596871 \n",
"616 0.570018 0.527417 0.580270 \n",
"617 0.577998 0.535080 0.575168 \n",
"618 0.562159 0.517129 0.486302 \n",
"619 0.581739 0.511096 0.423126 \n",
"620 0.585175 0.507145 0.380488 \n",
"621 0.583888 0.562920 0.570569 \n",
"622 0.550015 0.508142 0.576704 \n",
"623 0.490053 0.494646 0.477760 \n",
"624 0.557259 0.542784 0.595225 \n",
"625 0.551823 0.499075 0.821823 \n",
"626 0.593204 0.584476 0.556678 \n",
"627 0.567919 0.597918 0.435602 \n",
"628 0.461703 0.608771 0.502688 \n",
"629 0.545068 0.497158 0.415081 \n",
"630 0.592160 0.586916 0.952168 \n",
"631 0.551493 0.538034 0.636333 \n",
"632 0.589734 0.556813 0.532489 \n",
"633 0.484999 0.562022 0.514335 \n",
"634 0.516644 0.623826 0.537769 \n",
"635 0.580443 0.596361 0.506327 \n",
"636 0.577929 0.583823 0.578824 \n",
"637 0.587302 0.558257 0.641007 \n",
"638 0.493200 0.524268 0.546449 \n",
"639 0.593491 0.574272 0.599196 \n",
"640 0.552401 0.605474 0.633116 \n",
"641 0.574252 0.606945 0.557932 \n",
"642 0.553750 0.557346 0.000000 \n",
"\n",
" MEAN_BY_institution MEAN_BY_home_phone_type MEAN_BY_other_phone_type \\\n",
"0 NaN 0.586874 0.603728 \n",
"1 0.632316 0.459916 0.600047 \n",
"2 0.396041 0.546133 NaN \n",
"3 NaN 0.544627 0.556392 \n",
"4 0.000000 0.538942 0.591167 \n",
"5 0.597531 0.461261 0.606653 \n",
"6 0.407121 0.585024 0.547060 \n",
"7 0.585584 0.562681 0.558898 \n",
"8 0.363018 0.568953 NaN \n",
"9 0.474031 0.584009 0.528087 \n",
"10 1.019597 0.490938 0.623438 \n",
"11 0.646970 0.498474 NaN \n",
"12 NaN 0.471243 0.569160 \n",
"13 0.523545 0.579877 NaN \n",
"14 0.475112 0.561278 0.575929 \n",
"15 0.347081 0.592872 0.590621 \n",
"16 NaN 0.553595 NaN \n",
"17 0.967398 0.452353 0.617179 \n",
"18 0.558839 0.476717 NaN \n",
"19 0.443218 0.551421 NaN \n",
"20 0.628056 0.582939 0.531679 \n",
"21 0.000000 0.503453 0.588748 \n",
"22 0.779326 0.464207 0.581757 \n",
"23 0.496482 0.575000 NaN \n",
"24 0.613187 0.536199 0.572101 \n",
"25 0.424441 0.573632 0.562991 \n",
"26 NaN 0.558707 0.607349 \n",
"27 0.990338 0.591778 NaN \n",
"28 0.607998 0.567359 0.605553 \n",
"29 NaN 0.590543 NaN \n",
".. ... ... ... \n",
"613 0.335029 0.483332 NaN \n",
"614 0.653260 0.584228 0.604080 \n",
"615 0.649658 0.548259 0.602617 \n",
"616 0.644592 0.579050 0.564567 \n",
"617 0.450583 0.577126 0.547663 \n",
"618 NaN 0.574824 0.611634 \n",
"619 0.000000 0.575919 NaN \n",
"620 NaN 0.547961 0.555082 \n",
"621 0.491702 0.574086 NaN \n",
"622 0.618419 0.565152 NaN \n",
"623 0.422598 0.538062 0.589025 \n",
"624 0.597731 0.482756 0.563948 \n",
"625 0.820567 0.551897 0.614297 \n",
"626 0.626239 0.569192 0.601440 \n",
"627 0.407960 0.575341 0.556210 \n",
"628 0.369492 0.558571 0.543619 \n",
"629 0.393474 0.636485 0.591303 \n",
"630 0.955881 0.582155 0.552566 \n",
"631 0.562319 0.554036 NaN \n",
"632 0.465608 0.576131 0.573285 \n",
"633 0.501090 0.590612 0.597956 \n",
"634 0.425577 0.502194 NaN \n",
"635 0.475655 0.560208 0.545443 \n",
"636 0.000000 0.549345 NaN \n",
"637 0.970785 0.476230 0.606129 \n",
"638 NaN 0.551826 0.611695 \n",
"639 0.638875 0.544753 NaN \n",
"640 0.765301 0.540236 0.554094 \n",
"641 0.610804 0.543056 NaN \n",
"642 0.000000 0.554808 NaN \n",
"\n",
" MEAN_BY_how_use_money MEAN_BY_FICO_telecom_grade \\\n",
"0 0.556030 0.535966 \n",
"1 0.548491 0.582148 \n",
"2 0.466287 0.539096 \n",
"3 0.534865 0.538689 \n",
"4 0.456890 0.572069 \n",
"5 0.639479 0.556825 \n",
"6 0.584413 0.581124 \n",
"7 0.564348 0.567462 \n",
"8 0.584258 0.536501 \n",
"9 0.494747 0.536497 \n",
"10 0.570159 0.559526 \n",
"11 0.670699 0.549569 \n",
"12 0.480000 0.566595 \n",
"13 0.578266 0.537784 \n",
"14 0.588612 0.566749 \n",
"15 0.551785 0.580684 \n",
"16 0.482620 0.536571 \n",
"17 0.546840 0.580085 \n",
"18 0.607441 0.574126 \n",
"19 0.581054 0.555595 \n",
"20 0.554225 0.581215 \n",
"21 0.599674 0.588519 \n",
"22 0.576969 0.410455 \n",
"23 0.558388 0.583921 \n",
"24 0.650682 0.574680 \n",
"25 0.569694 0.590968 \n",
"26 0.599586 0.548574 \n",
"27 0.595892 0.569365 \n",
"28 0.554384 0.565209 \n",
"29 0.547113 0.549539 \n",
".. ... ... \n",
"613 0.453893 0.568827 \n",
"614 0.191959 0.404393 \n",
"615 0.607272 0.588661 \n",
"616 0.474552 0.585677 \n",
"617 0.587791 0.552982 \n",
"618 0.598125 0.582344 \n",
"619 0.489592 0.579661 \n",
"620 0.574413 0.541090 \n",
"621 0.573237 0.545520 \n",
"622 0.588185 0.545734 \n",
"623 0.438469 0.588725 \n",
"624 0.559595 0.566472 \n",
"625 0.574307 0.583396 \n",
"626 0.463204 0.551428 \n",
"627 0.656053 0.435138 \n",
"628 0.560189 0.550537 \n",
"629 0.483776 0.560353 \n",
"630 0.572556 0.537578 \n",
"631 0.596780 0.587236 \n",
"632 0.580295 0.549315 \n",
"633 0.531187 0.549403 \n",
"634 0.471640 0.578967 \n",
"635 0.591031 0.548442 \n",
"636 0.564518 0.414137 \n",
"637 0.196598 0.555116 \n",
"638 0.582295 0.560410 \n",
"639 0.580982 0.551629 \n",
"640 0.571043 0.539960 \n",
"641 0.432869 0.549532 \n",
"642 0.566687 0.577856 \n",
"\n",
" MEAN_BY_FICO_retail_grade MEAN_BY_FICO_bank_grade \\\n",
"0 0.606974 0.543644 \n",
"1 0.446776 0.559864 \n",
"2 0.432630 0.562527 \n",
"3 0.611847 0.769204 \n",
"4 0.432686 0.429499 \n",
"5 0.588563 0.737572 \n",
"6 0.608334 0.562271 \n",
"7 0.627346 0.555130 \n",
"8 0.585919 0.539063 \n",
"9 0.404297 0.560831 \n",
"10 0.452262 0.435517 \n",
"11 0.410849 0.571156 \n",
"12 0.638426 0.535304 \n",
"13 0.589888 0.519568 \n",
"14 0.588757 0.552344 \n",
"15 0.414970 0.524187 \n",
"16 0.412741 0.546004 \n",
"17 0.620475 0.530691 \n",
"18 0.582103 0.739016 \n",
"19 0.405775 0.404795 \n",
"20 0.430140 0.387715 \n",
"21 0.623244 0.557579 \n",
"22 0.439073 0.389809 \n",
"23 0.600051 0.517963 \n",
"24 0.591002 0.547826 \n",
"25 0.595584 0.560051 \n",
"26 0.629289 0.566470 \n",
"27 0.638073 0.561568 \n",
"28 0.416839 0.550760 \n",
"29 0.421872 0.525814 \n",
".. ... ... \n",
"613 0.445912 0.527159 \n",
"614 0.427156 0.417121 \n",
"615 0.430991 0.524210 \n",
"616 0.580035 0.714946 \n",
"617 0.606649 0.531885 \n",
"618 0.641199 0.564118 \n",
"619 0.590426 0.531310 \n",
"620 0.607958 0.769137 \n",
"621 0.441048 0.418729 \n",
"622 0.444992 0.411203 \n",
"623 0.418612 0.407487 \n",
"624 0.417482 0.546066 \n",
"625 0.627400 0.553903 \n",
"626 0.619576 0.557485 \n",
"627 0.425251 0.419312 \n",
"628 0.620434 0.719316 \n",
"629 0.638526 0.533047 \n",
"630 0.438868 0.552459 \n",
"631 0.429805 0.526942 \n",
"632 0.621053 0.571288 \n",
"633 0.629409 0.533709 \n",
"634 0.582719 0.573971 \n",
"635 0.637050 0.524159 \n",
"636 0.291104 0.408630 \n",
"637 0.604278 0.570199 \n",
"638 0.633905 0.555364 \n",
"639 0.622126 0.547607 \n",
"640 0.582944 0.524065 \n",
"641 0.421781 0.561814 \n",
"642 0.640890 0.566148 \n",
"\n",
" MEAN_BY_FICO_money_grade \n",
"0 0.560367 \n",
"1 0.310969 \n",
"2 0.588560 \n",
"3 0.572985 \n",
"4 0.605833 \n",
"5 0.592148 \n",
"6 0.585503 \n",
"7 0.287170 \n",
"8 0.556564 \n",
"9 0.598067 \n",
"10 0.309912 \n",
"11 0.567746 \n",
"12 0.581685 \n",
"13 0.601833 \n",
"14 0.601179 \n",
"15 0.584980 \n",
"16 0.317705 \n",
"17 0.588687 \n",
"18 0.598383 \n",
"19 0.585047 \n",
"20 0.558890 \n",
"21 0.585748 \n",
"22 0.602730 \n",
"23 0.557051 \n",
"24 0.560444 \n",
"25 0.611709 \n",
"26 0.611868 \n",
"27 0.607779 \n",
"28 0.608390 \n",
"29 0.579806 \n",
".. ... \n",
"613 0.611500 \n",
"614 0.312969 \n",
"615 0.570576 \n",
"616 0.599790 \n",
"617 0.597014 \n",
"618 0.571125 \n",
"619 0.558183 \n",
"620 0.570468 \n",
"621 0.315301 \n",
"622 0.611549 \n",
"623 0.565994 \n",
"624 0.560571 \n",
"625 0.581571 \n",
"626 0.572047 \n",
"627 0.321988 \n",
"628 0.577551 \n",
"629 0.598957 \n",
"630 0.609035 \n",
"631 0.286885 \n",
"632 0.571411 \n",
"633 0.602249 \n",
"634 0.609699 \n",
"635 0.601434 \n",
"636 0.300661 \n",
"637 0.608236 \n",
"638 0.567880 \n",
"639 0.313568 \n",
"640 0.589000 \n",
"641 0.564976 \n",
"642 0.580042 \n",
"\n",
"[643 rows x 18 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"full_df[oneway_cat_aggr_cols]"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"full_cols=num_cols+oneway_cat_aggr_cols\n",
"\n",
"trainx=full_df[full_cols].fillna(-1).values\n",
"trainy=full_df[target_col].fillna(-1).values.reshape(len(full_df))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Searching.... \n",
"Fitting 10 folds for each of 10 candidates, totalling 100 fits\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.478448 - 0.0s\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.477490 - 0.0s\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.405651 - 0.0s\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.438218 - 0.0s\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.537931 - 0.0s\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.477833 - 0.0s\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.526601 - 0.0s\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.446305 - 0.0s\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.418719 - 0.0s\n",
"[CV] penalty=l1, C=0.0001 ............................................\n",
"[CV] ................... penalty=l1, C=0.0001, score=0.489286 - 0.0s\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.713602 - 0.0s\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.632184 - 0.0s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 0.0s\n",
"[Parallel(n_jobs=1)]: Done 2 jobs | elapsed: 0.0s\n",
"[Parallel(n_jobs=1)]: Done 5 jobs | elapsed: 0.0s\n",
"[Parallel(n_jobs=1)]: Done 8 jobs | elapsed: 0.0s\n",
"[Parallel(n_jobs=1)]: Done 13 jobs | elapsed: 0.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.617816 - 0.0s\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.670498 - 0.0s\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.524138 - 0.0s\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.610837 - 0.0s\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.511330 - 0.0s\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.613793 - 0.0s\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.663054 - 0.0s\n",
"[CV] penalty=l2, C=0.0001 ............................................\n",
"[CV] ................... penalty=l2, C=0.0001, score=0.647959 - 0.0s\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.621648 - 0.0s\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.484674 - 0.0s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 18 jobs | elapsed: 0.1s\n",
"[Parallel(n_jobs=1)]: Done 25 jobs | elapsed: 0.2s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.660441 - 0.0s\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.636015 - 0.0s\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.500493 - 0.0s\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.595074 - 0.0s\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.421675 - 0.0s\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.460099 - 0.0s\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.413300 - 0.0s\n",
"[CV] penalty=l1, C=0.0003 ............................................\n",
"[CV] ................... penalty=l1, C=0.0003, score=0.676531 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.721264 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.640805 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.603448 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.671456 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.523153 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.606897 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.508374 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.604926 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.670936 - 0.0s\n",
"[CV] penalty=l2, C=0.0003 ............................................\n",
"[CV] ................... penalty=l2, C=0.0003, score=0.639796 - 0.0s\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.672414 - 0.0s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 32 jobs | elapsed: 0.3s\n",
"[Parallel(n_jobs=1)]: Done 41 jobs | elapsed: 0.5s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.590038 - 0.0s\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.636015 - 0.0s\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.649425 - 0.0s\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.533005 - 0.0s\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.624631 - 0.0s\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.475862 - 0.0s\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.566502 - 0.0s\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.594089 - 0.0s\n",
"[CV] penalty=l1, C=0.001 .............................................\n",
"[CV] .................... penalty=l1, C=0.001, score=0.631633 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.715517 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.648467 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.595785 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.652299 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.542857 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.593103 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.509360 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.600985 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.673892 - 0.0s\n",
"[CV] penalty=l2, C=0.001 .............................................\n",
"[CV] .................... penalty=l2, C=0.001, score=0.635714 - 0.0s\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.677203 - 0.0s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 50 jobs | elapsed: 0.8s\n",
"[Parallel(n_jobs=1)]: Done 61 jobs | elapsed: 1.1s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.605364 - 0.0s\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.621648 - 0.0s\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.651341 - 0.0s\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.510345 - 0.0s\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.630542 - 0.1s\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.500493 - 0.0s\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.600000 - 0.0s\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.634483 - 0.0s\n",
"[CV] penalty=l1, C=0.003 .............................................\n",
"[CV] .................... penalty=l1, C=0.003, score=0.628571 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.708812 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.647510 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.593870 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.635057 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.543842 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.591133 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.507389 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.598030 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.677833 - 0.0s\n",
"[CV] penalty=l2, C=0.003 .............................................\n",
"[CV] .................... penalty=l2, C=0.003, score=0.635714 - 0.0s\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.705939 - 0.0s\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.636015 - 0.1s\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.609195 - 0.0s\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.664751 - 0.0s\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.517241 - 0.1s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 72 jobs | elapsed: 1.8s\n",
"[Parallel(n_jobs=1)]: Done 85 jobs | elapsed: 2.8s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.607882 - 0.2s\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.518227 - 0.0s\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.603941 - 0.1s\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.673892 - 0.1s\n",
"[CV] penalty=l1, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l1, C=0.01, score=0.643878 - 0.0s\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.703065 - 0.0s\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.652299 - 0.0s\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.597701 - 0.0s\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.615900 - 0.0s\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.542857 - 0.0s\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.588177 - 0.0s\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.509360 - 0.0s\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.601970 - 0.0s\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.667980 - 0.0s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 98 jobs | elapsed: 4.1s\n",
"[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed: 4.2s finished\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] penalty=l2, C=0.01 ..............................................\n",
"[CV] ..................... penalty=l2, C=0.01, score=0.644898 - 0.0s\n",
"Best score: 0.621\n",
"('Best parameters set:', {'penalty': 'l2', 'C': 0.0001})\n",
"0.620714894349 {'penalty': 'l2', 'C': 0.0001}\n"
]
}
],
"source": [
"from sklearn.linear_model import LogisticRegression,ElasticNet\n",
"print (\"Searching.... \")\n",
"param_grid = {'C': [0.0001,0.0003,0.001,0.003,0.01]\n",
" , 'penalty': ['l1','l2']\n",
" }\n",
"(best_score, best_params) = search_model(trainx\n",
" , trainy\n",
" , LogisticRegression()\n",
" , param_grid\n",
" , n_jobs=1\n",
" , cv=10) \n",
"\n",
"print best_score, best_params"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Searching.... \n",
"Fitting 10 folds for each of 4 candidates, totalling 40 fits\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.716475 - 8.2s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.773946 - 4.6s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 1 jobs | elapsed: 8.2s\n",
"[Parallel(n_jobs=1)]: Done 2 jobs | elapsed: 13.0s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.659004 - 4.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.772989 - 4.0s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.715271 - 4.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.753695 - 4.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.672906 - 5.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.708374 - 3.9s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 5 jobs | elapsed: 25.5s\n",
"[Parallel(n_jobs=1)]: Done 8 jobs | elapsed: 38.8s\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.739901 - 4.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=8, score=0.798980 - 4.6s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.711686 - 5.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.773946 - 5.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.653257 - 4.9s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.769157 - 8.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.714286 - 5.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.749754 - 5.9s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.677833 - 6.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.697537 - 5.1s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 13 jobs | elapsed: 1.1min\n",
"[Parallel(n_jobs=1)]: Done 18 jobs | elapsed: 1.6min\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.742857 - 5.2s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=8, score=0.794898 - 5.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.723180 - 4.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.772031 - 4.3s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.655172 - 4.2s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.783525 - 4.0s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.713300 - 4.1s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.763547 - 4.2s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.674877 - 7.3s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.708374 - 4.3s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.743842 - 4.7s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=3000, subsample=0.8, max_depth=9, score=0.795918 - 4.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.718391 - 5.2s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.772989 - 5.6s"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 25 jobs | elapsed: 2.1min\n",
"[Parallel(n_jobs=1)]: Done 32 jobs | elapsed: 2.7min\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.652299 - 5.4s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.780651 - 5.2s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.711330 - 5.3s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.756650 - 5.2s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.674877 - 5.0s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.699507 - 5.2s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.748768 - 8.5s\n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9 \n",
"[CV] colsample_bytree=0.8, learning_rate=0.01, min_child_weight=5, n_estimators=4000, subsample=0.8, max_depth=9, score=0.790816 - 5.5s\n",
"Best score: 0.733\n",
"('Best parameters set:', {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'min_child_weight': 5, 'n_estimators': 3000, 'subsample': 0.8, 'max_depth': 9})\n",
"0.733280088461 {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'min_child_weight': 5, 'n_estimators': 3000, 'subsample': 0.8, 'max_depth': 9}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[Parallel(n_jobs=1)]: Done 40 out of 40 | elapsed: 3.5min finished\n"
]
}
],
"source": [
"\n",
"print (\"Searching.... \")\n",
"param_grid = {'learning_rate': [0.01]\n",
" , 'max_depth': [8,9]\n",
" , 'n_estimators': [3000,4000]\n",
" , 'min_child_weight' : [5]\n",
" , 'subsample' : [0.8]\n",
" , 'colsample_bytree' : [0.8]\n",
" }\n",
"(best_score, best_params) = search_model(trainx\n",
" , trainy\n",
" , xgb.XGBClassifier(missing=-1, nthread=-1, gamma=0)\n",
" , param_grid\n",
" , n_jobs=1\n",
" , cv=10) \n",
"\n",
"print best_score, best_params"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Best score: 0.738\n",
"# ('Best parameters set:', {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'min_child_weight': 5, 'n_estimators': 2000, 'subsample': 0.8, 'max_depth': 9})\n",
"# 0.738063997698 {'colsample_bytree': 0.8, 'learning_rate': 0.01, 'min_child_weight': 5, 'n_estimators': 2000, 'subsample': 0.8, 'max_depth': 9}\n",
"# [Parallel(n_jobs=1)]: Done 40 out of 40 | elapsed: 1.8min finished\n",
"\n",
"# Best score: 0.714\n",
"# ('Best parameters set:', {'colsample_bytree': 0.8, 'learning_rate': 0.001, 'min_child_weight': 5, 'n_estimators': 2000, 'subsample': 0.8, 'max_depth': 9})\n",
"# 0.714271796561 {'colsample_bytree': 0.8, 'learning_rate': 0.001, 'min_child_weight': 5, 'n_estimators': 2000, 'subsample': 0.8, 'max_depth': 9}"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_importance(model,feature_names):\n",
" importance = model.get_fscore()\n",
" tuples = [(full_cols[int(k[1:])], importance[k]) for k in importance]\n",
" tuples = sorted(tuples, key=lambda x: x[1],reverse=True)\n",
" return pd.DataFrame(tuples, columns=['name','importance']) "
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"params = { \"objective\": \"binary:logistic\"\n",
" , \"eval_metric\": \"auc\"\n",
" , \"eta\": 0.01\n",
" , \"min_child_weight\": 5\n",
" , \"subsample\": 0.8\n",
" , \"colsample_bytree\": 0.8\n",
" , \"max_depth\": 9\n",
" }\n",
"\n",
"plst = list(params.items())\n",
"xgtrain = xgb.DMatrix(trainx, label=trainy, missing=-1)\n",
"model = xgb.train(params=plst\n",
" , dtrain=xgtrain\n",
" , num_boost_round=2000\n",
" )\n"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" name importance\n",
"0 MEAN_BY_institution 1212\n",
"1 MEAN_BY_age_at_application 942\n",
"2 MEAN_BY_routing_symbol 849\n",
"3 MEAN_BY_payment_frequency 795\n",
"4 MEAN_BY_address_zip 793\n",
"5 MEAN_BY_other_phone_type 751\n",
"6 raw_FICO_bank_card 637\n",
"7 MEAN_BY_how_use_money 630\n",
"8 MEAN_BY_FICO_money_grade 620\n",
"9 raw_FICO_retail 615\n",
"10 MEAN_BY_FICO_bank_grade 612\n",
"11 MEAN_BY_bank_account_direct_deposit 611\n",
"12 MEAN_BY_FICO_retail_grade 610\n",
"13 age_at_application 594\n",
"14 MEAN_BY_home_phone_type 534\n",
"15 MEAN_BY_State 520\n",
"16 MEAN_BY_payment_ach 510\n",
"17 MEAN_BY_residence_rent_or_own 497\n",
"18 MEAN_BY_email_provider 450\n",
"19 raw_l2c_score 419\n",
"20 MEAN_BY_status 412\n",
"21 monthly_pay_ratio 400\n",
"22 monthly_rent_amount 376\n",
"23 MEAN_BY_FICO_telecom_grade 337\n",
"24 day_of_application 302\n",
"25 raw_FICO_telecom 301\n",
"26 monthly_income_amount 278\n",
"27 raw_FICO_money 262\n",
"28 total_payment 237\n",
"29 total_payment_principal_ratio 211\n",
"30 payment_amount_approved 206\n",
"31 payment_approve_ratio 192\n",
"32 month_of_application 165\n",
"33 amount_requested 163\n",
"34 day_of_week 144\n",
"35 payment_amount 130\n",
"36 num_payments 121\n",
"37 bank_account_duration_ordered 116\n",
"38 residence_duration_ordered 71\n",
"39 loan_duration 45\n",
"40 request_approve_ratio 42\n",
"41 more_than_2_phones 18\n",
"42 FICO_retail_grade_ordered 10\n",
"43 FICO_money_grade_ordered 8\n",
"44 amount_approved 7\n",
"45 FICO_bank_grade_ordered 6\n",
"46 duration_approved 6\n",
"47 year_of_application 2\n"
]
}
],
"source": [
"print get_importance(model,full_cols)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.9"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment