Last active
December 22, 2020 19:16
-
-
Save accessnash/b195fd53ddeac04484c803ab60c8aa3d to your computer and use it in GitHub Desktop.
Continues from the PD model built in the previous section: builds the LGD and EAD models, and finally calculates the expected loss for the loan portfolio.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Sun Dec 20 17:20:52 2020 | |
@author: User | |
""" | |
#Part III - LGD & EAD Model | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
sns.set() | |
# Pre-processing the dataset
loan_data_preprocessed_backup = pd.read_csv('loan_data_2007_2014_preprocessed.csv')
loan_data_preprocessed = loan_data_preprocessed_backup.copy()

# Keep only the loans whose status is one of the two charged-off values.
# The explicit .copy() makes loan_data_defaults an independent frame, so the
# column assignments below are not chained assignments on a slice of
# loan_data_preprocessed.
_charged_off_statuses = ['Charged Off', 'Does not meet the credit policy. Status:Charged Off']
loan_data_defaults = loan_data_preprocessed[loan_data_preprocessed['loan_status'].isin(_charged_off_statuses)].copy()
loan_data_defaults.isnull().sum()

# Replace missing values with 0. The result is assigned back to the column:
# calling fillna(inplace = True) on a column selection is not guaranteed to
# update the parent frame.
loan_data_defaults['mths_since_last_delinq'] = loan_data_defaults['mths_since_last_delinq'].fillna(0)
loan_data_defaults['mths_since_last_record'] = loan_data_defaults['mths_since_last_record'].fillna(0)

# Recovery rate = recoveries / funded amount, limited to the interval [0, 1].
loan_data_defaults['recovery_rate'] = loan_data_defaults['recoveries'] / loan_data_defaults['funded_amnt']
loan_data_defaults['recovery_rate'].describe()
loan_data_defaults['recovery_rate'] = np.clip(loan_data_defaults['recovery_rate'], 0, 1)

# CCF = share of the funded amount whose principal has not been received.
loan_data_defaults['CCF'] = (loan_data_defaults['funded_amnt'] - loan_data_defaults['total_rec_prncp']) / loan_data_defaults['funded_amnt']
loan_data_defaults['CCF'].describe()

plt.hist(loan_data_defaults['recovery_rate'], bins = 50)
plt.hist(loan_data_defaults['CCF'], bins = 100)

# Binary flag: 0 when the recovery rate is exactly 0, otherwise 1.
loan_data_defaults['recovery_rate_0_1'] = np.where(loan_data_defaults['recovery_rate'] == 0, 0, 1)
# LGD Model
from sklearn.model_selection import train_test_split

# Stage 1 target: the binary recovery_rate_0_1 flag. The listed columns are
# removed from the inputs before splitting 80 / 20 with a fixed seed.
_stage_1_excluded_columns = ['good_bad', 'recovery_rate', 'recovery_rate_0_1', 'CCF']
_stage_1_split = train_test_split(
    loan_data_defaults.drop(_stage_1_excluded_columns, axis = 1),
    loan_data_defaults['recovery_rate_0_1'],
    test_size = 0.2,
    random_state = 42)
(lgd_inputs_stage_1_train,
 lgd_inputs_stage_1_test,
 lgd_targets_stage_1_train,
 lgd_targets_stage_1_test) = _stage_1_split
# Dummy-variable groups used by the LGD and EAD models, one row per
# categorical variable: (variable name, dummy levels, level listed in
# features_reference_cat). Column names have the form '<variable>:<level>'.
_LGD_DUMMY_GROUPS = (
    ('grade',
     ('A', 'B', 'C', 'D', 'E', 'F', 'G'),
     'G'),
    ('home_ownership',
     ('MORTGAGE', 'NONE', 'OTHER', 'OWN', 'RENT'),
     'RENT'),
    ('verification_status',
     ('Not Verified', 'Source Verified', 'Verified'),
     'Verified'),
    ('purpose',
     ('car', 'credit_card', 'debt_consolidation', 'educational',
      'home_improvement', 'house', 'major_purchase', 'medical', 'moving',
      'other', 'renewable_energy', 'small_business', 'vacation', 'wedding'),
     'credit_card'),
    ('initial_list_status',
     ('f', 'w'),
     'f'),
)

# Columns that are used as-is (no '<variable>:<level>' expansion).
_LGD_PLAIN_FEATURES = [
    'term_int',
    'emp_length_int',
    'mths_since_issue_d',
    'mths_since_earliest_cr_line',
    'funded_amnt',
    'int_rate',
    'installment',
    'annual_inc',
    'dti',
    'delinq_2yrs',
    'inq_last_6mths',
    'mths_since_last_delinq',
    'mths_since_last_record',
    'open_acc',
    'pub_rec',
    'total_acc',
    'acc_now_delinq',
    'total_rev_hi_lim',
]

# All dummy columns (in group order) followed by the plain columns.
features_all = [
    variable + ':' + level
    for variable, levels, _reference in _LGD_DUMMY_GROUPS
    for level in levels
] + _LGD_PLAIN_FEATURES

# One dummy column per categorical variable.
features_reference_cat = [
    variable + ':' + reference
    for variable, _levels, reference in _LGD_DUMMY_GROUPS
]
# Keep only the model features, then remove the columns listed in
# features_reference_cat.
lgd_inputs_stage_1_train = lgd_inputs_stage_1_train[features_all].drop(columns = features_reference_cat)
from sklearn import linear_model | |
import scipy.stats as stat | |
class LogisticRegression_with_p_values:
    """Logistic regression wrapper that also computes coefficient p-values.

    The wrapped scikit-learn estimator is available as ``self.model``.
    After ``fit`` the instance exposes:

    * ``coef_`` and ``intercept_`` - copied from the wrapped model;
    * ``p_values`` - list with one two-sided p-value per column of ``X``
      (no p-value is computed for the intercept).
    """

    def __init__(self, *args, **kwargs):
        # Every argument is forwarded unchanged to the wrapped estimator.
        self.model = linear_model.LogisticRegression(*args, **kwargs)

    def fit(self, X, y):
        """Fit the wrapped model on ``X``/``y`` and compute the p-values.

        Parameters
        ----------
        X : two-dimensional inputs (rows are observations, columns features).
        y : targets passed straight to the wrapped model's ``fit``.

        Returns
        -------
        self, so that calls can be chained.
        """
        self.model.fit(X, y)
        # For p = sigmoid(z): 2 * (1 + cosh(z)) == 1 / (p * (1 - p)), so
        # dividing X by this term weights every row by p * (1 - p).
        denom = 2.0 * (1.0 + np.cosh(self.model.decision_function(X)))
        denom = np.tile(denom, (X.shape[1], 1)).T
        # Information matrix built from the columns of X only.
        F_ij = np.dot((X / denom).T, X)
        # Its inverse is used as the covariance estimate of the coefficients.
        Cramer_Rao = np.linalg.inv(F_ij)
        sigma_estimates = np.sqrt(np.diagonal(Cramer_Rao))
        z_scores = self.model.coef_[0] / sigma_estimates
        # Two-sided p-value from the standard normal survival function.
        p_values = [stat.norm.sf(abs(x)) * 2 for x in z_scores]
        self.coef_ = self.model.coef_
        self.intercept_ = self.model.intercept_
        self.p_values = p_values
        return self
# Stage 1: fit the logistic regression on the training inputs.
reg_lgd_st_1 = LogisticRegression_with_p_values()
reg_lgd_st_1.fit(lgd_inputs_stage_1_train, lgd_targets_stage_1_train)

# Summary table: one row per feature, plus an 'Intercept' row placed first.
feature_name = lgd_inputs_stage_1_train.columns.values
summary_table = pd.DataFrame(data = feature_name, columns = ['Feature name'])
summary_table['Coefficients'] = np.transpose(reg_lgd_st_1.coef_)
# Shift the rows down by one so that label 0 is free for the intercept.
summary_table.index = summary_table.index + 1
summary_table.loc[0] = ['Intercept', reg_lgd_st_1.intercept_[0]]
summary_table = summary_table.sort_index()

# The intercept has no p-value, hence the leading NaN.
p_values = np.append(np.nan, np.array(reg_lgd_st_1.p_values))
summary_table['p_values'] = p_values
summary_table
# Apply the same column selection to the test inputs as to the training inputs.
lgd_inputs_stage_1_test = lgd_inputs_stage_1_test[features_all]
lgd_inputs_stage_1_test = lgd_inputs_stage_1_test.drop(features_reference_cat, axis = 1)

# Class predictions, and column 1 of predict_proba as the predicted probability.
y_hat_test_lgd_stage_1 = reg_lgd_st_1.model.predict(lgd_inputs_stage_1_test)
y_hat_test_proba_lgd_stage_1 = reg_lgd_st_1.model.predict_proba(lgd_inputs_stage_1_test)[:, 1]

# reset_index without inplace returns a new Series, so lgd_targets_stage_1_test
# keeps its original index. (An in-place reset through an alias would reset
# both names and make the index restore below a no-op.)
lgd_targets_stage_1_test_temp = lgd_targets_stage_1_test.reset_index(drop = True)
df_actual_predicted_probs = pd.concat([lgd_targets_stage_1_test_temp, pd.DataFrame(y_hat_test_proba_lgd_stage_1)], axis = 1)
df_actual_predicted_probs.columns = ['lgd_targets_stage_1_test','y_hat_test_proba_lgd_stage_1']
# Put the original row labels of the test set back on the comparison frame.
df_actual_predicted_probs.index = lgd_targets_stage_1_test.index

# Classify as 1 when the predicted probability exceeds the threshold.
tr = 0.5
df_actual_predicted_probs['y_hat_test_lgd_stage_1'] = np.where(df_actual_predicted_probs['y_hat_test_proba_lgd_stage_1'] > tr, 1, 0)

# Confusion matrix in counts and as a share of all test observations.
confusion_counts_lgd_stage_1 = pd.crosstab(df_actual_predicted_probs['lgd_targets_stage_1_test'], df_actual_predicted_probs['y_hat_test_lgd_stage_1'],
                                           rownames = ['Actual'], colnames = ['Predicted'])
confusion_counts_lgd_stage_1
confusion_shares_lgd_stage_1 = confusion_counts_lgd_stage_1 / df_actual_predicted_probs.shape[0]
confusion_shares_lgd_stage_1

# Share of observations whose predicted class equals the actual class.
# Computed directly from the two columns so it also works when only one
# class is ever predicted (the crosstab then has a single column).
overall_accuracy = (df_actual_predicted_probs['lgd_targets_stage_1_test'] == df_actual_predicted_probs['y_hat_test_lgd_stage_1']).mean()
from sklearn.metrics import roc_curve, roc_auc_score

# ROC curve of the stage-1 model on the test set.
fpr, tpr, thresholds = roc_curve(df_actual_predicted_probs['lgd_targets_stage_1_test'], df_actual_predicted_probs['y_hat_test_proba_lgd_stage_1'])
plt.plot(fpr, tpr)
# Dashed diagonal for reference.
plt.plot(fpr, fpr, linestyle = '--', color = 'k')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')

# Area under the ROC curve.
AUROC = roc_auc_score(df_actual_predicted_probs['lgd_targets_stage_1_test'], df_actual_predicted_probs['y_hat_test_proba_lgd_stage_1'])

import pickle

# Save the fitted stage-1 model. The with-block closes the file as soon as
# the dump has finished.
with open('lgd_model_stage_1.sav', 'wb') as model_file:
    pickle.dump(reg_lgd_st_1, model_file)
# LGD - linear regression
# Stage 2 only uses the rows whose recovery_rate_0_1 flag equals 1.
_has_recovery = loan_data_defaults['recovery_rate_0_1'] == 1
lgd_stage_2_data = loan_data_defaults[_has_recovery]

# Target: recovery_rate. Same excluded columns, split ratio and seed as stage 1.
_stage_2_excluded_columns = ['good_bad', 'recovery_rate', 'recovery_rate_0_1', 'CCF']
_stage_2_split = train_test_split(
    lgd_stage_2_data.drop(_stage_2_excluded_columns, axis = 1),
    lgd_stage_2_data['recovery_rate'],
    test_size = 0.2,
    random_state = 42)
(lgd_inputs_stage_2_train,
 lgd_inputs_stage_2_test,
 lgd_targets_stage_2_train,
 lgd_targets_stage_2_test) = _stage_2_split
from sklearn import linear_model | |
from sklearn.metrics import mean_squared_error, r2_score | |
import scipy.stats as stat | |
class LinearRegression(linear_model.LinearRegression):
    """scikit-learn linear regression that also computes t-statistics and p-values.

    After ``fit`` the instance additionally exposes:

    * ``t`` - t-statistics of the coefficients;
    * ``p`` - the matching two-sided p-values.

    The standard errors are derived from ``X.T @ X`` of the inputs exactly as
    passed in, so no statistic is produced for the intercept.
    """

    def __init__(self, fit_intercept=True, normalize=False, copy_X=True, n_jobs=1):
        # Initialise through the parent constructor so that every
        # hyper-parameter the installed scikit-learn version defines gets its
        # default value (newer releases add parameters such as ``positive``
        # that ``fit`` reads).
        super().__init__(fit_intercept=fit_intercept, copy_X=copy_X, n_jobs=n_jobs)
        # Stored separately for backward compatibility: it is still part of
        # this constructor's signature, but not every scikit-learn version
        # accepts it in the parent constructor.
        self.normalize = normalize

    def fit(self, X, y, n_jobs=1):
        """Fit the regression and compute ``self.t`` and ``self.p``.

        Parameters
        ----------
        X : two-dimensional inputs (rows are observations, columns features).
        y : regression targets.
        n_jobs : kept for backward compatibility and ignored. It used to be
            forwarded positionally to the parent ``fit``, where the third
            positional parameter is ``sample_weight``, not ``n_jobs``.

        Returns
        -------
        self
        """
        super().fit(X, y)
        # Residual degrees of freedom as used throughout this class:
        # observations minus number of columns of X.
        degrees_of_freedom = X.shape[0] - X.shape[1]
        # Residual variance estimate.
        sse = np.sum((self.predict(X) - y) ** 2, axis=0) / float(degrees_of_freedom)
        # Standard errors: square roots of the diagonal of sse * (X'X)^-1.
        se = np.array([np.sqrt(np.diagonal(sse * np.linalg.inv(np.dot(X.T, X))))])
        self.t = self.coef_ / se
        # Two-sided p-values from the t distribution.
        self.p = np.squeeze(2 * (1 - stat.t.cdf(np.abs(self.t), y.shape[0] - X.shape[1])))
        return self
# Keep only the model features, then remove the columns listed in
# features_reference_cat.
lgd_inputs_stage_2_train = lgd_inputs_stage_2_train[features_all].drop(features_reference_cat, axis = 1)

# Stage 2: linear regression on recovery_rate.
reg_lgd_st_2 = LinearRegression()
reg_lgd_st_2.fit(lgd_inputs_stage_2_train, lgd_targets_stage_2_train)

# Summary table: one row per feature, plus an 'Intercept' row placed first.
feature_name = lgd_inputs_stage_2_train.columns.values
summary_table = pd.DataFrame(data = feature_name, columns = ['Feature name'])
summary_table['Coefficients'] = np.transpose(reg_lgd_st_2.coef_)
# Shift the rows down by one so that label 0 is free for the intercept.
summary_table.index = summary_table.index + 1
summary_table.loc[0] = ['Intercept', reg_lgd_st_2.intercept_]
summary_table = summary_table.sort_index()

# The intercept has no p-value, hence the leading NaN.
p_values = np.append(np.nan, np.array(reg_lgd_st_2.p))
summary_table['p_values'] = p_values.round(3)
summary_table
# Apply the same column selection to the stage-2 test inputs.
lgd_inputs_stage_2_test = lgd_inputs_stage_2_test[features_all]
lgd_inputs_stage_2_test = lgd_inputs_stage_2_test.drop(features_reference_cat, axis = 1)
lgd_inputs_stage_2_test.columns.values
y_hat_test_lgd_stage_2 = reg_lgd_st_2.predict(lgd_inputs_stage_2_test)

# reset_index without inplace returns a new Series, so the original targets
# keep their index instead of being reset through an alias.
lgd_targets_stage_2_test_temp = lgd_targets_stage_2_test.reset_index(drop = True)
# Correlation between actual and predicted recovery rates.
pd.concat([lgd_targets_stage_2_test_temp, pd.DataFrame(y_hat_test_lgd_stage_2)], axis = 1).corr()
# Distribution of the residuals.
sns.distplot(lgd_targets_stage_2_test_temp - y_hat_test_lgd_stage_2)

# Save the fitted stage-2 model. The with-block closes the file as soon as
# the dump has finished.
with open('lgd_model_stage_2.sav', 'wb') as model_file:
    pickle.dump(reg_lgd_st_2, model_file)

# Combine both stages on the stage-1 test set: stage-1 class prediction
# multiplied by the stage-2 predicted recovery rate.
y_hat_test_lgd_stage_2_all = reg_lgd_st_2.predict(lgd_inputs_stage_1_test)
y_hat_test_lgd = y_hat_test_lgd_stage_1 * y_hat_test_lgd_stage_2_all
pd.DataFrame(y_hat_test_lgd).describe()
# Clipping is required because linear regression can produce recovery rates
# below 0 or above 1.
y_hat_test_lgd = np.clip(y_hat_test_lgd, 0, 1)
# EAD Model
# Target: CCF. Same excluded columns, split ratio and seed as the LGD models.
_ead_excluded_columns = ['good_bad', 'recovery_rate', 'recovery_rate_0_1', 'CCF']
_ead_split = train_test_split(
    loan_data_defaults.drop(_ead_excluded_columns, axis = 1),
    loan_data_defaults['CCF'],
    test_size = 0.2,
    random_state = 42)
ead_inputs_train, ead_inputs_test, ead_targets_train, ead_targets_test = _ead_split

# Keep only the model features, then remove the columns listed in
# features_reference_cat.
ead_inputs_train = ead_inputs_train[features_all].drop(features_reference_cat, axis = 1)

reg_ead = LinearRegression()
reg_ead.fit(ead_inputs_train, ead_targets_train)

# Summary table: one row per feature, plus an 'Intercept' row placed first.
feature_name = ead_inputs_train.columns.values
summary_table = pd.DataFrame(data = feature_name, columns = ['Feature name'])
summary_table['Coefficients'] = np.transpose(reg_ead.coef_)
# Shift the rows down by one so that label 0 is free for the intercept.
summary_table.index = summary_table.index + 1
summary_table.loc[0] = ['Intercept', reg_ead.intercept_]
summary_table = summary_table.sort_index()

# The intercept has no p-value, hence the leading NaN.
p_values = np.append(np.nan, np.array(reg_ead.p))
summary_table['p_values'] = p_values.round(3)
summary_table
# Validating the EAD model
# Same column selection as for the training inputs.
ead_inputs_test = ead_inputs_test[features_all].drop(features_reference_cat, axis = 1)
y_hat_test_ead = reg_ead.predict(ead_inputs_test)

# Both names refer to the same Series, so the in-place reset affects both.
ead_targets_test.reset_index(drop = True, inplace = True)
ead_targets_test_temp = ead_targets_test

# Correlation between actual and predicted CCF.
_ead_actual_vs_predicted = pd.concat([ead_targets_test_temp, pd.DataFrame(y_hat_test_ead)], axis = 1)
_ead_actual_vs_predicted.corr()

# Distribution of the residuals.
sns.distplot(ead_targets_test - y_hat_test_ead)
pd.DataFrame(y_hat_test_ead).describe()

# Limit the predictions to [0, 1]: first the lower bound, then the upper bound.
_below_zero = y_hat_test_ead < 0
y_hat_test_ead = np.where(_below_zero, 0, y_hat_test_ead)
_above_one = y_hat_test_ead > 1
y_hat_test_ead = np.where(_above_one, 1, y_hat_test_ead)
# Expected Loss
loan_data_preprocessed.head()

# Fill the missing values in the frame that is actually scored below
# (loan_data_preprocessed). Filling loan_data_defaults here would leave any
# missing values of the full portfolio in the model inputs.
loan_data_preprocessed['mths_since_last_delinq'] = loan_data_preprocessed['mths_since_last_delinq'].fillna(0)
loan_data_preprocessed['mths_since_last_record'] = loan_data_preprocessed['mths_since_last_record'].fillna(0)

# Same column selection as used when fitting the LGD and EAD models.
loan_data_preprocessed_lgd_ead = loan_data_preprocessed[features_all]
loan_data_preprocessed_lgd_ead = loan_data_preprocessed_lgd_ead.drop(features_reference_cat, axis = 1)

# Recovery rate = stage-1 class prediction * stage-2 predicted rate, limited to [0, 1].
loan_data_preprocessed['recovery_rate_st_1'] = reg_lgd_st_1.model.predict(loan_data_preprocessed_lgd_ead)
loan_data_preprocessed['recovery_rate_st_2'] = reg_lgd_st_2.predict(loan_data_preprocessed_lgd_ead)
loan_data_preprocessed['recovery_rate'] = loan_data_preprocessed['recovery_rate_st_1'] * loan_data_preprocessed['recovery_rate_st_2']
loan_data_preprocessed['recovery_rate'] = np.clip(loan_data_preprocessed['recovery_rate'], 0, 1)

# LGD is the complement of the recovery rate.
loan_data_preprocessed['LGD'] = 1 - loan_data_preprocessed['recovery_rate']
loan_data_preprocessed['LGD'].describe()

# Predicted CCF, limited to [0, 1]; EAD = CCF * funded amount.
loan_data_preprocessed['CCF'] = np.clip(reg_ead.predict(loan_data_preprocessed_lgd_ead), 0, 1)
loan_data_preprocessed['EAD'] = loan_data_preprocessed['CCF'] * loan_data_preprocessed_lgd_ead['funded_amnt']
loan_data_preprocessed['EAD'].describe()
# Load the train and test inputs of the PD model and stack them row-wise.
loan_data_inputs_train, loan_data_inputs_test = (
    pd.read_csv(csv_name)
    for csv_name in ('loan_data_inputs_train.csv', 'loan_data_inputs_test.csv')
)
_pd_inputs_stacked = pd.concat([loan_data_inputs_train, loan_data_inputs_test], axis = 0)
_pd_inputs_stacked.shape

# Use the 'Unnamed: 0' column of the CSV files as the index.
loan_data_inputs_pd = _pd_inputs_stacked.set_index('Unnamed: 0')
loan_data_inputs_pd.head()
# Dummy-variable groups used by the PD model, one row per variable:
# (variable name, dummy levels, level listed in ref_categories_pd).
# Column names have the form '<variable>:<level>'.
_PD_DUMMY_GROUPS = (
    ('grade',
     ('A', 'B', 'C', 'D', 'E', 'F', 'G'),
     'G'),
    ('home_ownership',
     ('RENT_OTHER_NONE_ANY', 'OWN', 'MORTGAGE'),
     'RENT_OTHER_NONE_ANY'),
    ('addr_state',
     ('ND_NE_IA_NV_FL_HI_AL', 'NM_VA', 'NY', 'OK_TN_MO_LA_MD_NC', 'CA',
      'UT_KY_AZ_NJ', 'AR_MI_PA_OH_MN', 'RI_MA_DE_SD_IN', 'GA_WA_OR',
      'WI_MT', 'TX', 'IL_CT', 'KS_SC_CO_VT_AK_MS', 'WV_NH_WY_DC_ME_ID'),
     'ND_NE_IA_NV_FL_HI_AL'),
    ('verification_status',
     ('Not Verified', 'Source Verified', 'Verified'),
     'Verified'),
    ('purpose',
     ('educ__sm_b__wedd__ren_en__mov__house', 'credit_card',
      'debt_consolidation', 'oth__med__vacation',
      'major_purch__car__home_impr'),
     'educ__sm_b__wedd__ren_en__mov__house'),
    ('initial_list_status',
     ('f', 'w'),
     'f'),
    ('term',
     ('36', '60'),
     '60'),
    ('emp_length',
     ('0', '1', '2-4', '5-6', '7-9', '10'),
     '0'),
    ('months_since_issue_d',
     ('<38', '38-39', '40-41', '42-48', '49-52', '53-64', '65-84', '>84'),
     '>84'),
    ('int_rate',
     ('<9.548', '9.548-12.025', '12.025-15.74', '15.74-20.281', '>20.281'),
     '>20.281'),
    ('months_since_earliest_cr_line',
     ('<140', '141-164', '165-247', '248-270', '271-352', '>352'),
     '<140'),
    ('delinq_2yrs',
     ('0', '1-3', '>=4'),
     '>=4'),
    ('inq_last_6mths',
     ('0', '1-2', '3-6', '>6'),
     '>6'),
    ('open_acc',
     ('0', '1-3', '4-12', '13-17', '18-22', '23-25', '26-30', '>=31'),
     '0'),
    ('pub_rec',
     ('0-2', '3-4', '>=5'),
     '0-2'),
    ('total_acc',
     ('<=27', '28-51', '>=52'),
     '<=27'),
    ('acc_now_delinq',
     ('0', '>=1'),
     '0'),
    ('annual_inc',
     ('<20K', '20K-30K', '30K-40K', '40K-50K', '50K-60K', '60K-70K',
      '70K-80K', '80K-90K', '90K-100K', '100K-120K', '120K-140K', '>140K'),
     '<20K'),
    ('dti',
     ('<=1.4', '1.4-3.5', '3.5-7.7', '7.7-10.5', '10.5-16.1', '16.1-20.3',
      '20.3-21.7', '21.7-22.4', '22.4-35', '>35'),
     '>35'),
    ('mths_since_last_delinq',
     ('Missing', '0-3', '4-30', '31-56', '>=57'),
     '0-3'),
    ('mths_since_last_record',
     ('Missing', '0-2', '3-20', '21-31', '32-80', '81-86', '>=86'),
     '0-2'),
)

# Every dummy column of the PD model, in group order.
features_all_pd = [
    variable + ':' + level
    for variable, levels, _reference in _PD_DUMMY_GROUPS
    for level in levels
]

# One dummy column per variable.
ref_categories_pd = [
    variable + ':' + reference
    for variable, _levels, reference in _PD_DUMMY_GROUPS
]
# Select the PD model's dummy columns, then remove the columns listed in
# ref_categories_pd.
loan_data_inputs_pd_temp = loan_data_inputs_pd[features_all_pd]
loan_data_inputs_pd_temp = loan_data_inputs_pd_temp.drop(ref_categories_pd, axis = 1)
loan_data_inputs_pd_temp.shape

import pickle

# NOTE(security): pickle.load can execute arbitrary code while loading; only
# load 'pd_model.sav' if it comes from a trusted source.
with open('pd_model.sav', 'rb') as model_file:
    reg_pd = pickle.load(model_file)

# PD is taken from column 0 of predict_proba, i.e. the probability of the
# first class of the PD model.
# NOTE(review): presumably the first class is the "bad" (default) outcome -
# confirm against the PD model's classes_.
# The inputs are loan_data_inputs_pd_temp; the name loan_data_inputs_temp is
# not defined anywhere in this script.
loan_data_inputs_pd['PD'] = reg_pd.model.predict_proba(loan_data_inputs_pd_temp)[:, 0]
loan_data_inputs_pd['PD'].head()
loan_data_inputs_pd['PD'].describe()

# Only the PD column is appended (aligned on the index). Concatenating the
# whole of loan_data_inputs_pd could duplicate column labels already present
# in loan_data_preprocessed; selecting a duplicated label returns several
# columns instead of one.
# NOTE(review): assumes the 'Unnamed: 0' index of the PD inputs matches the
# index of loan_data_preprocessed - confirm.
loan_data_preprocessed_new = pd.concat([loan_data_preprocessed, loan_data_inputs_pd['PD']], axis = 1)

# Expected loss per loan: EL = PD * LGD * EAD.
loan_data_preprocessed_new['EL'] = loan_data_preprocessed_new['PD'] * loan_data_preprocessed_new['LGD'] * loan_data_preprocessed_new['EAD']
loan_data_preprocessed_new['EL'].describe()
loan_data_preprocessed_new[['funded_amnt', 'PD', 'LGD', 'EAD', 'EL']].head()
loan_data_preprocessed_new['EL'].sum() # total expected loss for the entire loan portfolio
loan_data_preprocessed_new['funded_amnt'].sum()

# Total expected loss as a share of the total funded amount.
EL_proportion = loan_data_preprocessed_new['EL'].sum() / loan_data_preprocessed_new['funded_amnt'].sum()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment