This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | body{ | |
| margin: 0; | |
| padding: 0; | |
| text-align: center; | |
| background: url(Home_Credit/hcdr.png); | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | <!DOCTYPE html> | |
| <html> | |
| <head> | |
| <title>Predict Loan Default Risk</title> | |
| </head> | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import pandas as pd | |
| import re | |
| import time | |
| import numpy as np | |
| import gc | |
| import lightgbm as lgb | |
| import math | |
| import pickle | |
| import os | |
| import os.path | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  # Write the current value of train_predict to disk as a pickle file.
  with open('lgbm/lgbm_train_predict_500f.pkl', mode='wb') as f:
      pickle.dump(train_predict, f)

  # Read that same pickle file back and rebind train_predict to the stored copy.
  with open('lgbm/lgbm_train_predict_500f.pkl', mode='rb') as f:
      train_predict = pickle.load(f)
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  # Attach the ids and the predictions to the feature frame, then write just
  # those two columns to the CSV file.
  # NOTE(review): assigning a Series aligns on index - this assumes test_data
  # and features_top_df_test share the same index; confirm against the caller.
  features_top_df_test['SK_ID_CURR'] = test_data['SK_ID_CURR']
  features_top_df_test['TARGET'] = test_predict

  # Cast the id column in a single vectorised call rather than invoking a
  # Python lambda once per row.  Unlike the per-element conversion, astype
  # raises if the column contains NaN instead of letting a bad id through.
  features_top_df_test['SK_ID_CURR'] = features_top_df_test['SK_ID_CURR'].astype(np.int32)

  # index=False keeps the frame's row index out of the written file.
  features_top_df_test[['SK_ID_CURR', 'TARGET']].to_csv('hcdr_lgbm_500f_final.csv', index=False)
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  # Restore the three pickled prediction objects (train, CV and test) from the
  # lgbm/ directory, one file per variable.
  with open('lgbm/lgbm_train_predict_500f.pkl', mode='rb') as f:
      train_predict = pickle.load(f)

  with open('lgbm/lgbm_cv_predict_500f.pkl', mode='rb') as f:
      cv_predict = pickle.load(f)

  with open('lgbm/lgbm_test_predict_500f.pkl', mode='rb') as f:
      test_predict = pickle.load(f)
| with open('lgbm/lgbm_best_threshold_500f_api.pkl','rb') as f: | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  start = datetime.now()

  # ROC curve points for the training and the CV predictions, both scored
  # against the same label vector y.
  train_fpr5, train_tpr5, tr_thresholds5 = roc_curve(y, train_predict)
  cv_fpr5, cv_tpr5, cv_thresholds5 = roc_curve(y, cv_predict)

  # Each legend entry carries the area under its own curve.
  train_label = "Training Data AUC :" + str(auc(train_fpr5, train_tpr5))
  cv_label = "CV Data AUC :" + str(auc(cv_fpr5, cv_tpr5))

  plt.plot(train_fpr5, train_tpr5, label=train_label)
  plt.plot(cv_fpr5, cv_tpr5, label=cv_label)
  plt.legend()
  plt.xlabel("FPR Values")
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | start = datetime.now() | |
| a=0 | |
| for i,(train, cv) in enumerate(f.split(train_df[feats],y)): | |
| X_train, Y_train = train_df[feats].iloc[train], y.iloc[train] | |
| X_valid, Y_valid = train_df[feats].iloc[cv], y.iloc[cv] | |
| lgb = LGBMClassifier( | |
| n_estimators=10837, \ | |
| bagging_fraction= 0.7327318230470493, \ | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  # ROC points for the rows selected by the `train` indexer.
  fpr_t, tpr_t, thresh = roc_curve(Y_train, train_predict[train])

  # tpr - fpr at every candidate threshold (Youden's J statistic); the
  # threshold at which it peaks is the one that gets accumulated.
  best_stat = np.subtract(tpr_t, fpr_t)
  best_thresh_index = np.argmax(best_stat)

  # NOTE(review): the divisor 5 presumably matches the number of CV folds so
  # that the running sum ends up as a mean - confirm against n_splits.
  best_threshold_train += thresh[best_thresh_index] / 5
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  f = KFold(n_splits=5, shuffle=True, random_state=0)  # K fold cross validation
  lgbm_df = pd.DataFrame()


  def _clean_column_name(name):
      """Return *name* with every character outside [A-Za-z0-9_] removed."""
      return re.sub('[^A-Za-z0-9_]+', '', name)


  # Apply the same sanitiser to both frames so their column names stay in step.
  # NOTE(review): presumably needed because LightGBM rejects special characters
  # in feature names; two columns that differ only in stripped characters would
  # collide after this step - confirm that cannot happen with this data.
  train_df = train_df.rename(columns=_clean_column_name)
  test_df = test_df.rename(columns=_clean_column_name)

  y = train_df['TARGET']

  # Every column except the label, the id columns and 'index' is a feature.
  # The loop variable is named `col` so it no longer shadows the KFold object
  # bound to `f` above.
  feats = [
      col
      for col in train_df.columns
      if col not in ('TARGET', 'SK_ID_CURR', 'SK_ID_BUREAU', 'SK_ID_PREV', 'index')
  ]
  lgbm_df['feat'] = feats
Newer | Older