Last active
September 16, 2019 00:10
-
-
Save statcompute/b6b6f5e3f8003baf2ab72a356c8ff316 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# LOAD THE MODELING DATA AND THE HELPER FUNCTIONS
# df.rds must contain a `bad` column (read below as the response) plus the
# raw attributes that get binned.
df <- readRDS("df.rds")
# NOTE(review): mob.R presumably supplies batch_bin() / batch_woe() and
# grnnet.R the grnn.*() / gen_sobol() helpers used below -- confirm.
source("mob.R")
source("grnnet.R")
# PRE-PROCESS THE DATA WITH MOB PACKAGE
bin_out <- batch_bin(df, 3)
# Show the binning summary ranked by information value, strongest first.
bin_out$BinSum[order(-bin_out$BinSum$iv), ]
#            var nbin unique miss min  median     max      ks     iv
#   bureau_score   34    315  315 443   692.5     848 35.2651 0.8357
#   tot_rev_line   20   3617  477   0 10573.0  205395 26.8943 0.4442
#  age_oldest_tr   25    460  216   1   137.0     588 20.3646 0.2714
#      tot_derog    7     29  213   0     0.0      32 20.0442 0.2599
#            ltv   17    145    1   0   100.0     176 16.8807 0.1911
#       rev_util   12    101    0   0    30.0     100 16.9615 0.1635
#         tot_tr   15     67  213   0    16.0      77 17.3002 0.1425
#   tot_rev_debt    8   3880  477   0  3009.5   96260  8.8722 0.0847
#     tot_rev_tr    4     21  636   0     3.0      24  9.0779 0.0789
#     tot_income   17   1639    5   0  3400.0 8147167 10.3386 0.0775
#    tot_open_tr    7     26 1416   0     5.0      26  6.8695 0.0282
# PERFORM WOE TRANSFORMATIONS
df_woe <- batch_woe(df, bin_out$BinLst)
# PROCESS AND STANDARDIZE THE DATA WITH ZERO MEAN AND UNITY VARIANCE
Y <- df$bad
# The first column of df_woe$df is dropped before scaling.
# NOTE(review): presumably it is a row identifier rather than a predictor --
# confirm against batch_woe().
X <- scale(df_woe$df[, -1])
# Sanity check: every standardized column should have mean ~0 and variance 1.
data.frame(
  var = colnames(X),
  mean = vapply(seq_len(ncol(X)), function(j) mean(X[, j]), numeric(1)),
  variance = vapply(seq_len(ncol(X)), function(j) var(X[, j]), numeric(1))
)
#                 var          mean variance
#1      woe.tot_derog  2.234331e-16        1
#2         woe.tot_tr -2.439238e-15        1
#3  woe.age_oldest_tr -2.502177e-15        1
#4    woe.tot_open_tr -2.088444e-16        1
#5     woe.tot_rev_tr -4.930136e-15        1
#6   woe.tot_rev_debt -2.174607e-16        1
#7   woe.tot_rev_line -8.589630e-16        1
#8       woe.rev_util -8.649849e-15        1
#9   woe.bureau_score  1.439904e-15        1
#10           woe.ltv  3.723332e-15        1
#11    woe.tot_income  5.559240e-16        1
# INITIATE A GRNN OBJECT
net1 <- grnn.fit(x = X, y = Y)
# CROSS-VALIDATION TO CHOOSE THE OPTIMAL SMOOTHING PARAMETER
# Ten candidate sigma values between 0.5 and 1.5, with a fixed seed so the
# candidate set is reproducible.
S <- gen_sobol(min = 0.5, max = 1.5, n = 10, seed = 2019)
cv <- grnn.cv_auc(net = net1, sigmas = S, nfolds = 5)
# Recorded contents of `cv`:
# $test
#        sigma       auc
#1   1.4066449 0.7543912
#2   0.6205723 0.7303415
#3   1.0710133 0.7553075
#4   0.6764866 0.7378430
#5   1.1322939 0.7553664
#6   0.8402438 0.7507192
#7   1.3590402 0.7546164
#8   1.3031974 0.7548670
#9   0.7555905 0.7455457
#10  1.2174429 0.7552097
# $best
#      sigma       auc
#5  1.132294 0.7553664
# REFIT A GRNN WITH THE OPTIMAL PARAMETER VALUE
net2 <- grnn.fit(x = X, y = Y, sigma = cv$best$sigma)
# Predictions are made on the same X the network was fit on, so the metrics
# below are in-sample and not comparable to the cross-validated AUC above.
net2.pred <- grnn.parpred(net2, X)
# BENCHMARK MODEL PERFORMANCE
MLmetrics::KS_Stat(y_pred = net2.pred, y_true = df$bad)
# 44.00242
MLmetrics::AUC(y_pred = net2.pred, y_true = df$bad)
# 0.7895033
# LOGISTIC REGRESSION PERFORMANCE
# `mdl2` is not created anywhere in this script. If it is not already in the
# session, fit a logistic regression on the same standardized WoE inputs so
# the benchmark can run.
# NOTE(review): the original `mdl2` specification is not visible here; the
# figures recorded below come from the author's run and may not reproduce
# exactly with this fallback -- confirm.
if (!exists("mdl2")) {
  mdl2 <- glm(bad ~ ., data = data.frame(bad = df$bad, X), family = binomial)
}
MLmetrics::KS_Stat(y_pred = fitted(mdl2), y_true = df$bad)
# 42.61731
MLmetrics::AUC(y_pred = fitted(mdl2), y_true = df$bad)
# 0.7751298
# MARGINAL EFFECT OF EACH ATTRIBUTE
# Draw one panel per input column on a 3 x 4 grid, then restore the previous
# graphics settings so later plots are not affected.
old_par <- par(mfrow = c(3, 4))
# invisible() suppresses the list that lapply() would otherwise print.
# NOTE(review): assumes grnn.margin() draws with base graphics as a side
# effect (consistent with the par(mfrow) layout) -- confirm.
invisible(lapply(seq_len(ncol(X)), function(i) grnn.margin(net2, i)))
par(old_par)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment