Skip to content

Instantly share code, notes, and snippets.

@statcompute
Last active September 16, 2019 00:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save statcompute/b6b6f5e3f8003baf2ab72a356c8ff316 to your computer and use it in GitHub Desktop.
Save statcompute/b6b6f5e3f8003baf2ab72a356c8ff316 to your computer and use it in GitHub Desktop.
# Load the modelling data and the two local helper scripts. Judging by the
# functions called further down, mob.R presumably supplies batch_bin() and
# batch_woe(), and grnnet.R the grnn.* / gen_sobol() helpers.
df <- readRDS(file = "df.rds")
invisible(lapply(c("mob.R", "grnnet.R"), source))

# PRE-PROCESS THE DATA WITH MOB PACKAGE
# Bin the predictors, then print the per-variable summary ordered from the
# largest information value (iv) to the smallest.
bin_out <- batch_bin(df, 3)
with(bin_out, BinSum[order(-BinSum$iv), ])
# var nbin unique miss min median max ks iv
# bureau_score 34 315 315 443 692.5 848 35.2651 0.8357
# tot_rev_line 20 3617 477 0 10573.0 205395 26.8943 0.4442
# age_oldest_tr 25 460 216 1 137.0 588 20.3646 0.2714
# tot_derog 7 29 213 0 0.0 32 20.0442 0.2599
# ltv 17 145 1 0 100.0 176 16.8807 0.1911
# rev_util 12 101 0 0 30.0 100 16.9615 0.1635
# tot_tr 15 67 213 0 16.0 77 17.3002 0.1425
# tot_rev_debt 8 3880 477 0 3009.5 96260 8.8722 0.0847
# tot_rev_tr 4 21 636 0 3.0 24 9.0779 0.0789
# tot_income 17 1639 5 0 3400.0 8147167 10.3386 0.0775
# tot_open_tr 7 26 1416 0 5.0 26 6.8695 0.0282
# PERFORM WOE TRANSFORMATIONS
df_woe <- batch_woe(df, bin_out$BinLst)

# PROCESS AND STANDARDIZE THE DATA WITH ZERO MEAN AND UNITY VARIANCE
Y <- df$bad
# The first column of df_woe$df is dropped by position before scaling;
# presumably it is not a WoE predictor (the summary below lists only woe.*
# columns) -- confirm against batch_woe().
X <- scale(df_woe$df[, -1])

# Sanity check: every standardized column should show mean ~ 0 and variance 1.
# Built as one data frame from typed per-column vectors instead of rbind-ing
# one-row data frames; seq_len() also stays correct if X has zero columns.
data.frame(
  var = colnames(X),
  mean = vapply(seq_len(ncol(X)), function(j) mean(X[, j]), numeric(1)),
  variance = vapply(seq_len(ncol(X)), function(j) var(X[, j]), numeric(1))
)
# var mean variance
#1 woe.tot_derog 2.234331e-16 1
#2 woe.tot_tr -2.439238e-15 1
#3 woe.age_oldest_tr -2.502177e-15 1
#4 woe.tot_open_tr -2.088444e-16 1
#5 woe.tot_rev_tr -4.930136e-15 1
#6 woe.tot_rev_debt -2.174607e-16 1
#7 woe.tot_rev_line -8.589630e-16 1
#8 woe.rev_util -8.649849e-15 1
#9 woe.bureau_score 1.439904e-15 1
#10 woe.ltv 3.723332e-15 1
#11 woe.tot_income 5.559240e-16 1
# INITIATE A GRNN OBJECT
# Fit on the standardized predictors X and the response Y without passing a
# sigma, so whatever default grnn.fit() defines is used here.
net1 <- grnn.fit(x = X, y = Y)
# CROSS-VALIDATION TO CHOOSE THE OPTIMAL SMOOTHING PARAMETER
# Request 10 candidate sigma values bounded by 0.5 and 1.5; the fixed seed is
# presumably there to make the candidate set reproducible.
S <- gen_sobol(min = 0.5, max = 1.5, n = 10, seed = 2019)
# Evaluate every candidate with 5 folds. The result is assigned rather than
# printed; the $test / $best listing that follows shows the contents of `cv`.
cv <- grnn.cv_auc(net = net1, sigmas = S, nfolds = 5)
# $test
# sigma auc
#1 1.4066449 0.7543912
#2 0.6205723 0.7303415
#3 1.0710133 0.7553075
#4 0.6764866 0.7378430
#5 1.1322939 0.7553664
#6 0.8402438 0.7507192
#7 1.3590402 0.7546164
#8 1.3031974 0.7548670
#9 0.7555905 0.7455457
#10 1.2174429 0.7552097
# $best
# sigma auc
#5 1.132294 0.7553664
# REFIT A GRNN WITH THE OPTIMAL PARAMETER VALUE
# cv$best$sigma is the sigma reported under $best by the cross-validation step.
net2 <- grnn.fit(x = X, y = Y, sigma = cv$best$sigma)
# Predictions are made on the same X the network was fitted to, so any metric
# computed from net2.pred is an in-sample figure.
net2.pred <- grnn.parpred(net2, X)
# BENCHMARK MODEL PERFORMANCE
# KS and AUC of the GRNN predictions against the observed response.
MLmetrics::KS_Stat(y_pred = net2.pred, y_true = df$bad)
# 44.00242
MLmetrics::AUC(y_pred = net2.pred, y_true = df$bad)
# 0.7895033
# LOGISTIC REGRESSION PERFORMANCE
# `mdl2` is not created anywhere in this script, so a standalone run would
# stop with "object 'mdl2' not found". If it is not already in the session,
# fall back to a logistic regression on the same standardized WoE predictors.
# NOTE(review): the figures quoted below come from the author's own `mdl2`,
# whose specification is not shown; this fallback may not reproduce them
# exactly. It also assumes Y is a binary response -- confirm.
if (!exists("mdl2")) {
  mdl2 <- glm(bad ~ ., data = data.frame(bad = Y, X), family = binomial)
}
MLmetrics::KS_Stat(y_pred = fitted(mdl2), y_true = df$bad)
# 42.61731
MLmetrics::AUC(y_pred = fitted(mdl2), y_true = df$bad)
# 0.7751298
# MARGINAL EFFECT OF EACH ATTRIBUTE
# Draw one panel per predictor on a 3 x 4 grid. The panel count follows the
# number of columns in X instead of a hard-coded 11, and the previous graphics
# layout is restored afterwards so later plots are not forced onto this grid.
old_par <- par(mfrow = c(3, 4))
lapply(seq_len(ncol(X)), function(i) grnn.margin(net2, i))
par(old_par)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment