This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the data set stored as df.rds in the working directory.
df <- readRDS("df.rds") | |
# Evaluate the helper scripts. batch_bin() used below is not defined in this
# snippet -- presumably it comes from mob.R; confirm against that file.
source("mob.R") | |
source("grnnet.R") | |
# PRE-PROCESS THE DATA WITH MOB PACKAGE | |
# NOTE(review): the meaning of the second argument (3) is not visible here.
bin_out <- batch_bin(df, 3) | |
# Print the BinSum table with rows ordered by the `iv` column, largest first.
bin_out$BinSum[order(-bin_out$BinSum$iv), ] | |
# Recorded output (truncated in this view):
# var nbin unique miss min median max ks iv | |
# bureau_score 34 315 315 443 692.5 848 35.2651 0.8357 | |
# tot_rev_line 20 3617 477 0 10573.0 205395 26.8943 0.4442 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the data set stored as df.rds in the working directory.
df <- readRDS("df.rds") | |
# batch_bin() is not defined in this snippet -- presumably provided by mob.R;
# confirm against that file.
source("mob.R") | |
# NOTE(review): the meaning of the second argument (3) is not visible here.
bin_out <- batch_bin(df, 3) | |
# Print the BinSum table with rows ordered by the `iv` column, largest first.
bin_out$BinSum[order(-bin_out$BinSum$iv), ] | |
# Recorded output (truncated in this view):
# var nbin unique miss min median max ks iv | |
# bureau_score 34 315 315 443 692.5 848 35.2651 0.8357 | |
# tot_rev_line 20 3617 477 0 10573.0 205395 26.8943 0.4442 | |
# age_oldest_tr 25 460 216 1 137.0 588 20.3646 0.2714 | |
# tot_derog 7 29 213 0 0.0 32 20.0442 0.2599 | |
# ltv 17 145 1 0 100.0 176 16.8807 0.1911 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the Boston data set from the MASS package into the workspace.
data(Boston, package = "MASS") | |
### CONSTRUCT THE UNWEIGHTED DATA.FRAME WITH DUPLICATES | |
# Rows 1-100 are repeated five times and then the full table is appended, so
# each of the first 100 rows ends up in df1 six times.
df1 <- rbind(Boston[rep(seq(100), 5), ], Boston) | |
nrow(df1) | |
# 1006 | |
# Columns 1-13 are centered and scaled as the inputs; column 14 is the target
# handed to grnn.fit().
X1 <- scale(df1[, 1:13]) | |
Y1 <- df1[, 14] | |
# grnn.fit() is not defined in this snippet; its source is not visible here.
N1 <- grnn.fit(X1, Y1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
arb_bin <- function(data, y, x) { | |
yname <- deparse(substitute(y)) | |
xname <- deparse(substitute(x)) | |
df1 <- subset(data, !is.na(data[[xname]]) & data[[yname]] %in% c(0, 1), select = c(xname, yname)) | |
df2 <- data.frame(y = df1[[yname]], x = df1[[xname]]) | |
spc <- cor(df2[, 2], df2[, 1], method = "spearman", use = "complete.obs") | |
mdl <- Rborist::Rborist(as.matrix(df2$x), df2$y, noValidate = T, nTree = 1, regMono = spc / abs(spc), | |
ctgCensus = "prob", minInfo = exp(-100), nSamp = nrow(df2) , withRepl = F) | |
df3 <- data.frame(y = df2$y, x = df2$x, yhat = predict(mdl, newdata = as.matrix(df2$x), ctgCensus = "prob")$yPred) | |
df4 <- Reduce(rbind, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the input file from a hard-coded absolute path using read.csv defaults.
df1 <- read.csv("/mnt/d/projects/data/credit_count.txt") | |
# Keep only rows where CARDHLDR equals 1; which() drops rows where the
# comparison is NA instead of returning NA-filled rows.
df2 <- df1[which(df1$CARDHLDR == 1), ] | |
ca_glm <- function(fml, data, family, nchunk) { | |
cls <- parallel::makeCluster(nchunk, type = "PSOCK") | |
df1 <- parallel::parLapplyLB(cls, parallel::clusterSplit(cls, seq(nrow(data))), | |
function(c_) data[c_,]) | |
parallel::clusterExport(cls, c("fml", "family", "data"), envir = environment()) | |
est <- parallel::parLapplyLB(cls, df1, | |
function(d_) cbind(coef(summary(glm(fml, data = d_, family = family)))[, 1:2], nrow(d_) / nrow(data))) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Call qtl_lgd() on df with lgd and ltv as the second and third arguments.
# qtl_lgd() is not defined in this snippet; the commented table below is the
# recorded output (truncated in this view).
qtl_lgd(df, lgd, ltv) | |
#$df | |
# bin rule freq dist mv_cnt mean_y woe iv ks | |
# 1 01 $X <= 0.2442486803 320 0.1257 0 0.0948 -1.0370 0.0987 9.5173 | |
# 2 02 $X > 0.2442486803 & $X <= 0.3994659888 318 0.1250 0 0.0994 -0.9850 0.0900 18.6516 | |
# 3 03 $X > 0.3994659888 & $X <= 0.5314432946 318 0.1250 0 0.1265 -0.7135 0.0515 25.8646 | |
# 4 04 $X > 0.5314432946 & $X <= 0.6594855396 318 0.1250 0 0.1283 -0.6974 0.0494 32.9504 | |
# 5 05 $X > 0.6594855396 & $X <= 0.7917383883 318 0.1250 0 0.1769 -0.3182 0.0116 36.5819 | |
# 6 06 $X > 0.7917383883 & $X <= 0.9243704807 320 0.1257 0 0.2788 0.2683 0.0097 32.9670 | |
# 7 07 $X > 0.9243704807 & $X <= 1.0800711662 317 0.1246 0 0.4028 0.8251 0.1020 20.6104 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the data set from archive/accepts.rds (relative to the working
# directory) and preview its first row.
df <- readRDS("archive/accepts.rds") | |
head(df, 1) | |
# Recorded output of head(df, 1), wrapped over two header/value pairs:
# bankruptcy bad app_id tot_derog tot_tr age_oldest_tr tot_open_tr tot_rev_tr tot_rev_debt tot_rev_line rev_util bureau_score purch_price msrp | |
# 0 0 1001 6 7 46 NaN NaN NaN NaN 0 747 19678 17160 | |
# down_pyt purpose loan_term loan_amt ltv tot_income used_ind weight | |
# 947.15 LEASE 36 18730.85 109 4800 0 4.75 | |
### BY ITERATIVE PARTITION ### | |
# Evaluate wqtl_bin.R; its contents are not visible in this snippet.
source("wqtl_bin.R") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the data set stored as df.rds in the working directory.
df <- readRDS("df.rds") | |
### SHOWING THE RESPONSE IN THE LAST COLUMN ### | |
# Preview the first two rows; in the recorded output `bad` is the last column.
head(df, 2) | |
#tot_derog tot_tr age_oldest_tr tot_open_tr tot_rev_tr tot_rev_debt tot_rev_line rev_util bureau_score ltv tot_income bad | |
# 6 7 46 NaN NaN NaN NaN 0 747 109 4800.00 0 | |
# 0 21 153 6 1 97 4637 2 744 97 5833.33 0 | |
# batch_bin() is not defined in this snippet -- presumably provided by mob.R;
# confirm against that file.
source("mob.R") | |
# NOTE(review): the meaning of the second argument (3) is not visible here.
bin_out <- batch_bin(df, 3) | |
# Print the BinSum element of the result.
bin_out$BinSum
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Append a column `w` to df (1 where bad == 1, otherwise 5) and pass the
# result to wqtl_bin() together with bad, tot_derog and w.
# wqtl_bin() is not defined in this snippet; the commented text below is the
# recorded output.
wqtl_bin(cbind(df, w = ifelse(df$bad == 1, 1, 5)), bad, tot_derog, w) | |
#$df | |
# bin rule cnt freq dist mv_wt bad_freq bad_rate woe iv ks | |
#1 00 is.na($X) 213 785 0.0322 785 70 0.0892 0.6416 0.0178 2.7716 | |
#2 01 $X <= 1 3741 16465 0.6748 0 560 0.0340 -0.3811 0.0828 18.9469 | |
#3 02 $X > 1 & $X <= 2 478 1906 0.0781 0 121 0.0635 0.2740 0.0066 16.5222 | |
#4 03 $X > 2 & $X <= 4 587 2231 0.0914 0 176 0.0789 0.5078 0.0298 10.6623 | |
#5 04 $X > 4 818 3014 0.1235 0 269 0.0893 0.6426 0.0685 0.0000 | |
#$cuts | |
#[1] 1 2 4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Call qtl_bin() on df with bad and tot_derog, keep the result in derog_bin,
# then print it. qtl_bin() is not defined in this snippet; the commented table
# below is the recorded output (truncated in this view).
derog_bin <- qtl_bin(df, bad, tot_derog) | |
derog_bin | |
#$df | |
# bin rule freq dist mv_cnt bad_freq bad_rate woe iv ks | |
# 00 is.na($X) 213 0.0365 213 70 0.3286 0.6416 0.0178 2.7716 | |
# 01 $X <= 1 3741 0.6409 0 560 0.1497 -0.3811 0.0828 18.9469 | |
# 02 $X > 1 & $X <= 2 478 0.0819 0 121 0.2531 0.2740 0.0066 16.5222 | |
# 03 $X > 2 & $X <= 4 587 0.1006 0 176 0.2998 0.5078 0.0298 10.6623 | |
# 04 $X > 4 818 0.1401 0 269 0.3289 0.6426 0.0685 0.0000 |