Last active
March 27, 2019 01:37
-
-
Save statcompute/0effa02e85e945dae215af32191038d2 to your computer and use it in GitHub Desktop.
An example showing how to deploy the WoE transformation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch the sample data set (an RDS file) from the MonotonicBinning repository,
# store it as "df.rds" in the working directory, and load it into `df`.
# mode = "wb" writes the download in binary mode so the RDS file is not
# corrupted on platforms that distinguish text and binary writes.
url <- "https://github.com/statcompute/MonotonicBinning/blob/master/data/accepts.rds?raw=true"
download.file(url, "df.rds", mode = "wb")
df <- readRDS("df.rds")
# Load the binning helpers directly from the MonotonicBinning repository.
source("https://raw.githubusercontent.com/statcompute/MonotonicBinning/master/code/manual_bin.R")
source("https://raw.githubusercontent.com/statcompute/MonotonicBinning/master/code/mono_bin.R")

# Bin `ltv` against the `bad` flag; the per-bin summary is returned in
# ltv_bin$df (printed below).
ltv_bin <- mono_bin(df, bad, ltv)
ltv_bin$df
#   bin                 rule freq   dist mv_cnt bad_freq bad_rate     woe     iv      ks
# 1  01             $X <= 86 1108 0.1898      0      122   0.1101 -0.7337 0.0810 11.0448
# 2  02   $X > 86 & $X <= 95 1081 0.1852      0      166   0.1536 -0.3510 0.0205 16.8807
# 3  03  $X > 95 & $X <= 101 1102 0.1888      0      242   0.2196  0.0880 0.0015 15.1771
# 4  04 $X > 101 & $X <= 106  743 0.1273      0      177   0.2382  0.1935 0.0050 12.5734
# 5  05 $X > 106 & $X <= 115  935 0.1602      0      226   0.2417  0.2126 0.0077  8.9540
# 6  06 $X > 115 | is.na($X)  868 0.1487      1      263   0.3030  0.5229 0.0468  0.0000
# Load calc_woe() from the MonotonicBinning repository.
source("https://raw.githubusercontent.com/statcompute/MonotonicBinning/master/code/calc_woe.R")

# Apply the bin table in ltv_bin$df to `ltv` on a random sample of 1000 rows.
# NOTE: no seed is set, so the sampled rows (and the figures shown in the
# output below) differ from run to run; the printed output is illustrative.
ltv_woe <- calc_woe(df[sample(seq_len(nrow(df)), 1000), ], ltv, ltv_bin$df)

# Per-bin comparison of the original distribution (dist, woe) with the one
# observed on the sample (cal_*), plus the psi column.
ltv_woe$psi
#   bin                 rule   dist     woe cal_freq cal_dist cal_woe   psi
# 1  01             $X <= 86 0.1898 -0.7337      188    0.188 -0.7337 0e+00
# 2  02   $X > 86 & $X <= 95 0.1852 -0.3510      179    0.179 -0.3510 2e-04
# 3  03  $X > 95 & $X <= 101 0.1888  0.0880      192    0.192  0.0880 1e-04
# 4  04 $X > 101 & $X <= 106 0.1273  0.1935      129    0.129  0.1935 0e+00
# 5  05 $X > 106 & $X <= 115 0.1602  0.2126      167    0.167  0.2126 3e-04
# 6  06 $X > 115 | is.na($X) 0.1487  0.5229      145    0.145  0.5229 1e-04
# Preview the original `ltv` values next to the WoE values attached by
# calc_woe() in the `woe.ltv` column (row names are the sampled row indices).
head(ltv_woe$df[, c("ltv", "woe.ltv")])
#      ltv woe.ltv
# 2378  74 -0.7337
# 1897  60 -0.7337
# 2551  80 -0.7337
# 2996  83 -0.7337
# 1174  85 -0.7337
# 2073  74 -0.7337
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment