Last active
March 7, 2021 11:51
-
-
Save martinctc/dba495390f4afbaa4a6e0c14b8bd09d6 to your computer and use it in GitHub Desktop.
[Create base version of Information Value calculations] #R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Implement Information Value in base? | |
iris_iv <- | |
iris %>% | |
mutate(IsSetosa = ifelse(Species == "setosa", 1, 0)) | |
## Information | |
infoTables <- | |
Information::create_infotables( | |
data = iris_iv, | |
y = "IsSetosa", | |
bins = 10, | |
parallel = FALSE | |
) | |
# WOE Table | |
infoTables$Tables$Sepal.Length | |
# Sepal.Length N Percent WOE IV | |
# 1 [4.3,4.7] 11 0.07333333 0.000000 0.0000000 | |
# 2 [4.8,4.9] 11 0.07333333 2.197225 0.3515559 | |
# 3 [5,5.1] 19 0.12666667 2.367124 1.0380218 | |
# 4 [5.2,5.5] 18 0.12000000 1.145132 1.2097916 | |
# 5 [5.6,5.7] 14 0.09333333 -1.098612 1.2976806 | |
# 6 [5.8,6] 16 0.10666667 -2.014903 1.5596180 | |
# 7 [6.1,6.2] 10 0.06666667 0.000000 1.5596180 | |
# 8 [6.3,6.4] 16 0.10666667 0.000000 1.5596180 | |
# 9 [6.5,6.8] 18 0.12000000 0.000000 1.5596180 | |
# 10 [6.9,7.9] 17 0.11333333 0.000000 1.5596180 | |
## Self implementation | |
compute_quantile <- function(variable, bins){ | |
quantile(variable, | |
probs = c(1:(bins - 1) / bins), | |
na.rm = TRUE, | |
type = 3) | |
} | |
q <- compute_quantile(iris_iv$Sepal.Length, bins = 10) | |
cuts <- unique(q) | |
agg_var <- function(predictor, outcome, cuts){ | |
cut_table <- | |
table( | |
findInterval( | |
predictor, | |
vec = cuts, | |
rightmost.closed = FALSE), | |
outcome) %>% | |
as.data.frame.matrix() | |
data.frame( | |
"N" = rowSums(cut_table), | |
"WOE" = log((cut_table$`1`*sum(cut_table$`0`))/ | |
(cut_table$`0`*sum(cut_table$`1`))) | |
) | |
} | |
agg_var(predictor = iris_iv$Sepal.Length, | |
cuts = cuts, | |
outcome = iris_iv$IsSetosa) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment