Skip to content

Instantly share code, notes, and snippets.

@martinctc
Last active March 7, 2021 11:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save martinctc/dba495390f4afbaa4a6e0c14b8bd09d6 to your computer and use it in GitHub Desktop.
Save martinctc/dba495390f4afbaa4a6e0c14b8bd09d6 to your computer and use it in GitHub Desktop.
[Create base version of Information Value calculations] #R
## Implement Information Value in base?
iris_iv <-
iris %>%
mutate(IsSetosa = ifelse(Species == "setosa", 1, 0))
## Information
infoTables <-
Information::create_infotables(
data = iris_iv,
y = "IsSetosa",
bins = 10,
parallel = FALSE
)
# WOE Table
infoTables$Tables$Sepal.Length
# Sepal.Length N Percent WOE IV
# 1 [4.3,4.7] 11 0.07333333 0.000000 0.0000000
# 2 [4.8,4.9] 11 0.07333333 2.197225 0.3515559
# 3 [5,5.1] 19 0.12666667 2.367124 1.0380218
# 4 [5.2,5.5] 18 0.12000000 1.145132 1.2097916
# 5 [5.6,5.7] 14 0.09333333 -1.098612 1.2976806
# 6 [5.8,6] 16 0.10666667 -2.014903 1.5596180
# 7 [6.1,6.2] 10 0.06666667 0.000000 1.5596180
# 8 [6.3,6.4] 16 0.10666667 0.000000 1.5596180
# 9 [6.5,6.8] 18 0.12000000 0.000000 1.5596180
# 10 [6.9,7.9] 17 0.11333333 0.000000 1.5596180
## Self implementation
compute_quantile <- function(variable, bins){
quantile(variable,
probs = c(1:(bins - 1) / bins),
na.rm = TRUE,
type = 3)
}
q <- compute_quantile(iris_iv$Sepal.Length, bins = 10)
cuts <- unique(q)
agg_var <- function(predictor, outcome, cuts){
cut_table <-
table(
findInterval(
predictor,
vec = cuts,
rightmost.closed = FALSE),
outcome) %>%
as.data.frame.matrix()
data.frame(
"N" = rowSums(cut_table),
"WOE" = log((cut_table$`1`*sum(cut_table$`0`))/
(cut_table$`0`*sum(cut_table$`1`)))
)
}
agg_var(predictor = iris_iv$Sepal.Length,
cuts = cuts,
outcome = iris_iv$IsSetosa)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment