Skip to content

Instantly share code, notes, and snippets.

@selva86
Created October 5, 2017 07:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save selva86/133f36b7f82a3cdcc9f6a96dc555d1d3 to your computer and use it in GitHub Desktop.
Save selva86/133f36b7f82a3cdcc9f6a96dc555d1d3 to your computer and use it in GitHub Desktop.
Reproducible example for ks_plot
library(InformationValue)
library(ggplot2)
# ---- 1. Data: breast-cancer training and test sets ----
trainData <- read.csv(
  file = "https://raw.githubusercontent.com/selva86/datasets/master/breastcancer_training.csv"
)
testData <- read.csv(
  file = "https://raw.githubusercontent.com/selva86/datasets/master/breastcancer_test.csv"
)

# ---- 2. Model: logistic regression of Class on three cell features ----
logitmod <- glm(
  formula = Class ~ Cl.thickness + Cell.size + Cell.shape,
  family = "binomial",
  data = trainData
)

# ---- 3. Score the test set (type = "response" asks for probabilities) ----
pred <- predict(object = logitmod, newdata = testData, type = "response")

# ---- 4. Hard labels: 1 when the predicted probability exceeds 0.5, else 0 ----
y_pred <- ifelse(pred > 0.5, 1, 0)
y_act <- testData[["Class"]]

# ---- 5. Accuracy: share of test rows where the hard label matches ----
mean(y_pred == y_act) # ~94% according to the original author's note

# ks_table() is reached through the package namespace (the original used `:::`)
# and bound globally so that ks_plot() below can call it by its bare name.
ks_table <- utils::getFromNamespace("ks_table", ns = "InformationValue")
#' Plot a Kolmogorov-Smirnov (KS) chart
#'
#' Plots the cumulative percentage of responders and non-responders reported
#' by `ks_table()` against the rank group, together with a straight 0-100
#' reference line. The group with the largest `difference` in the table is
#' marked with a dashed red double-headed arrow, and the value of
#' `ks_stat()` is shown in the subtitle. The chart is printed as a side
#' effect.
#'
#' Relies on a `ks_table` function being visible in the calling environment
#' (this script binds it from the InformationValue namespace).
#'
#' @param actuals Observed outcomes, forwarded unchanged to `ks_table()` and
#'   `ks_stat()` (presumably 0/1 class labels -- confirm against the
#'   InformationValue documentation).
#' @param predictedScores Model scores for the same observations, forwarded
#'   unchanged to `ks_table()` and `ks_stat()`. Must be the same length as
#'   `actuals`.
#' @return The ggplot object, invisibly.
ks_plot <- function(actuals, predictedScores) {
  stopifnot(length(actuals) == length(predictedScores))

  ks_table_out <- ks_table(
    actuals = actuals,
    predictedScores = predictedScores
  )

  # Take the number of groups from the table instead of assuming ten, so the
  # three series below always have matching lengths. Rank 0 is the origin
  # point prepended to each cumulative curve.
  n_groups <- nrow(ks_table_out)
  rank <- 0:n_groups
  random_prediction <- seq(0, 100, length.out = n_groups + 1L)
  perc_positive <- c(0, ks_table_out$cum_perc_responders) * 100
  perc_negative <- c(0, ks_table_out$cum_perc_non_responders) * 100

  # Long format: one row per (series, rank). The factor levels fix the legend
  # order to the same order the series are concatenated in.
  series <- c("random_prediction", "perc_positive", "perc_negative")
  df_stack <- data.frame(
    values = c(random_prediction, perc_positive, perc_negative),
    ind = factor(rep(series, each = length(rank)), levels = series),
    rank = rep(rank, times = length(series))
  )

  # Row i of the table sits at rank i on the x axis (rank 0 is the added
  # origin), so the row index can be used directly as the x position.
  rowmax <- which.max(ks_table_out$difference)
  l_start <- ks_table_out[rowmax, "cum_perc_non_responders"]
  l_end <- ks_table_out[rowmax, "cum_perc_responders"]

  ks_chart <- ggplot2::ggplot(
    df_stack,
    ggplot2::aes(
      x = rank,
      y = values,
      colour = ind,
      label = paste0(round(values, 2), "%")
    )
  ) +
    ggplot2::geom_line(size = 1.25) +
    ggplot2::labs(
      x = "rank",
      y = "Percentage +Ve & -Ve Captured",
      title = "KS Chart",
      subtitle = paste(
        "KS Statistic: ",
        InformationValue::ks_stat(actuals, predictedScores)
      )
    ) +
    ggplot2::theme(
      plot.title = ggplot2::element_text(size = 20, face = "bold")
    ) +
    # Lift the value labels slightly above their points.
    ggplot2::geom_text(ggplot2::aes(y = values + 4)) +
    ggplot2::scale_x_continuous(breaks = rank, labels = rank) +
    # Vertical marker spanning the two cumulative curves at the widest gap.
    ggplot2::geom_segment(
      x = rowmax,
      y = l_start * 100,
      xend = rowmax,
      yend = l_end * 100,
      col = "red",
      arrow = ggplot2::arrow(
        length = ggplot2::unit(0.05, "npc"),
        ends = "both"
      ),
      linetype = "dashed",
      lwd = 1
    )

  print(ks_chart)
  invisible(ks_chart)
}
# ks_plot()'s second argument is `predictedScores`: pass the model's predicted
# probabilities (`pred`) rather than the 0/1 labels cut at 0.5 (`y_pred`).
ks_plot(y_act, pred)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment