Skip to content

Instantly share code, notes, and snippets.

@prise6
Created November 28, 2016 10:07
Show Gist options
  • Save prise6/7d8602867e33f021bed6a87052027ac2 to your computer and use it in GitHub Desktop.
Save prise6/7d8602867e33f021bed6a87052027ac2 to your computer and use it in GitHub Desktop.
Decision graph for two-class probabilities
decisionGraph = function(probs, outcome, seq = base::seq(1,0,-0.05), precision.x = 0){
qtle = quantile(probs, seq)
lift.base = mean(outcome)
values = lapply(qtle, function(p){
conf = table(probs >= p, outcome, deparse.level = 0)
if(nrow(conf) == 1){
if(rownames(conf) == "FALSE")
return(c(0, 0))
else
return(c(1, 1))
}
c(conf[2, 2]/sum(conf[2, 1:2])/lift.base, conf[2, 2]/sum(conf[1:2, 2]))
})
res.prob = data.table(
qtle = paste0(round(100*(1-seq), digits = precision.x), "%"),
cutoff = qtle,
values_lift = unlist(lapply(values, '[[', i = 1)),
values_freq = unlist(lapply(values, '[[', i = 2))
)[, values_freq_scale := values_freq*max(values_lift)]
ylimmax = ceiling(max(res.prob$values_lift))
old_par = par()$mar
par(mar=c(5, 4, 4, 5) + 0.1)
plot(res.prob$values_lift, xaxt = "n", ylim = c(0, ylimmax), type = "o", yaxt = "n", xlab = "Quantiles", ylab = "", col = "red", pch = 20, cex = .5)
lines(res.prob$values_freq_scale, type = "l", col = "blue", cex = 4)
liftby = ifelse(ylimmax < 6, .5, ifelse(ylimmax < 11, 1, 5))
axis(2, at = seq(0, ylimmax, by = liftby), las = 2, col = "red", col.axis = "red")
axis(4, at = seq(0, ylimmax, by = ylimmax/10), labels = paste0(seq(0, 100, by = 10), "%"), las = 2, col = "blue", col.axis = "blue")
axis(1, at = 1:nrow(res.prob), labels = res.prob$qtle)
axis(3, at = 1:nrow(res.prob), labels = round(res.prob$cutoff, digits = 2), col.axis = "orange", cex.axis = .55)
mtext("% Vrai positif", side = 4, line = 3, cex.lab = 1, las = 0, col = "blue")
mtext("Lift", side = 2, line = 3, cex.lab = 1, las = 0, col = "red")
mtext("Cutoff", side = 3, line = 3, cex.lab = 1, las = 0, col = "orange")
abline(h = 1:ylimmax, col = "red", lty = 1, lwd = .2)
abline(v = res.prob[cutoff %between% c(.4, .6), , which = T][1], col = "orange", lty = 3, lwd = .5)
abline(v = 1:nrow(res.prob), col = "orange", lty = 3, lwd = .5)
par(mar = old_par)
return(invisible(list(
lift.base,
table(probs>.5, outcome)
)))
}
@prise6
Copy link
Author

prise6 commented Nov 28, 2016

# data to process:
data 
data$outcome = ifelse(data$outcome == "Class1", 1, 0)
# ...
# model
# ...
probs = predict(model, type = "probs")[, 2]

# graph on 0:1
decisionGraph(probs, data$outcome)
# zoom and fix precision
decisionGraph(probs, data$outcome, seq(1, 0.8, -0.001), precision.x = 2)

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment