Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Similar to yardstick::conf_mat(), but can handle observation weights.
library(dplyr)
#' Confusion Matrix With Observation Weights
#'
#' Cross-tabulates predicted against true classes like
#' `yardstick::conf_mat()`, but each row of `df` can contribute a weight
#' instead of a count of one.
#'
#' @param df A data frame.
#' @param truth Column that represents 'truth' (unquoted).
#' @param estimate Column that represents the class prediction (unquoted).
#' @param wt Column with observation weights (unquoted). With the default
#'   `NULL` every row counts once.
#' @param scale_weights_one Whether to multiply every cell by
#'   `nrow(df) / sum(wt)`, so that the weights are rescaled to an average of
#'   one. Defaults to `TRUE` when `wt` is supplied. Has no effect when `wt`
#'   is `NULL`.
#' @param dnn Character vector of length two with the dimnames for the
#'   table: first the row (prediction) name, then the column (truth) name.
#'
#' @return A `conf_mat` object with predictions in rows and truth in
#'   columns. Rows of `df` whose truth or estimate is `NA` are left out of
#'   the table.
conf_mat_weighted <-
  function(df,
           truth,
           estimate,
           wt = NULL,
           scale_weights_one = !rlang::quo_is_null(enquo(wt)),
           dnn = c("Prediction", "Truth")) {
    stopifnot(is.character(dnn), length(dnn) == 2L)
    has_wt <- !rlang::quo_is_null(enquo(wt))

    # One row per (truth, estimate) cell. Column order follows the call:
    # truth first, estimate second, then the (weighted) count.
    freq_df <- dplyr::count(
      df, {{ truth }}, {{ estimate }},
      wt = {{ wt }}, .drop = FALSE
    )
    truth_col <- freq_df[[1L]]
    estimate_col <- freq_df[[2L]]
    # Take the count by position: count() renames it when `df` already has
    # a column called `n`.
    cell_counts <- freq_df[[ncol(freq_df)]]

    level_names <- function(x) {
      if (is.factor(x)) levels(x) else as.character(sort(unique(x)))
    }
    # A confusion matrix is square, so both dimensions share one level set.
    class_levels <- union(level_names(truth_col), level_names(estimate_col))

    zero <- if (is.integer(cell_counts)) 0L else 0
    output <- matrix(
      zero,
      nrow = length(class_levels),
      ncol = length(class_levels),
      dimnames = stats::setNames(list(class_levels, class_levels), dnn)
    )

    # Place each count by name rather than by position, so missing
    # combinations stay zero and rows/columns cannot be transposed.
    row_idx <- match(as.character(estimate_col), class_levels)
    col_idx <- match(as.character(truth_col), class_levels)
    keep <- !is.na(row_idx) & !is.na(col_idx)
    output[cbind(row_idx[keep], col_idx[keep])] <- cell_counts[keep]

    # Without weights the scale factor would be n / sum(NULL) = Inf.
    if (has_wt && scale_weights_one) {
      scale_df <- dplyr::summarise(
        df,
        scale_factor = dplyr::n() / sum({{ wt }})
      )
      output <- output * scale_df[["scale_factor"]]
    }

    # Go through the exported generic rather than the internal method.
    yardstick::conf_mat(as.table(output))
  }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment