# brshallo/conf_mat_weighted.R

## conf_mat_weighted.R
library(dplyr)

#' Confusion Matrix With Observation Weights
#'
#' Tallies the (optionally weighted) number of observations for every
#' combination of `truth` and `estimate`, arranges the tallies as a matrix
#' with predictions in rows and truth in columns, and wraps the result in a
#' yardstick confusion matrix.
#'
#' @param df dataframe
#' @param truth Column that represents 'truth'
#' @param estimate Column that represents the class prediction
#' @param wt Column with observation weights. Leave as `NULL` for plain
#'   (unweighted) counts.
#' @param scale_weights_one Whether the weighted counts should be rescaled so
#'   that they sum to the number of rows in `df`. Defaults to `TRUE` when `wt`
#'   is supplied; has no effect when `wt` is `NULL`.
#' @param dnn Character vector of length two with the dimnames for the table
#'   (rows, then columns).
#'
#' @return a confusion matrix (the object returned by `yardstick::conf_mat()`)
conf_mat_weighted <-
  function(df,
           truth,
           estimate,
           wt = NULL,
           scale_weights_one = !rlang::quo_is_null(enquo(wt)),
           dnn = c("Prediction", "Truth")) {

  has_weights <- !rlang::quo_is_null(enquo(wt))

  # count() returns one row per truth/estimate pair, ordered by `truth` first
  # and `estimate` second, so `n` runs through the estimate values fastest.
  freq_df <- count(df, {{ truth }}, {{ estimate }}, wt = {{ wt }}, .drop = FALSE)

  truth_vals <- unique(freq_df[[1]])
  pred_vals <- unique(freq_df[[2]])

  # matrix() would silently recycle `n` if some pairs were missing, which
  # yields a wrong table; `.drop = FALSE` only keeps empty cells for factors.
  if (nrow(freq_df) != length(pred_vals) * length(truth_vals)) {
    stop(
      "Counts do not cover every truth/estimate combination; ",
      "convert both columns to factors so that empty cells are kept.",
      call. = FALSE
    )
  }

  # Column-wise fill: each column holds one truth value, each row one
  # predicted value.
  output <- matrix(
    freq_df$n,
    nrow = length(pred_vals),
    byrow = FALSE,
    dimnames = purrr::set_names(list(pred_vals, truth_vals), dnn)
  )

  # Without weights the cells already sum to nrow(df); rescaling would divide
  # by sum(NULL) == 0 and turn the table into Inf/NaN.
  if (scale_weights_one && has_weights) {
    weight_total <- pull(summarise(df, total = sum({{ wt }})), total)
    output <- output * (nrow(df) / weight_total)
  }

  # Dispatch through the exported generic rather than reaching into
  # yardstick's namespace for the table method.
  yardstick::conf_mat(as.table(output))
}
	library(dplyr)

	#' Confusion Matrix With Observation Weights
	#'
	#' Tallies the (optionally weighted) number of observations for every
	#' combination of `truth` and `estimate`, arranges the tallies as a matrix
	#' with predictions in rows and truth in columns, and wraps the result in a
	#' yardstick confusion matrix.
	#'
	#' @param df dataframe
	#' @param truth Column that represents 'truth'
	#' @param estimate Column that represents the class prediction
	#' @param wt Column with observation weights. Leave as `NULL` for plain
	#'   (unweighted) counts.
	#' @param scale_weights_one Whether the weighted counts should be rescaled so
	#'   that they sum to the number of rows in `df`. Defaults to `TRUE` when `wt`
	#'   is supplied; has no effect when `wt` is `NULL`.
	#' @param dnn Character vector of length two with the dimnames for the table
	#'   (rows, then columns).
	#'
	#' @return a confusion matrix (the object returned by `yardstick::conf_mat()`)
	conf_mat_weighted <-
	  function(df,
	           truth,
	           estimate,
	           wt = NULL,
	           scale_weights_one = !rlang::quo_is_null(enquo(wt)),
	           dnn = c("Prediction", "Truth")) {

	  has_weights <- !rlang::quo_is_null(enquo(wt))

	  # count() returns one row per truth/estimate pair, ordered by `truth` first
	  # and `estimate` second, so `n` runs through the estimate values fastest.
	  freq_df <- count(df, {{ truth }}, {{ estimate }}, wt = {{ wt }}, .drop = FALSE)

	  truth_vals <- unique(freq_df[[1]])
	  pred_vals <- unique(freq_df[[2]])

	  # matrix() would silently recycle `n` if some pairs were missing, which
	  # yields a wrong table; `.drop = FALSE` only keeps empty cells for factors.
	  if (nrow(freq_df) != length(pred_vals) * length(truth_vals)) {
	    stop(
	      "Counts do not cover every truth/estimate combination; ",
	      "convert both columns to factors so that empty cells are kept.",
	      call. = FALSE
	    )
	  }

	  # Column-wise fill: each column holds one truth value, each row one
	  # predicted value.
	  output <- matrix(
	    freq_df$n,
	    nrow = length(pred_vals),
	    byrow = FALSE,
	    dimnames = purrr::set_names(list(pred_vals, truth_vals), dnn)
	  )

	  # Without weights the cells already sum to nrow(df); rescaling would divide
	  # by sum(NULL) == 0 and turn the table into Inf/NaN.
	  if (scale_weights_one && has_weights) {
	    weight_total <- pull(summarise(df, total = sum({{ wt }})), total)
	    output <- output * (nrow(df) / weight_total)
	  }

	  # Dispatch through the exported generic rather than reaching into
	  # yardstick's namespace for the table method.
	  yardstick::conf_mat(as.table(output))
	}