Skip to content

Instantly share code, notes, and snippets.

@grayskripko
Last active September 28, 2023 17:27
Show Gist options
  • Save grayskripko/0615aee30526cb6a0601638b02cf4b19 to your computer and use it in GitHub Desktop.
Save grayskripko/0615aee30526cb6a0601638b02cf4b19 to your computer and use it in GitHub Desktop.
Power BI key influencers finder
# you can use it in Power BI to build a table of insights that will help you not to miss important influencers
suppressWarnings(suppressPackageStartupMessages({
library(tidyverse)
library(forcats)
library(ggplot2)
library(Hmisc)}))
custom_lump <- function(fct, min_size=nrow(tbl) / n_bins) {
kept_levels <- fct %>%
table() %>%
as.data.frame() %>%
setNames(c('Var1', 'Freq')) %>%
filter(Freq > min_size) %>%
pull(Var1) %>%
as.character()
fct_other(fct, keep = kept_levels, other_level = 'Other')
}
extraord_mtr <- list(
lin=function(gr, glob) {
abs(glob - gr) / (
ifelse(gr < glob, glob, 1 - glob)) },
rat=function(gr, glob) {
round(pmin(10, pmax(gr / glob, glob / gr)), 1) })
#### edit here ####
n_bins <- 10
use_interact <- F
tbl <- as_tibble(read_csv('../data/databel.csv')) %>%
rename(target = `Churn Label`) %>%
mutate(target = target == 'Yes') %>%
select(-c(`Churn Category`, `Churn Reason`)) %>%
mutate(target = if (mean(target, na.rm=T) > 0.5)
1 - target else target)
####
tbl_bin <- tbl %>%
mutate(across(where(is.character), fct_inorder)) %>%
mutate(across(where(is.factor), custom_lump)) %>%
mutate(across(where(is.numeric) & -target, ~cut2(., g=n_bins))) %>%
select(where(~n_distinct(.) > 1))
summ_plain <- tbl_bin %>%
pivot_longer(-target, names_to = 'column', values_to = 'bin') %>%
group_by(column, bin) %>%
summarise(
bin_mean = round(mean(target), 3), n_obs = n(), .groups='drop')
if (use_interact) {
summ_interact <- colnames(tbl_bin) %>%
setdiff('target') %>%
expand_grid(col1 = ., col2 = .) %>%
mutate(i = cur_group_id(), .by=col1) %>%
mutate(ii = cur_group_id(), .by=col2) %>%
filter(ii > i) %>%
select(-c(i, ii)) %>%
transpose() %>%
map_dfr(~{
cols <- set_names(unlist(.), str_c('bin', c('', '2')))
tbl_bin %>%
select(c(all_of(unlist(cols)), target)) %>%
group_by(bin, bin2) %>%
summarise(
bin_mean = round(mean(target), 3),
n_obs = n(), .groups='drop') %>%
mutate(column = cols['bin'], column2 = cols['bin2']) %>%
filter(n_obs > nrow(tbl) / n_bins / 2)
})
}
global_mean <- mean(tbl$target)
insight <- summ_plain %>%
{ if (!use_interact) . else bind_rows(., summ_interact) %>%
select(column, bin, column2, bin2, everything()) } %>%
mutate(rate = extraord_mtr$rat(bin_mean, global_mean)) %>%
arrange(desc(rate)) %>%
filter(rate > 0.3) %>%
mutate(is_less = bin_mean < global_mean) %>%
slice_head(n=2, by=c(column, is_less)) %>%
slice_head(n=10, by=is_less) %>%
select(-is_less) %>%
arrange(desc(bin_mean))
print(insight)
print(round(global_mean, 2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment