Last active
September 28, 2023 17:27
-
-
Save grayskripko/0615aee30526cb6a0601638b02cf4b19 to your computer and use it in GitHub Desktop.
Power BI key influencers finder
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# you can use it in Power BI to build a table of insights that will help you not to miss important influencers | |
suppressWarnings(suppressPackageStartupMessages({ | |
library(tidyverse) | |
library(forcats) | |
library(ggplot2) | |
library(Hmisc)})) | |
custom_lump <- function(fct, min_size=nrow(tbl) / n_bins) { | |
kept_levels <- fct %>% | |
table() %>% | |
as.data.frame() %>% | |
setNames(c('Var1', 'Freq')) %>% | |
filter(Freq > min_size) %>% | |
pull(Var1) %>% | |
as.character() | |
fct_other(fct, keep = kept_levels, other_level = 'Other') | |
} | |
extraord_mtr <- list( | |
lin=function(gr, glob) { | |
abs(glob - gr) / ( | |
ifelse(gr < glob, glob, 1 - glob)) }, | |
rat=function(gr, glob) { | |
round(pmin(10, pmax(gr / glob, glob / gr)), 1) }) | |
#### edit here #### | |
n_bins <- 10 | |
use_interact <- F | |
tbl <- as_tibble(read_csv('../data/databel.csv')) %>% | |
rename(target = `Churn Label`) %>% | |
mutate(target = target == 'Yes') %>% | |
select(-c(`Churn Category`, `Churn Reason`)) %>% | |
mutate(target = if (mean(target, na.rm=T) > 0.5) | |
1 - target else target) | |
#### | |
tbl_bin <- tbl %>% | |
mutate(across(where(is.character), fct_inorder)) %>% | |
mutate(across(where(is.factor), custom_lump)) %>% | |
mutate(across(where(is.numeric) & -target, ~cut2(., g=n_bins))) %>% | |
select(where(~n_distinct(.) > 1)) | |
summ_plain <- tbl_bin %>% | |
pivot_longer(-target, names_to = 'column', values_to = 'bin') %>% | |
group_by(column, bin) %>% | |
summarise( | |
bin_mean = round(mean(target), 3), n_obs = n(), .groups='drop') | |
if (use_interact) { | |
summ_interact <- colnames(tbl_bin) %>% | |
setdiff('target') %>% | |
expand_grid(col1 = ., col2 = .) %>% | |
mutate(i = cur_group_id(), .by=col1) %>% | |
mutate(ii = cur_group_id(), .by=col2) %>% | |
filter(ii > i) %>% | |
select(-c(i, ii)) %>% | |
transpose() %>% | |
map_dfr(~{ | |
cols <- set_names(unlist(.), str_c('bin', c('', '2'))) | |
tbl_bin %>% | |
select(c(all_of(unlist(cols)), target)) %>% | |
group_by(bin, bin2) %>% | |
summarise( | |
bin_mean = round(mean(target), 3), | |
n_obs = n(), .groups='drop') %>% | |
mutate(column = cols['bin'], column2 = cols['bin2']) %>% | |
filter(n_obs > nrow(tbl) / n_bins / 2) | |
}) | |
} | |
global_mean <- mean(tbl$target) | |
insight <- summ_plain %>% | |
{ if (!use_interact) . else bind_rows(., summ_interact) %>% | |
select(column, bin, column2, bin2, everything()) } %>% | |
mutate(rate = extraord_mtr$rat(bin_mean, global_mean)) %>% | |
arrange(desc(rate)) %>% | |
filter(rate > 0.3) %>% | |
mutate(is_less = bin_mean < global_mean) %>% | |
slice_head(n=2, by=c(column, is_less)) %>% | |
slice_head(n=10, by=is_less) %>% | |
select(-is_less) %>% | |
arrange(desc(bin_mean)) | |
print(insight) | |
print(round(global_mean, 2)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment