Created
October 3, 2022 18:38
-
-
Save mrecos/057926133a8df2b789b5f1cf7ce14ad2 to your computer and use it in GitHub Desktop.
Comparing models and MODEL RANK
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(readr) | |
library(tidyverse) | |
library(glue) | |
library(DataRobotColors) | |
# Load the holdout predictions CSV.
# NOTE(review): base read.csv() is kept deliberately. The column name
# `Cross.Validation.Prediction` used below looks like the result of
# read.csv()'s default name mangling (check.names = TRUE); confirm before
# switching to readr::read_csv(), which would not produce that name.
pred <- read.csv("DATA/Predictions/[MDH]_DRPrimaryV3_10pcnt_SF_20221003_eXtreme_Gradient_Boosted_Trees_Classifier_with_Ear_(57)_79.93_IF_-_MDH_Reduced_v2_holdout.csv")

# Rank every row within its traveler by predicted score, best first:
# pred_order == 1 is the row with the highest Cross.Validation.Prediction for
# that TRAVELERID. Rows come out sorted by TRAVELERID, then descending score.
# The result is still grouped by TRAVELERID (unchanged from before), so
# downstream code that needs whole-table operations must ungroup() first.
pred_rank <- pred %>%
  group_by(TRAVELERID) %>%
  arrange(TRAVELERID, desc(Cross.Validation.Prediction)) %>%
  # row_number() gives the same integer 1..n per group as 1:n(), but returns
  # integer(0) for an empty group, where 1:n() would produce c(1, 0).
  mutate(pred_order = row_number())
# ---- Headline counts ---------------------------------------------------------

# Denominator for every percentage below: total of the BOOKED column.
all_booked <- sum(pred$BOOKED)

# Distinct travelers overall, and distinct travelers that have at least one
# row with a positive MODEL_RANK. which() drops NA comparisons, the same way
# filter() does.
all_trav_pred <- n_distinct(pred$TRAVELERID)
all_trav_RANK <- n_distinct(pred$TRAVELERID[which(pred$MODEL_RANK > 0)])

# Booked rows that the prediction ordering places in positions 1..20 of their
# traveler.
top20_pred <- filter(pred_rank, BOOKED == 1, pred_order <= 20)

# Booked rows whose MODEL_RANK is in 1..20; non-positive ranks are excluded.
top20_MODEL_RANK <- filter(
  pred_rank,
  MODEL_RANK > 0,
  BOOKED == 1,
  MODEL_RANK <= 20
)

# Row counts of the two top-20 subsets (one row per booked record).
top20_pred_trav <- nrow(top20_pred)
top20_RANK_trav <- nrow(top20_MODEL_RANK)

# Share of all booked rows captured by each top-20 list, as a percentage
# rounded to two decimals.
pcnt_top_20_pred <- round(100 * (top20_pred_trav / all_booked), 2)
pcnt_top_20_RANK <- round(100 * (top20_RANK_trav / all_booked), 2)
# Tabulate booked rows by rank position for one ranking method.
#
# Args:
#   booked:       vector of BOOKED flags; only entries equal to 1 are kept.
#   rank:         rank position of each row (same length as `booked`).
#   model_label:  string stored in the `model` column of the result.
#   total_booked: denominator used for `pcnt_of_all_booked`.
#
# Returns:
#   A data frame with columns Booked and Pred_order (both factors, as produced
#   by as.data.frame on a table), Freq, pcnt_of_all_booked and model, with
#   one row per rank position.
booked_share_by_rank <- function(booked, rank, model_label, total_booked) {
  as.data.frame(table(booked, rank, dnn = c("Booked", "Pred_order"))) %>%
    filter(Booked == 1) %>%
    mutate(
      pcnt_of_all_booked = (Freq / total_booked) * 100,
      model = model_label
    )
}

p_order_pred <- booked_share_by_rank(
  top20_pred$BOOKED,
  top20_pred$pred_order,
  model_label = "DR Prediction",
  total_booked = all_booked
)

p_order_RANK <- booked_share_by_rank(
  top20_MODEL_RANK$BOOKED,
  top20_MODEL_RANK$MODEL_RANK,
  model_label = "MODEL RANK",
  total_booked = all_booked
)

# rbind() merges the two Pred_order factors by appending levels in order of
# first appearance. If a rank position is missing from the first table but
# present in the second, it would end up after the last level and the plot's
# x-axis would be out of numeric order. Re-level numerically after binding.
pred_compare <- rbind(p_order_pred, p_order_RANK) %>%
  mutate(
    Pred_order = factor(
      Pred_order,
      levels = sort(unique(as.numeric(as.character(Pred_order))))
    )
  )
# Bar chart of the percentage of all booked rows found at each top-20 rank
# position, drawn as one facet per ranking method. The fill colour repeats the
# facet variable, so the legend is hidden.
ggplot(
  data = pred_compare,
  mapping = aes(x = Pred_order, y = pcnt_of_all_booked, fill = model)
) +
  geom_col() +
  facet_wrap(vars(model)) +
  scale_fill_DataRobot(palette = "DR_Diverging") +
  labs(
    title = "What percent of 'Booked' orders appear in the top 20 predictions",
    subtitle = "Project: [MDH] DRPrimaryV3_10pcnt SF 20221003, Model: 112, Blueprint: 57",
    # The caption reports the two overall top-20 capture rates computed above.
    caption = glue("{pcnt_top_20_pred}% of 'Booked' O/T pairs are in top 20 DataRobot recommended Orders\n{pcnt_top_20_RANK}% of 'Booked' O/T pairs are in top 20 MODEL RANK recommended Orders"),
    x = "Top 20 Recommendations (Predicted)",
    y = "% of All Booked O/T Pairs"
  ) +
  theme_bw() +
  theme(legend.position = "none")
# Scatter plus 2-D density contours comparing the two orderings row by row:
# x is the prediction-based position, y is MODEL_RANK. Both axes are reversed
# and limited to 100..1, with breaks at 1, 10, 20, ..., 100.
#
# Only rows with a positive MODEL_RANK and no missing values are used. A random
# 25% of those rows is drawn (unseeded, so the sample differs between runs)
# and then restricted to positions <= 100 on both axes.
pred_rank %>%
  ungroup() %>%
  filter(MODEL_RANK > 0) %>%
  select(MODEL_RANK, pred_order) %>%
  na.omit() %>%
  sample_frac(0.25) %>%
  filter(pred_order <= 100, MODEL_RANK <= 100) %>%
  ggplot(mapping = aes(x = pred_order, y = MODEL_RANK)) +
  # Contours are drawn first so the points are layered on top of them.
  geom_density_2d() +
  # Disabled alternative, kept for reference:
  # geom_density_2d_filled(bins = 30) +
  geom_point(alpha = 0.05) +
  # Disabled alternative, kept for reference:
  # geom_smooth() +
  scale_x_reverse(
    limits = c(100, 1),
    breaks = c(1, seq(10, 100, by = 10)),
    labels = c(1, seq(10, 100, by = 10)),
    expand = c(0, 0)
  ) +
  scale_y_reverse(
    limits = c(100, 1),
    breaks = c(1, seq(10, 100, by = 10)),
    labels = c(1, seq(10, 100, by = 10)),
    expand = c(0, 0)
  ) +
  coord_equal() +
  labs(x = "DR Prediction Order", y = "MODEL RANK") +
  theme_bw() +
  theme(legend.position = "none")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment