Created
May 12, 2021 12:14
-
-
Save FrankRuns/9b5208bc345dcc56460f2205c75bbaae to your computer and use it in GitHub Desktop.
Script to support scenario planning article
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# purpose: support article on scenario planning with monte carlo | |
# caution: this is a learning script (for me, and you) | |
# read packages | |
library(dplyr) | |
library(ggplot2) | |
library(tidyr) | |
library(EnvStats) | |
library(MASS) | |
#### Introduction #### | |
# create images for normal distribution | |
n_df <- data.frame(values = rnorm(10000, 14, 2)) | |
ggplot(n_df, aes(x=values)) + geom_histogram(fill='steelblue') + | |
labs(x='Transit', y='Count', title='Normal Distribution') + | |
theme_minimal() | |
# is this really a normal distribution? | |
# https://stats.stackexchange.com/questions/132652/how-to-determine-which-distribution-fits-my-data-best | |
fitdistr(n_df$values, "normal") | |
ks.test(n_df$values, "pnorm", mean=mean(n_df$values), sd=sd(n_df$values)) # p-value > 0.05? | |
# create images for left skew distirbution | |
l_df <- data.frame(values = rsnorm(10000, mean = 14, sd = 2, xi = 2.5)) | |
ggplot(l_df, aes(x=values)) + geom_histogram(fill='steelblue') + | |
labs(x='Transit', y='Count', title='Left Skewed Distribution') + | |
theme_minimal() | |
# is this really left skew distribution? | |
qqnorm(l_df$values) | |
qqline(l_df$values, col = 2) | |
#### Start with Super Slow, Easy to Understand Function #### | |
inner_sim <- function() { | |
# create initial empty dataset | |
output_data <- data.frame(volume=integer(), | |
start_date=as.Date(character()), | |
arrival_date=as.Date(character()), | |
full_transit=integer()) | |
# create your data | |
for (the_date in date_range) { | |
the_volume <- round(rnorm(1, 1000, 1), 0) | |
order_oport <- rsnorm(1, mean = 14, sd = 2, xi = 2.5) | |
oport_dport <- rsnorm(1, mean = 14, sd = 2, xi = 2.5) | |
dport_dwhse <- rsnorm(1, mean = 4, sd = 1, xi = 2.5) | |
the_full_transit <- round(order_oport + oport_dport + dport_dwhse) | |
the_arrival_date <- the_date + the_full_transit | |
new_data <- data.frame(volume=the_volume, | |
start_date=the_date, | |
arrival_date=the_arrival_date, | |
full_transit=the_full_transit) | |
output_data <- rbind(output_data, new_data) | |
} | |
date_cols <- c("start_date", "arrival_date") | |
output_data[date_cols] <- lapply(output_data[date_cols], | |
function(x) type.convert(as.Date(x, origin="1970-01-01"), as.is = TRUE)) | |
return(output_data) | |
} | |
# define a date range | |
# create it 10x because we assume 1 date range for each origin location (here that's 10 locations) | |
date_range <- rep(seq.Date(from=as.Date("2020-12-01"), | |
to=as.Date("2021-03-01"), by="day"), 10) | |
# MC consists of loops within loops - run a scenario, append it to the possibilities | |
test <- inner_sim() | |
head(test, 10) # this is one iterations of what might happen | |
# now, we'll want to do this hundreds of times to build a range of expectations | |
# BUT, before doing that, we'll want to build the inner function to calculate faster | |
# the above was to build an understanding of what's happening | |
#### Slightly Less Slow, Easy to Understand Function #### | |
# now, we rebuild the inner function, allowing for parameters instead of hardcoding | |
# plus, we add in the potential for delays | |
inner_sim <- function(date_range=c(), | |
delays=c(0,0,0,0,0), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=10, | |
int_means=14, dom_means=4, | |
int_sds=1, dom_sds=1, | |
tail_values=2.5) { | |
output_data <- data.frame(volume=abs(round(rnorm(length(date_range), origin_daily_value_mean, origin_daily_value_sd), 0)), | |
start_date=date_range) | |
output_data$order_oport <- rsnorm(length(date_range), mean = int_means, sd = int_sds, xi = tail_values) | |
output_data$oport_dport <- rsnorm(length(date_range), mean = int_means, sd = int_sds, xi = tail_values) | |
output_data$dport_dwhse <- rsnorm(length(date_range), mean = dom_means, sd = dom_sds, xi = tail_values) | |
output_data$the_delay_seq <- ifelse(output_data$start_date %in% delay_stage_1, delays[1], | |
ifelse(output_data$start_date %in% delay_stage_2, delays[2], | |
ifelse(output_data$start_date %in% delay_stage_3, delays[3], | |
ifelse(output_data$start_date %in% delay_stage_4, delays[4], | |
ifelse(output_data$start_date %in% delay_stage_5, delays[5], 0))))) | |
output_data$full_transit <- round(output_data$order_oport + output_data$oport_dport + output_data$dport_dwhse + output_data$the_delay_seq) | |
output_data$arrival_date <- output_data$start_date + output_data$full_transit | |
return(output_data) | |
} | |
# create date range function | |
create_dates <- function(num_origin_ports) { | |
date_range <- rep(seq.Date(from=as.Date("2020-12-01"), | |
to=as.Date("2021-03-01"), by="day"), num_origin_ports) | |
return(date_range) | |
} | |
# finally, create the outer loop that will return all the possibilities it calculates for arrival volume | |
outer_sim <- function(iterations=10, | |
num_origin_ports=10, | |
delays=c(0,0,0,0,0), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=10, | |
int_means=14, dom_means=4, | |
int_sds=1, dom_sds=1, | |
tail_values=2.5) { | |
date_range <- create_dates(num_origin_ports=num_origin_ports) | |
outer_data <- data.frame(arrival_date=as.Date(character()), | |
arrival_volume=integer()) | |
for (i in 1:iterations) { | |
inner_data <- inner_sim(date_range=date_range, | |
delays=delays, | |
origin_daily_value_mean=origin_daily_value_mean, | |
origin_daily_value_sd=origin_daily_value_sd, | |
int_means=int_means, dom_means=dom_means, | |
int_sds=int_sds, dom_sds=dom_sds, | |
tail_values=tail_values) | |
inner_temp <- inner_data %>% mutate(arrival_date = as.character(arrival_date)) %>% | |
group_by(arrival_date) %>% | |
summarise(arrival_volume=sum(volume), .groups = 'drop') %>% | |
mutate(arrival_date = as.Date(arrival_date)) %>% | |
complete(arrival_date = seq.Date(min(arrival_date), max(arrival_date), by="day")) %>% | |
replace_na(list(arrival_volume = 0)) | |
outer_data <- rbind(outer_data, inner_temp) | |
if(i %% 50 == 0) {print(i)} | |
} | |
return(outer_data) | |
} | |
# define weeks that will have delay pattern (if any) | |
delay_stage_1 <- seq.Date(from=as.Date("2021-01-01"), to=as.Date("2021-01-07"), by="day") | |
delay_stage_2 <- seq.Date(from=as.Date("2021-01-08"), to=as.Date("2021-01-14"), by="day") | |
delay_stage_3 <- seq.Date(from=as.Date("2021-01-15"), to=as.Date("2021-01-21"), by="day") | |
delay_stage_4 <- seq.Date(from=as.Date("2021-01-22"), to=as.Date("2021-01-28"), by="day") | |
delay_stage_5 <- seq.Date(from=as.Date("2021-01-29"), to=as.Date("2021-02-04"), by="day") | |
# run a test simulation and visualize the output | |
outer_data <- outer_sim(iterations=100, | |
num_origin_ports=10, | |
delays=c(0,0,0,0,0), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=10, | |
int_means=14, dom_means=4, | |
int_sds=1, dom_sds=1, | |
tail_values=2.5) | |
# typically, you'll want to chop off the beginning and end of the output | |
start_date_filter <- date_range[1] + quantile(inner_data$full_transit, 0.8) | |
end_date_filter <- date_range[length(unique(date_range))] + quantile(inner_data$full_transit, 0.2) | |
outer_data <- outer_data %>% filter(arrival_date >= start_date_filter & arrival_date <= end_date_filter) | |
# visualize the results with boxplots over time | |
ggplot(outer_data,aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + | |
geom_boxplot() + | |
theme_minimal() + | |
ylim(0, 2250) | |
# what is the probability of receiving >=1250 units? | |
1 - ecdf(outer_data$arrival_volume)(1250) # 18% | |
# what is the probability of receiving >=1500 units? | |
1 - ecdf(outer_data$arrival_volume)(1500) # 5% | |
# what is the probability of receiving >=750 units? | |
1 - ecdf(outer_data$arrival_volume)(750) # 79% | |
#### Scenarios #### | |
# Scenario: No Variance | |
novariance_data <- outer_sim(iterations=100, | |
num_origin_ports=10, | |
delays=c(0,0,0,0,0), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=0.01, | |
int_means=14, dom_means=4, | |
int_sds=0.01, dom_sds=0.01, | |
tail_values=2.5) | |
# filter date range | |
novariance_data <- novariance_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24")) | |
# visualize | |
ggplot(novariance_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() + | |
theme_minimal() + ylim(0, 2250) + | |
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="No Variance") | |
# prob of arrival volume of >=1250 | |
1 - ecdf(novariance_data$arrival_volume)(1250) | |
# Scenario: Only Transit Variance | |
onlytransit_variance_data <- outer_sim(iterations=100, | |
num_origin_ports=10, | |
delays=c(0,0,0,0,0), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=0.01, | |
int_means=14, dom_means=4, | |
int_sds=1, dom_sds=1, | |
tail_values=2.5) | |
# filter date range | |
onlytransit_variance_data <- onlytransit_variance_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24")) | |
# visualize | |
ggplot(onlytransit_variance_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() + | |
theme_minimal() + ylim(0, 2250) + | |
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Only Transit Variance") | |
# prob of arrival volume of >=1250 | |
1 - ecdf(onlytransit_variance_data$arrival_volume)(1250) | |
# visualize distribution of outcomes | |
ggplot(onlytransit_variance_data, aes(x=arrival_volume)) + geom_histogram(bins=26) + | |
theme_minimal() + | |
geom_histogram(data=onlytransit_variance_data %>% filter(arrival_volume >= 1250), bins=26, | |
aes(x=arrival_volume, fill="orange")) + | |
labs(x="Unit Arrival Volume", y="Frequency of Occurance", title="Only Transit Variance - Distribution of Outcomes") | |
# Scenario: Baseline - Both Transit and Daily Shipment Variance | |
baseline_data <- outer_sim(iterations=100, | |
num_origin_ports=10, | |
delays=c(0,0,0,0,0), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=30, | |
int_means=14, dom_means=4, | |
int_sds=1, dom_sds=1, | |
tail_values=2.5) | |
# filter date range | |
baseline_data <- baseline_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24")) | |
# visualize | |
ggplot(baseline_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() + | |
theme_minimal() + ylim(0, 2250) + | |
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Baseline") | |
# prob of arrival volume of >=1250 | |
1 - ecdf(baseline_data$arrival_volume)(1250) | |
# Scenario: Single Instance | |
single_data <- outer_sim(iterations=1, | |
num_origin_ports=10, | |
delays=c(0,0,0,0,0), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=30, | |
int_means=14, dom_means=4, | |
int_sds=1, dom_sds=1, | |
tail_values=2.5) | |
# filter date range | |
single_data <- single_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24")) | |
# visualize | |
ggplot(single_data, aes(x=arrival_date, y=arrival_volume, group=1)) + | |
geom_line() + | |
theme_minimal() + ylim(0, 2250) + | |
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Single Instance") | |
# prob of arrival volume of >=1250 | |
1 - ecdf(baseline_data$arrival_volume)(1250) | |
# Scenario: Single Instance | |
bigvariance_data <- outer_sim(iterations=100, | |
num_origin_ports=10, | |
delays=c(0,0,0,0,0), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=30, | |
int_means=14, dom_means=4, | |
int_sds=3, dom_sds=3, | |
tail_values=6.5) | |
# filter date range | |
bigvariance_data <- bigvariance_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24")) | |
# visualize | |
ggplot(bigvariance_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() + | |
theme_minimal() + ylim(0, 2250) + | |
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Big Variance") | |
# prob of arrival volume of >=1250 | |
1 - ecdf(bigvariance_data$arrival_volume)(1250) | |
# Scenario: Small Delay | |
smalldelay_data <- outer_sim(iterations=100, | |
num_origin_ports=10, | |
delays=c(1,3,5,3,1), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=30, | |
int_means=14, dom_means=4, | |
int_sds=1, dom_sds=1, | |
tail_values=2.5) | |
# filter date range | |
smalldelay_data <- smalldelay_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24")) | |
# visualize | |
ggplot(smalldelay_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() + | |
theme_minimal() + ylim(0, 2250) + | |
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Small Delay") | |
# prob of arrival volume of >=1250 | |
1 - ecdf(smalldelay_data$arrival_volume)(1250) | |
# prob of arrival volume of >=1250 (beginning of March) | |
temp <- smalldelay_data %>% filter(arrival_date > as.Date("2021-03-01") & arrival_date < as.Date("2021-03-15")) | |
1 - ecdf(temp$arrival_volume)(1250) | |
# Scenario: Big Delay | |
bigdelay_data <- outer_sim(iterations=100, | |
num_origin_ports=10, | |
delays=c(10,8,6,4,2), | |
origin_daily_value_mean=100, | |
origin_daily_value_sd=10, | |
int_means=14, dom_means=4, | |
int_sds=1, dom_sds=1, | |
tail_values=2.5) | |
# filter date range | |
bigdelay_data <- bigdelay_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24")) | |
# visualize | |
ggplot(bigdelay_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() + | |
theme_minimal() + ylim(0, 2250) + | |
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Big Bang Delay") | |
# prob of arrival volume of >=1250 | |
1 - ecdf(bigdelay_data$arrival_volume)(1250) | |
#### Scenario Planning #### | |
# define which percentile you want to visualize | |
pctile <- 0.5 | |
novariance_plot <- novariance_data %>% group_by(arrival_date) %>% | |
summarise(novariance_volume=quantile(arrival_volume, pctile), .groups = 'drop') | |
onlytransit_variance_plot <- onlytransit_variance_data %>% group_by(arrival_date) %>% | |
summarise(onlytransit_variance_volume=quantile(arrival_volume, pctile), .groups = 'drop') | |
baseline_plot <- baseline_data %>% group_by(arrival_date) %>% | |
summarise(baseline_volume=quantile(arrival_volume, pctile), .groups = 'drop') | |
bigvariance_plot <- bigvariance_data %>% group_by(arrival_date) %>% | |
summarise(bigvariance_volume=quantile(arrival_volume, pctile), .groups = 'drop') | |
smalldelay_plot <- smalldelay_data %>% group_by(arrival_date) %>% | |
summarise(smalldelay_volume=quantile(arrival_volume, pctile), .groups = 'drop') | |
bigdelay_plot <- bigdelay_data %>% group_by(arrival_date) %>% | |
summarise(bigdelay_volume=quantile(arrival_volume, pctile), .groups = 'drop') | |
# combine all scenarios into single dataframe | |
all_data <- left_join(novariance_plot, onlytransit_variance_plot, by="arrival_date") %>% | |
left_join(baseline_plot, by="arrival_date") %>% | |
left_join(bigvariance_plot, by="arrival_date") %>% | |
left_join(smalldelay_plot, by="arrival_date") %>% | |
left_join(bigdelay_plot, by="arrival_date") | |
melt_all <- melt(all_data, id.vars="arrival_date") | |
names(melt_all) <- c("arrival_date", "Scenario", "value") | |
# visualize together | |
ggplot(melt_all, aes(x=arrival_date, y=value, group=Scenario, color=Scenario)) + | |
geom_line() + | |
theme_minimal() + | |
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Comparting Scenarios (80th Percentile)") | |
# volumes from different scenarios on single day | |
all_data %>% filter(arrival_date == as.Date("2021-03-01")) %>% head() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment