Skip to content

Instantly share code, notes, and snippets.

@FrankRuns
Created May 12, 2021 12:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FrankRuns/9b5208bc345dcc56460f2205c75bbaae to your computer and use it in GitHub Desktop.
Save FrankRuns/9b5208bc345dcc56460f2205c75bbaae to your computer and use it in GitHub Desktop.
Script to support scenario planning article
# purpose: support article on scenario planning with monte carlo
# caution: this is a learning script (for me, and you)
# read packages
library(dplyr)
library(ggplot2)
library(tidyr)
library(EnvStats)
library(MASS)
#### Introduction ####
# create images for normal distribution
n_df <- data.frame(values = rnorm(10000, 14, 2))
ggplot(n_df, aes(x=values)) + geom_histogram(fill='steelblue') +
labs(x='Transit', y='Count', title='Normal Distribution') +
theme_minimal()
# is this really a normal distribution?
# https://stats.stackexchange.com/questions/132652/how-to-determine-which-distribution-fits-my-data-best
fitdistr(n_df$values, "normal")
ks.test(n_df$values, "pnorm", mean=mean(n_df$values), sd=sd(n_df$values)) # p-value > 0.05?
# create images for left skew distirbution
l_df <- data.frame(values = rsnorm(10000, mean = 14, sd = 2, xi = 2.5))
ggplot(l_df, aes(x=values)) + geom_histogram(fill='steelblue') +
labs(x='Transit', y='Count', title='Left Skewed Distribution') +
theme_minimal()
# is this really left skew distribution?
qqnorm(l_df$values)
qqline(l_df$values, col = 2)
#### Start with Super Slow, Easy to Understand Function ####
inner_sim <- function() {
# create initial empty dataset
output_data <- data.frame(volume=integer(),
start_date=as.Date(character()),
arrival_date=as.Date(character()),
full_transit=integer())
# create your data
for (the_date in date_range) {
the_volume <- round(rnorm(1, 1000, 1), 0)
order_oport <- rsnorm(1, mean = 14, sd = 2, xi = 2.5)
oport_dport <- rsnorm(1, mean = 14, sd = 2, xi = 2.5)
dport_dwhse <- rsnorm(1, mean = 4, sd = 1, xi = 2.5)
the_full_transit <- round(order_oport + oport_dport + dport_dwhse)
the_arrival_date <- the_date + the_full_transit
new_data <- data.frame(volume=the_volume,
start_date=the_date,
arrival_date=the_arrival_date,
full_transit=the_full_transit)
output_data <- rbind(output_data, new_data)
}
date_cols <- c("start_date", "arrival_date")
output_data[date_cols] <- lapply(output_data[date_cols],
function(x) type.convert(as.Date(x, origin="1970-01-01"), as.is = TRUE))
return(output_data)
}
# define a date range
# create it 10x because we assume 1 date range for each origin location (here that's 10 locations)
date_range <- rep(seq.Date(from=as.Date("2020-12-01"),
to=as.Date("2021-03-01"), by="day"), 10)
# MC consists of loops within loops - run a scenario, append it to the possibilities
test <- inner_sim()
head(test, 10) # this is one iterations of what might happen
# now, we'll want to do this hundreds of times to build a range of expectations
# BUT, before doing that, we'll want to build the inner function to calculate faster
# the above was to build an understanding of what's happening
#### Slightly Less Slow, Easy to Understand Function ####
# now, we rebuild the inner function, allowing for parameters instead of hardcoding
# plus, we add in the potential for delays
inner_sim <- function(date_range=c(),
delays=c(0,0,0,0,0),
origin_daily_value_mean=100,
origin_daily_value_sd=10,
int_means=14, dom_means=4,
int_sds=1, dom_sds=1,
tail_values=2.5) {
output_data <- data.frame(volume=abs(round(rnorm(length(date_range), origin_daily_value_mean, origin_daily_value_sd), 0)),
start_date=date_range)
output_data$order_oport <- rsnorm(length(date_range), mean = int_means, sd = int_sds, xi = tail_values)
output_data$oport_dport <- rsnorm(length(date_range), mean = int_means, sd = int_sds, xi = tail_values)
output_data$dport_dwhse <- rsnorm(length(date_range), mean = dom_means, sd = dom_sds, xi = tail_values)
output_data$the_delay_seq <- ifelse(output_data$start_date %in% delay_stage_1, delays[1],
ifelse(output_data$start_date %in% delay_stage_2, delays[2],
ifelse(output_data$start_date %in% delay_stage_3, delays[3],
ifelse(output_data$start_date %in% delay_stage_4, delays[4],
ifelse(output_data$start_date %in% delay_stage_5, delays[5], 0)))))
output_data$full_transit <- round(output_data$order_oport + output_data$oport_dport + output_data$dport_dwhse + output_data$the_delay_seq)
output_data$arrival_date <- output_data$start_date + output_data$full_transit
return(output_data)
}
# create date range function
create_dates <- function(num_origin_ports) {
date_range <- rep(seq.Date(from=as.Date("2020-12-01"),
to=as.Date("2021-03-01"), by="day"), num_origin_ports)
return(date_range)
}
# finally, create the outer loop that will return all the possibilities it calculates for arrival volume
outer_sim <- function(iterations=10,
num_origin_ports=10,
delays=c(0,0,0,0,0),
origin_daily_value_mean=100,
origin_daily_value_sd=10,
int_means=14, dom_means=4,
int_sds=1, dom_sds=1,
tail_values=2.5) {
date_range <- create_dates(num_origin_ports=num_origin_ports)
outer_data <- data.frame(arrival_date=as.Date(character()),
arrival_volume=integer())
for (i in 1:iterations) {
inner_data <- inner_sim(date_range=date_range,
delays=delays,
origin_daily_value_mean=origin_daily_value_mean,
origin_daily_value_sd=origin_daily_value_sd,
int_means=int_means, dom_means=dom_means,
int_sds=int_sds, dom_sds=dom_sds,
tail_values=tail_values)
inner_temp <- inner_data %>% mutate(arrival_date = as.character(arrival_date)) %>%
group_by(arrival_date) %>%
summarise(arrival_volume=sum(volume), .groups = 'drop') %>%
mutate(arrival_date = as.Date(arrival_date)) %>%
complete(arrival_date = seq.Date(min(arrival_date), max(arrival_date), by="day")) %>%
replace_na(list(arrival_volume = 0))
outer_data <- rbind(outer_data, inner_temp)
if(i %% 50 == 0) {print(i)}
}
return(outer_data)
}
# define weeks that will have delay pattern (if any)
delay_stage_1 <- seq.Date(from=as.Date("2021-01-01"), to=as.Date("2021-01-07"), by="day")
delay_stage_2 <- seq.Date(from=as.Date("2021-01-08"), to=as.Date("2021-01-14"), by="day")
delay_stage_3 <- seq.Date(from=as.Date("2021-01-15"), to=as.Date("2021-01-21"), by="day")
delay_stage_4 <- seq.Date(from=as.Date("2021-01-22"), to=as.Date("2021-01-28"), by="day")
delay_stage_5 <- seq.Date(from=as.Date("2021-01-29"), to=as.Date("2021-02-04"), by="day")
# run a test simulation and visualize the output
outer_data <- outer_sim(iterations=100,
num_origin_ports=10,
delays=c(0,0,0,0,0),
origin_daily_value_mean=100,
origin_daily_value_sd=10,
int_means=14, dom_means=4,
int_sds=1, dom_sds=1,
tail_values=2.5)
# typically, you'll want to chop off the beginning and end of the output
start_date_filter <- date_range[1] + quantile(inner_data$full_transit, 0.8)
end_date_filter <- date_range[length(unique(date_range))] + quantile(inner_data$full_transit, 0.2)
outer_data <- outer_data %>% filter(arrival_date >= start_date_filter & arrival_date <= end_date_filter)
# visualize the results with boxplots over time
ggplot(outer_data,aes(x=arrival_date, y=arrival_volume, group=arrival_date)) +
geom_boxplot() +
theme_minimal() +
ylim(0, 2250)
# what is the probability of receiving >=1250 units?
1 - ecdf(outer_data$arrival_volume)(1250) # 18%
# what is the probability of receiving >=1500 units?
1 - ecdf(outer_data$arrival_volume)(1500) # 5%
# what is the probability of receiving >=750 units?
1 - ecdf(outer_data$arrival_volume)(750) # 79%
#### Scenarios ####
# Scenario: No Variance
novariance_data <- outer_sim(iterations=100,
num_origin_ports=10,
delays=c(0,0,0,0,0),
origin_daily_value_mean=100,
origin_daily_value_sd=0.01,
int_means=14, dom_means=4,
int_sds=0.01, dom_sds=0.01,
tail_values=2.5)
# filter date range
novariance_data <- novariance_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24"))
# visualize
ggplot(novariance_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() +
theme_minimal() + ylim(0, 2250) +
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="No Variance")
# prob of arrival volume of >=1250
1 - ecdf(novariance_data$arrival_volume)(1250)
# Scenario: Only Transit Variance
onlytransit_variance_data <- outer_sim(iterations=100,
num_origin_ports=10,
delays=c(0,0,0,0,0),
origin_daily_value_mean=100,
origin_daily_value_sd=0.01,
int_means=14, dom_means=4,
int_sds=1, dom_sds=1,
tail_values=2.5)
# filter date range
onlytransit_variance_data <- onlytransit_variance_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24"))
# visualize
ggplot(onlytransit_variance_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() +
theme_minimal() + ylim(0, 2250) +
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Only Transit Variance")
# prob of arrival volume of >=1250
1 - ecdf(onlytransit_variance_data$arrival_volume)(1250)
# visualize distribution of outcomes
ggplot(onlytransit_variance_data, aes(x=arrival_volume)) + geom_histogram(bins=26) +
theme_minimal() +
geom_histogram(data=onlytransit_variance_data %>% filter(arrival_volume >= 1250), bins=26,
aes(x=arrival_volume, fill="orange")) +
labs(x="Unit Arrival Volume", y="Frequency of Occurance", title="Only Transit Variance - Distribution of Outcomes")
# Scenario: Baseline - Both Transit and Daily Shipment Variance
baseline_data <- outer_sim(iterations=100,
num_origin_ports=10,
delays=c(0,0,0,0,0),
origin_daily_value_mean=100,
origin_daily_value_sd=30,
int_means=14, dom_means=4,
int_sds=1, dom_sds=1,
tail_values=2.5)
# filter date range
baseline_data <- baseline_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24"))
# visualize
ggplot(baseline_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() +
theme_minimal() + ylim(0, 2250) +
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Baseline")
# prob of arrival volume of >=1250
1 - ecdf(baseline_data$arrival_volume)(1250)
# Scenario: Single Instance
single_data <- outer_sim(iterations=1,
num_origin_ports=10,
delays=c(0,0,0,0,0),
origin_daily_value_mean=100,
origin_daily_value_sd=30,
int_means=14, dom_means=4,
int_sds=1, dom_sds=1,
tail_values=2.5)
# filter date range
single_data <- single_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24"))
# visualize
ggplot(single_data, aes(x=arrival_date, y=arrival_volume, group=1)) +
geom_line() +
theme_minimal() + ylim(0, 2250) +
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Single Instance")
# prob of arrival volume of >=1250
1 - ecdf(baseline_data$arrival_volume)(1250)
# Scenario: Single Instance
bigvariance_data <- outer_sim(iterations=100,
num_origin_ports=10,
delays=c(0,0,0,0,0),
origin_daily_value_mean=100,
origin_daily_value_sd=30,
int_means=14, dom_means=4,
int_sds=3, dom_sds=3,
tail_values=6.5)
# filter date range
bigvariance_data <- bigvariance_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24"))
# visualize
ggplot(bigvariance_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() +
theme_minimal() + ylim(0, 2250) +
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Big Variance")
# prob of arrival volume of >=1250
1 - ecdf(bigvariance_data$arrival_volume)(1250)
# Scenario: Small Delay
smalldelay_data <- outer_sim(iterations=100,
num_origin_ports=10,
delays=c(1,3,5,3,1),
origin_daily_value_mean=100,
origin_daily_value_sd=30,
int_means=14, dom_means=4,
int_sds=1, dom_sds=1,
tail_values=2.5)
# filter date range
smalldelay_data <- smalldelay_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24"))
# visualize
ggplot(smalldelay_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() +
theme_minimal() + ylim(0, 2250) +
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Small Delay")
# prob of arrival volume of >=1250
1 - ecdf(smalldelay_data$arrival_volume)(1250)
# prob of arrival volume of >=1250 (beginning of March)
temp <- smalldelay_data %>% filter(arrival_date > as.Date("2021-03-01") & arrival_date < as.Date("2021-03-15"))
1 - ecdf(temp$arrival_volume)(1250)
# Scenario: Big Delay
bigdelay_data <- outer_sim(iterations=100,
num_origin_ports=10,
delays=c(10,8,6,4,2),
origin_daily_value_mean=100,
origin_daily_value_sd=10,
int_means=14, dom_means=4,
int_sds=1, dom_sds=1,
tail_values=2.5)
# filter date range
bigdelay_data <- bigdelay_data %>% filter(arrival_date > as.Date("2021-01-08") & arrival_date < as.Date("2021-03-24"))
# visualize
ggplot(bigdelay_data, aes(x=arrival_date, y=arrival_volume, group=arrival_date)) + geom_boxplot() +
theme_minimal() + ylim(0, 2250) +
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Big Bang Delay")
# prob of arrival volume of >=1250
1 - ecdf(bigdelay_data$arrival_volume)(1250)
#### Scenario Planning ####
# define which percentile you want to visualize
pctile <- 0.5
novariance_plot <- novariance_data %>% group_by(arrival_date) %>%
summarise(novariance_volume=quantile(arrival_volume, pctile), .groups = 'drop')
onlytransit_variance_plot <- onlytransit_variance_data %>% group_by(arrival_date) %>%
summarise(onlytransit_variance_volume=quantile(arrival_volume, pctile), .groups = 'drop')
baseline_plot <- baseline_data %>% group_by(arrival_date) %>%
summarise(baseline_volume=quantile(arrival_volume, pctile), .groups = 'drop')
bigvariance_plot <- bigvariance_data %>% group_by(arrival_date) %>%
summarise(bigvariance_volume=quantile(arrival_volume, pctile), .groups = 'drop')
smalldelay_plot <- smalldelay_data %>% group_by(arrival_date) %>%
summarise(smalldelay_volume=quantile(arrival_volume, pctile), .groups = 'drop')
bigdelay_plot <- bigdelay_data %>% group_by(arrival_date) %>%
summarise(bigdelay_volume=quantile(arrival_volume, pctile), .groups = 'drop')
# combine all scenarios into single dataframe
all_data <- left_join(novariance_plot, onlytransit_variance_plot, by="arrival_date") %>%
left_join(baseline_plot, by="arrival_date") %>%
left_join(bigvariance_plot, by="arrival_date") %>%
left_join(smalldelay_plot, by="arrival_date") %>%
left_join(bigdelay_plot, by="arrival_date")
melt_all <- melt(all_data, id.vars="arrival_date")
names(melt_all) <- c("arrival_date", "Scenario", "value")
# visualize together
ggplot(melt_all, aes(x=arrival_date, y=value, group=Scenario, color=Scenario)) +
geom_line() +
theme_minimal() +
labs(x="Arrival at Warehouse Date", y="Unit Arrival Volume", title="Comparting Scenarios (80th Percentile)")
# volumes from different scenarios on single day
all_data %>% filter(arrival_date == as.Date("2021-03-01")) %>% head()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment