Last active
August 30, 2021 23:12
-
-
Save FrankRuns/1fbba268d9ec667f3827a20361f4ddc2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# purpose: use changepoint algorithm to determine | |
# appropriate history range for statistical control charts | |
# 0.0 Packages ---- | |
library(ggplot2) # for visualizations | |
library(gridExtra) # for plotting charts together | |
library(lubridate) # for data manipulations | |
library(tidyverse) # for piping data | |
library(reshape2) # for wide to long data transform | |
library(changepoint) # for finding change points in ts data | |
# 1.0 Load Data ---- | |
# * link to data ---- | |
# https://docs.google.com/spreadsheets/d/1D6zoj1fQGkccfjLW8c2wVqgxNQ8EhtDFBqjlRuMPPmY/edit?usp=sharing | |
cass_data <- read.delim(pipe("pbpaste")) | |
cass_data <- cass_data %>% filter(!is.na(Change)) | |
plot(cass_data$Change, type='l') | |
# plot index over time ---- | |
iv_plot <- ggplot(cass_data, aes(x=as.Date(Month), y=Index.Value)) + | |
geom_line() + | |
labs(title="Cass Freight Index Value", x="Year", y="Index Value") + | |
stat_smooth(geom='line', method="lm", alpha=0.2, se=FALSE) + | |
theme_minimal() + | |
scale_x_date(date_labels="%y",date_breaks ="1 year") | |
# 2.0 Base Control Charts ---- | |
# * create control limits ---- | |
cass_mean <- mean(cass_data$Change); cass_sd <- sd(cass_data$Change); | |
cc_inner_limits <- c(cass_mean + cass_sd * 1, cass_mean - cass_sd * 1, | |
cass_mean + cass_sd * 2, cass_mean - cass_sd * 2) | |
cc_outer_limits <- c(cass_mean + cass_sd * 3, cass_mean - cass_sd * 3) | |
# * create control plot ---- | |
cl_plot <- ggplot(cass_data, aes(x=as.Date(Month), y=Change)) + | |
geom_line() + | |
labs(title="Cass Control Limits Based on Add Data", x="Year", y="Year-Over-Year Change") + | |
geom_hline(yintercept = cass_mean, color="steelblue") + | |
geom_hline(yintercept = cc_outer_limits, color="red") + | |
geom_hline(yintercept = cc_inner_limits, color="deep pink", linetype="dotted") + | |
scale_x_date(date_labels="%y",date_breaks ="1 year") + | |
theme_minimal() | |
# 3.0 Change Points ---- | |
# * find appropirate change points ---- | |
# what is an appropriate penalty? | |
pv <- NULL | |
for (i in 1:100) { | |
temp <- cpt.meanvar(cass_data$Change, penalty = "Manual", pen.value = i * log(nrow(cass_data), method = "PELT") | |
if (min(seg.len(temp)) > round(log(nrow(cass_data)) * 0.3)) { | |
pv <- i | |
break | |
} | |
} | |
# * create the change points ---- | |
m.pm <- cpt.meanvar(cass_data$Change, penalty = "Manual", pen.value = pv * log(nrow(cass_data)), method = "PELT") | |
plot(m.pm, type = "l", cpt.col = "blue", xlab = "Months from 2006-01-01", ylab="Year-Over-Year Change", cpt.width = 4, main="Cass Fright Index Change Points") | |
mean(seg.len(m.pm)); sd(seg.len(m.pm)) | |
# * capture last changepoint ---- | |
mcpt.pts <- attributes(m.pm)$cpts | |
last_changepoint <<- cass_data$Month[mcpt.pts][length(cass_data$Month[mcpt.pts])-1] | |
# 4.0 Create Final Visual ---- | |
# plot change points on time series | |
the_change_points <- head(as.Date(cass_data$Month[mcpt.pts]), -1) | |
mc_plot <- ggplot(cass_data, aes(x=as.Date(Month), y=Change)) + | |
geom_line() + | |
labs(title="Multiple Change Points", y="Year-Over-Year Change", x="Year") + | |
geom_vline(xintercept = the_change_points, color="dodgerblue") + | |
theme_minimal() + | |
scale_x_date(date_labels="%y",date_breaks ="1 year") | |
# * create new control limits ---- | |
cass_split <- cass_data[cass_data$Month > tail(the_change_points, 1),] | |
cass_mean <- mean(cass_split$Change); cass_sd <- sd(cass_split$Change); | |
cc_inner_limits <- c(cass_mean + cass_sd * 1, cass_mean - cass_sd * 1, | |
cass_mean + cass_sd * 2, cass_mean - cass_sd * 2) | |
cc_outer_limits <- c(cass_mean + cass_sd * 3, cass_mean - cass_sd * 3) | |
# * create new control plot ---- | |
cl_plot <- ggplot(cass_data, aes(x=as.Date(Month), y=Change)) + | |
geom_line() + | |
labs(title="Cass Control Limits Based on Last Change Point", x="Year", y="Year-Over-Year Change") + | |
geom_hline(yintercept = cass_mean, color="steelblue") + | |
geom_hline(yintercept = cc_outer_limits, color="red") + | |
geom_hline(yintercept = cc_inner_limits, color="deep pink", linetype="dotted") + | |
theme_minimal() + | |
scale_x_date(date_labels="%y",date_breaks ="1 year") | |
# * panel visual ---- | |
grid.arrange(iv_plot, mc_plot, cl_plot, ncol = 1, nrow = 3) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment