Skip to content

Instantly share code, notes, and snippets.

@FrankRuns
Last active August 30, 2021 23:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save FrankRuns/1fbba268d9ec667f3827a20361f4ddc2 to your computer and use it in GitHub Desktop.
Save FrankRuns/1fbba268d9ec667f3827a20361f4ddc2 to your computer and use it in GitHub Desktop.
# purpose: use changepoint algorithm to determine
# appropriate history range for statistical control charts
# 0.0 Packages ----
library(ggplot2) # for visualizations
library(gridExtra) # for plotting charts together
library(lubridate) # for data manipulations
library(tidyverse) # for piping data
library(reshape2) # for wide to long data transform
library(changepoint) # for finding change points in ts data
# 1.0 Load Data ----
# * link to data ----
# https://docs.google.com/spreadsheets/d/1D6zoj1fQGkccfjLW8c2wVqgxNQ8EhtDFBqjlRuMPPmY/edit?usp=sharing
cass_data <- read.delim(pipe("pbpaste"))
cass_data <- cass_data %>% filter(!is.na(Change))
plot(cass_data$Change, type='l')
# plot index over time ----
iv_plot <- ggplot(cass_data, aes(x=as.Date(Month), y=Index.Value)) +
geom_line() +
labs(title="Cass Freight Index Value", x="Year", y="Index Value") +
stat_smooth(geom='line', method="lm", alpha=0.2, se=FALSE) +
theme_minimal() +
scale_x_date(date_labels="%y",date_breaks ="1 year")
# 2.0 Base Control Charts ----
# * create control limits ----
cass_mean <- mean(cass_data$Change); cass_sd <- sd(cass_data$Change);
cc_inner_limits <- c(cass_mean + cass_sd * 1, cass_mean - cass_sd * 1,
cass_mean + cass_sd * 2, cass_mean - cass_sd * 2)
cc_outer_limits <- c(cass_mean + cass_sd * 3, cass_mean - cass_sd * 3)
# * create control plot ----
cl_plot <- ggplot(cass_data, aes(x=as.Date(Month), y=Change)) +
geom_line() +
labs(title="Cass Control Limits Based on Add Data", x="Year", y="Year-Over-Year Change") +
geom_hline(yintercept = cass_mean, color="steelblue") +
geom_hline(yintercept = cc_outer_limits, color="red") +
geom_hline(yintercept = cc_inner_limits, color="deep pink", linetype="dotted") +
scale_x_date(date_labels="%y",date_breaks ="1 year") +
theme_minimal()
# 3.0 Change Points ----
# * find appropirate change points ----
# what is an appropriate penalty?
pv <- NULL
for (i in 1:100) {
temp <- cpt.meanvar(cass_data$Change, penalty = "Manual", pen.value = i * log(nrow(cass_data), method = "PELT")
if (min(seg.len(temp)) > round(log(nrow(cass_data)) * 0.3)) {
pv <- i
break
}
}
# * create the change points ----
m.pm <- cpt.meanvar(cass_data$Change, penalty = "Manual", pen.value = pv * log(nrow(cass_data)), method = "PELT")
plot(m.pm, type = "l", cpt.col = "blue", xlab = "Months from 2006-01-01", ylab="Year-Over-Year Change", cpt.width = 4, main="Cass Fright Index Change Points")
mean(seg.len(m.pm)); sd(seg.len(m.pm))
# * capture last changepoint ----
mcpt.pts <- attributes(m.pm)$cpts
last_changepoint <<- cass_data$Month[mcpt.pts][length(cass_data$Month[mcpt.pts])-1]
# 4.0 Create Final Visual ----
# plot change points on time series
the_change_points <- head(as.Date(cass_data$Month[mcpt.pts]), -1)
mc_plot <- ggplot(cass_data, aes(x=as.Date(Month), y=Change)) +
geom_line() +
labs(title="Multiple Change Points", y="Year-Over-Year Change", x="Year") +
geom_vline(xintercept = the_change_points, color="dodgerblue") +
theme_minimal() +
scale_x_date(date_labels="%y",date_breaks ="1 year")
# * create new control limits ----
cass_split <- cass_data[cass_data$Month > tail(the_change_points, 1),]
cass_mean <- mean(cass_split$Change); cass_sd <- sd(cass_split$Change);
cc_inner_limits <- c(cass_mean + cass_sd * 1, cass_mean - cass_sd * 1,
cass_mean + cass_sd * 2, cass_mean - cass_sd * 2)
cc_outer_limits <- c(cass_mean + cass_sd * 3, cass_mean - cass_sd * 3)
# * create new control plot ----
cl_plot <- ggplot(cass_data, aes(x=as.Date(Month), y=Change)) +
geom_line() +
labs(title="Cass Control Limits Based on Last Change Point", x="Year", y="Year-Over-Year Change") +
geom_hline(yintercept = cass_mean, color="steelblue") +
geom_hline(yintercept = cc_outer_limits, color="red") +
geom_hline(yintercept = cc_inner_limits, color="deep pink", linetype="dotted") +
theme_minimal() +
scale_x_date(date_labels="%y",date_breaks ="1 year")
# * panel visual ----
grid.arrange(iv_plot, mc_plot, cl_plot, ncol = 1, nrow = 3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment