Skip to content

Instantly share code, notes, and snippets.

@hckim1991
Created October 24, 2020 04:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hckim1991/e308d6b5caccc242be1847d15da0c8a5 to your computer and use it in GitHub Desktop.
Save hckim1991/e308d6b5caccc242be1847d15da0c8a5 to your computer and use it in GitHub Desktop.
Shiny rail project
library(tidyverse)
library(quantmod)
library(shiny)
library(shinydashboard)
library(dashboardthemes)
library(scales)
library(RColorBrewer)
library(zoo)
library(shinyjs)
#Read carload data
#Source: https://www.stb.gov/stb/railserviceissues/rail_service_reports.html
carloads = read.csv('./EP724 Data.csv')
#Convert columns do dates
for (i in 7:ncol(carloads)) {
colnames(carloads)[i] = as.character(as.Date(colnames(carloads)[i], "X%m.%d.%Y"))
}
colnames(carloads)[1] = 'Name'
#Make sure all numbers are of numeric type (not character)
carloads[, 7:ncol(carloads)] = mutate_at(carloads[, 7:ncol(carloads)], 1:(ncol(carloads)-6),
function(x) {as.numeric(str_replace_all(x, ',', ''))})
#Filter for carloads, sum up carloads by railroad, transpose, then clean up column names
carloads_temp = carloads %>%
filter(Measure == 'Weekly Carloads By 22 Commodity Categories' & carloads$Sub.Variable == 'Total') %>%
group_by(Name) %>%
summarize_at(6:(ncol(carloads)-1), sum) %>%
t()
colnames(carloads_temp) = carloads_temp[1, ]
matchname = function(x) {#Change column names to be consistent with stock tickers
return(ifelse(x == 'CN', 'CNI',
ifelse(x == 'KCS', 'KSU',
ifelse(x == 'NS', 'NSC',
ifelse(x == 'UP', 'UNP', x)))))
}
colnames(carloads_temp) = sapply(colnames(carloads_temp), matchname)
carloads_temp = carloads_temp[-1, ]
carloads_temp = as.data.frame(carloads_temp) #Convert back to data frame since it's a matrix currently
carloads_temp = carloads_temp %>%
mutate(Date = rownames(carloads_temp))
carloads_temp = mutate_at(carloads_temp, 1:7, as.numeric)
#Final clean up of carloads data to stack by railroad
carloads_final = arrange(pivot_longer(carloads_temp, 1:7, names_to = 'Name', values_to = 'Carloads'), Name)
carloads_final$Date = as.Date(carloads_final$Date)
carloads_final$Date = carloads_final$Date - 5 #Subtract 5 days to be consistent with stocks data below
#Supplementary carload data for the intro section
carloads_type = carloads %>%
filter(Measure == 'Weekly Carloads By 22 Commodity Categories' & carloads$Sub.Variable == 'Total') %>%
group_by(Variable) %>%
summarize_at(6:(ncol(carloads)-1), sum) %>%
select(-1) %>%
rowSums()
carloads_name = carloads %>%
filter(Measure == 'Weekly Carloads By 22 Commodity Categories' & carloads$Sub.Variable == 'Total') %>%
group_by(Variable) %>%
summarize_at(6:(ncol(carloads)-1), sum) %>%
select(1)
carloads_by_type = data.frame(Segment = carloads_name, Total = carloads_type)
#Load stock prices
start = as.Date(carloads_temp$Date[1])
end = as.Date(carloads_temp$Date[nrow(carloads_temp)])
getSymbols(c('^GSPC', 'IYT', 'XLI', 'CSX', 'CNI', 'CP', 'KSU', 'NSC', 'UNP'),
from = start - 5, to = end - 5, return.class = 'data.frame')
#Cbind datasets and create new columns for relative prices
stocks = cbind(GSPC, IYT, XLI, CSX, CNI, CP, KSU, NSC, UNP)
stocks = stocks %>%
mutate(Date = as.Date(rownames(stocks))) %>%
select(Date, contains('.Close')) %>%
mutate(CSX.SandP = CSX.Close / GSPC.Close,
CSX.IYT = CSX.Close / IYT.Close,
CSX.XLI = CSX.Close / XLI.Close,
CNI.SandP = CNI.Close / GSPC.Close,
CNI.IYT = CNI.Close / IYT.Close,
CNI.XLI = CNI.Close / XLI.Close,
CP.SandP = CP.Close / GSPC.Close,
CP.IYT = CP.Close / IYT.Close,
CP.XLI = CP.Close / XLI.Close,
KSU.SandP = KSU.Close / GSPC.Close,
KSU.IYT = KSU.Close / IYT.Close,
KSU.XLI = KSU.Close / XLI.Close,
NSC.SandP = NSC.Close / GSPC.Close,
NSC.IYT = NSC.Close / IYT.Close,
NSC.XLI = NSC.Close / XLI.Close,
UNP.SandP = UNP.Close / GSPC.Close,
UNP.IYT = UNP.Close / IYT.Close,
UNP.XLI = UNP.Close / XLI.Close) %>%
select(-(2:4))
stocks = stocks[, c('Date', sort(colnames(stocks)[2:ncol(stocks)]))]
#Final clean up of stock data to stack by railroad (not possible to use pivot_longer here)
stocks_final = data.frame(Name = c(), Date = c(), Price = c(),
Relative.To.IYT = c(), Relative.To.XLI = c(), Relative.To.SPY = c())
for (i in seq(2, ncol(stocks), 4)) {
temp = data.frame(Name = rep(str_sub(colnames(stocks)[i], 1, str_locate(colnames(stocks)[i], '\\.')[1]-1),
nrow(stocks)),
Date = stocks$Date,
Price = stocks[, i],
Relative.To.IYT = stocks[, i+1],
Relative.To.XLI = stocks[, i+3],
Relative.To.SPY = stocks[, i+2])
stocks_final = rbind(stocks_final, temp)
}
#Join stocks_final with carloads_final
df_main = left_join(carloads_final, stocks_final, by = c('Name', 'Date'))
df_main_reactive = df_main %>%
rename(Carloads.1 = Carloads) %>%
group_by(Name) %>%
mutate(Carloads.4 = rollmean(Carloads.1, k = 4, fill = NA, align = 'right'),
Carloads.12 = rollmean(Carloads.1, k = 12, fill = NA, align = 'right'),
Carloads.52 = rollmean(Carloads.1, k = 52, fill = NA, align = 'right'))
#Create data frames for US vs. Canadian rail analysis
temp_CSX = df_main %>% filter(Name == 'CSX') %>% select(1, 3:4)
temp_NSC = df_main %>% filter(Name == 'NSC') %>% select(1, 3:4)
temp_UNP = df_main %>% filter(Name == 'UNP') %>% select(1, 3:4)
df_US = inner_join(inner_join(temp_CSX, temp_NSC, by = 'Date'), temp_UNP, by = 'Date')
df_US = df_US %>%
mutate(Carloads_US = Carloads.x + Carloads.y + Carloads,
Price_US = (Price.x + Price.y + Price) / 3,
YoY_US = Carloads_US / lag(Carloads_US, 52) - 1) %>%
select(Date, Carloads_US, Price_US, YoY_US)
temp_CNI = df_main %>% filter(Name == 'CNI') %>% select(1, 3:4)
temp_CP = df_main %>% filter(Name == 'CP') %>% select(1, 3:4)
df_CAD = inner_join(temp_CNI, temp_CP, by = 'Date')
df_CAD = df_CAD %>%
mutate(Carloads_CAD = Carloads.x + Carloads.y,
Price_CAD = (Price.x + Price.y) / 2,
YoY_CAD = Carloads_CAD / lag(Carloads_CAD, 52) - 1) %>%
select(Date, Carloads_CAD, Price_CAD, YoY_CAD)
df_USCAD = inner_join(df_US, df_CAD, by = 'Date')
df_USCAD = df_USCAD %>%
mutate(Relative.Carloads = Carloads_US / Carloads_CAD,
Relative.Price = Price_US / Price_CAD,
Relative.YoY = YoY_US - YoY_CAD) %>%
select(Date, Relative.Carloads, Relative.Price, Relative.YoY)
#Variables to scale the second axis of US rails vs. Canadian rails analysis
scale_uc1 = (max(df_USCAD$Relative.Carloads) - min(df_USCAD$Relative.Carloads)) /
(max(df_USCAD$Relative.Price, na.rm = T) - min(df_USCAD$Relative.Price, na.rm = T))
translation_uc1 = min(df_USCAD$Relative.Carloads)
scale_uc2 = (max(df_USCAD$Relative.YoY, na.rm = T) - min(df_USCAD$Relative.YoY, na.rm = T)) /
(max(df_USCAD$Relative.Price, na.rm = T) - min(df_USCAD$Relative.Price, na.rm = T))
translation_uc2 = min(df_USCAD$Relative.YoY, na.rm = T)
#Pull GDP data for the Intro section and cbind with carloads
getSymbols('GDP', src = 'FRED')
GDP = as.data.frame(GDP)
GDP_final = cbind(rownames(GDP), GDP)
rownames(GDP_final) = NULL
GDP_final = GDP_final %>%
rename(Date = 'rownames(GDP)') %>%
filter(Date > '2000-01-01') %>%
mutate(Date = str_c(format(as.Date(Date), '%y'), quarters(as.Date(Date)))) %>%
group_by(Date) %>%
summarize(GDP = sum(GDP))
carloads_GDP = carloads_temp %>%
mutate(Total = BNSF + CNI + CP + CSX + KSU + NSC + UNP,
Date = str_c(format(as.Date(Date), '%y'), quarters(as.Date(Date)))) %>%
select(Date, Total) %>%
group_by(Date) %>%
summarize(Total = sum(Total)) %>%
inner_join(GDP_final, by = 'Date')
#Variables to scale the second axis of carloads vs. GDP plot
scale_cg = (max(carloads_GDP[-1, ]$Total) - min(carloads_GDP[-1, ]$Total)) /
(max(carloads_GDP[-1, ]$GDP) - min(carloads_GDP[-1, ]$GDP))
translation_cg = min(carloads_GDP[-1, ]$Total)
#ISM data analysis
#Source: https://www.thefinancials.com/ShowEvent.aspx?s=ismmfg&pid=free&section=usecon
ISM = read.csv('./ISM.csv')
ISM = rownames_to_column(ISM)
colnames(ISM) = ISM[1, ]
ISM = ISM[-1, ]
ISM[, 2] = as.numeric(ISM[, 2]) * 100
ISM[, 1] = as.Date(ISM[, 1], '%m/%d/%Y')
carloads_ISM = carloads_temp %>%
mutate(Total = BNSF + CNI + CP + CSX + KSU + NSC + UNP,
Date = as.Date(format(as.Date(Date), '%Y-%m-01'))) %>%
select(Date, Total) %>%
group_by(Date) %>%
summarize(Total = sum(Total)) %>%
inner_join(ISM, by = 'Date')
#Variables to scale the second axis of carloads vs. ISM plot
scale_ci = (max(carloads_ISM[-1, ]$Total) - min(carloads_ISM[-1, ]$Total)) /
(max(carloads_ISM[-1, ]$Last) - min(carloads_ISM[-1, ]$Last))
translation_ci = min(carloads_ISM[-1, ]$Total)
#ISM vs. GDP data
ISMGDP = ISM %>%
mutate(Date = str_c(format(Date, '%y'), quarters(Date))) %>%
group_by(Date) %>%
summarize(ISM = mean(Last)) %>%
inner_join(GDP_final, by = 'Date')
#Variables to scale the second axis of ISM vs. GDP plot
scale_ig = (max(ISMGDP$ISM) - min(ISMGDP$ISM)) /
(max(ISMGDP$GDP) - min(ISMGDP$GDP))
translation_ig = min(ISMGDP$ISM)
function(input, output, session) {
#Intro section: Rail carload mix plot
output$rails = renderPlot(
carloads_by_type %>%
ggplot(aes(x = reorder(Variable, Total / sum(Total)), y = Total / sum(Total))) +
geom_col() +
scale_y_continuous(name = '% of Total Carloads', label = percent) +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
plot.title = element_text(size = 25, face = 'bold'), axis.text.y = element_text(face = 'bold'),
axis.text.x = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
xlab("Carload Type") +
labs(fill = 'Carload Type') +
coord_flip() +
ggtitle('Rail Carload Mix')
)
#Intro section: Rail carloads. vs. ISM plot
output$CarloadISM = renderPlot(
carloads_ISM[-1, ] %>% #Skip 1st row since March 2017 has incomplete data.
ggplot(aes(x = Date)) +
geom_line(aes(y = Total, group = 1), color = 'red', size = 1.5) +
#axis transformation
geom_line(aes(y = (Last - min(Last)) * scale_ci + translation_ci, group = 1), color = 'blue', size = 1.5) +
scale_y_continuous(
name = 'Total Carloads',
labels = scientific,
#axis transformation
sec.axis = sec_axis(~./scale_ci + min(carloads_ISM[-1, ]$Last) - translation_ci / scale_ci,
name = 'ISM Manufacturing Index', labels = number_format(accuracy = 0.1))
) +
scale_x_date(breaks = '3 months') +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
ggtitle('Rail carloads (Red) vs. ISM Manufacturing Index (Blue)')
)
#Intro section: ISM vs. GDP
output$ISMGDP = renderPlot(
ISMGDP %>%
ggplot(aes(x = Date)) +
geom_line(aes(y = ISM, group = 1), color = 'red', size = 1.5) +
#axis transformation
geom_line(aes(y = (GDP - min(GDP)) * scale_ig + translation_ig,
group = 1), color = 'blue', size = 1.5) +
scale_y_continuous(
name = 'ISM',
#axis transformation
sec.axis = sec_axis(~./scale_ig + min(ISMGDP$GDP) - translation_ig / scale_ig,
name = 'US GDP ($ bn)')
) +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
ggtitle('ISM (Red) vs. US GDP (Blue)')
)
#Intro section: Rail carloads. vs. GDP plot
output$CarloadGDP = renderPlot(
carloads_GDP[-1, ] %>% #Skip 1st row since 1Q17 has incomplete data.
ggplot(aes(x = Date)) +
geom_line(aes(y = Total, group = 1), color = 'red', size = 1.5) +
#axis transformation
geom_line(aes(y = (GDP - min(GDP)) * scale_cg + translation_cg, group = 1), color = 'blue', size = 1.5) +
scale_y_continuous(
name = 'Total Carloads',
labels = scientific,
#axis transformation
sec.axis = sec_axis(~./scale_cg + min(carloads_GDP[-1, ]$GDP) - translation_cg / scale_cg,
name = 'US GDP ($ bn)')
) +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
ggtitle('Rail Carloads (Red) vs. US GDP (Blue)')
)
#Create a reactive df for the Rails section
df_reactive = reactive(
df_main_reactive %>%
filter(Name == input$rail_selected) %>%
select('Date', 'Name', str_c('Carloads.', as.character(input$ma)), 'Price', 'Relative.To.IYT',
'Relative.To.XLI', 'Relative.To.SPY') %>%
rename(Carloads = 3)
)
#Rails section: Single rail analysis
output$RelToSelf = renderPlot(
df_reactive() %>%
ggplot(aes(x = Date)) +
geom_line(aes(y = Carloads, group = 1), color = 'black', size = 1.5) +
#axis transformation (more complicated since it's reactive but the concept is the same)
geom_line(aes(y = (Price - min(Price, na.rm = T)) * ((max(Carloads, na.rm = T) - min(Carloads, na.rm = T)) /
(max(Price, na.rm = T) - min(Price, na.rm = T))) + min(Carloads, na.rm = T),
group = 1), color = 'blue', size = 1.5) +
scale_y_continuous(
name = 'Total Carloads',
labels = scientific,
#axis transformation (more complicated since it's reactive but the concept is the same)
sec.axis = sec_axis(~./((max(df_reactive()$Carloads, na.rm = T) - min(df_reactive()$Carloads, na.rm = T)) /
(max(df_reactive()$Price, na.rm = T) - min(df_reactive()$Price, na.rm = T))) +
min(df_reactive()$Price, na.rm = T) -
min(df_reactive()$Carloads, na.rm = T) /
((max(df_reactive()$Carloads, na.rm = T) - min(df_reactive()$Carloads, na.rm = T)) /
(max(df_reactive()$Price, na.rm = T) - min(df_reactive()$Price, na.rm = T))),
name = 'Stock Price',
labels = dollar_format(accuracy = 0.01))
) +
scale_x_date(breaks = '3 months') +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
ggtitle('Carloads (Black) vs. Stock Price (Blue)') +
annotate('rect', xmin = as.Date('2017-03-01'), xmax = as.Date('2019-07-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-01-01'), xmax = as.Date('2020-02-29'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-06-01'), xmax = max(df_reactive()$Date),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2019-08-01'), xmax = as.Date('2019-12-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkred') +
annotate('rect', xmin = as.Date('2020-03-01'), xmax = as.Date('2020-05-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkred')
)
#Rails section: Rail analysis vs. IYT
output$RelToIYT = renderPlot(
df_reactive() %>%
ggplot(aes(x = Date)) +
geom_line(aes(y = Carloads, group = 1), color = 'black', size = 1.5) +
#axis transformation (more complicated since it's reactive but the concept is the same)
geom_line(aes(y = (Relative.To.IYT - min(Relative.To.IYT, na.rm = T)) *
((max(Carloads, na.rm = T) - min(Carloads, na.rm = T)) /
(max(Relative.To.IYT, na.rm = T) - min(Relative.To.IYT, na.rm = T))) +
min(Carloads, na.rm = T),
group = 1), color = 'blue', size = 1.5) +
scale_y_continuous(
name = 'Total Carloads',
labels = scientific,
#axis transformation (more complicated since it's reactive but the concept is the same)
sec.axis = sec_axis(~./((max(df_reactive()$Carloads, na.rm = T) - min(df_reactive()$Carloads, na.rm = T)) /
(max(df_reactive()$Relative.To.IYT, na.rm = T) - min(df_reactive()$Relative.To.IYT, na.rm = T))) +
min(df_reactive()$Relative.To.IYT, na.rm = T) -
min(df_reactive()$Carloads, na.rm = T) /
((max(df_reactive()$Carloads, na.rm = T) - min(df_reactive()$Carloads, na.rm = T)) /
(max(df_reactive()$Relative.To.IYT, na.rm = T) - min(df_reactive()$Relative.To.IYT, na.rm = T))),
name = 'Stock Price / IYT')
) +
scale_x_date(breaks = '3 months') +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
ggtitle('Carloads (Black) vs. Stock Price/IYT (Blue)') +
annotate('rect', xmin = as.Date('2017-03-01'), xmax = as.Date('2019-07-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-01-01'), xmax = as.Date('2020-02-29'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-06-01'), xmax = max(df_reactive()$Date),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2019-08-01'), xmax = as.Date('2019-12-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkred') +
annotate('rect', xmin = as.Date('2020-03-01'), xmax = as.Date('2020-05-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkred')
)
observeEvent(input$hideshow, toggle("RelToIYT"))
#Rails section: Rail analysis vs. XLI
output$RelToXLI = renderPlot(
df_reactive() %>%
ggplot(aes(x = Date)) +
geom_line(aes(y = Carloads, group = 1), color = 'black', size = 1.5) +
#axis transformation (more complicated since it's reactive but the concept is the same)
geom_line(aes(y = (Relative.To.XLI - min(Relative.To.XLI, na.rm = T)) *
((max(Carloads, na.rm = T) - min(Carloads, na.rm = T)) /
(max(Relative.To.XLI, na.rm = T) - min(Relative.To.XLI, na.rm = T))) +
min(Carloads, na.rm = T),
group = 1), color = 'blue', size = 1.5) +
scale_y_continuous(
name = 'Total Carloads',
labels = scientific,
#axis transformation (more complicated since it's reactive but the concept is the same)
sec.axis = sec_axis(~./((max(df_reactive()$Carloads, na.rm = T) - min(df_reactive()$Carloads, na.rm = T)) /
(max(df_reactive()$Relative.To.XLI, na.rm = T) - min(df_reactive()$Relative.To.XLI, na.rm = T))) +
min(df_reactive()$Relative.To.XLI, na.rm = T) -
min(df_reactive()$Carloads, na.rm = T) /
((max(df_reactive()$Carloads, na.rm = T) - min(df_reactive()$Carloads, na.rm = T)) /
(max(df_reactive()$Relative.To.XLI, na.rm = T) - min(df_reactive()$Relative.To.XLI, na.rm = T))),
name = 'Stock Price / XLI')
) +
scale_x_date(breaks = '3 months') +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
ggtitle('Carloads (Black) vs. Stock Price/XLI (Blue)') +
annotate('rect', xmin = as.Date('2017-03-01'), xmax = as.Date('2019-07-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-01-01'), xmax = as.Date('2020-02-29'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-06-01'), xmax = max(df_reactive()$Date),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2019-08-01'), xmax = as.Date('2019-12-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkred') +
annotate('rect', xmin = as.Date('2020-03-01'), xmax = as.Date('2020-05-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkred')
)
observeEvent(input$hideshow, toggle("RelToXLI"))
#Rails section: Rail analysis vs. S&P500
output$RelToSPY = renderPlot(
df_reactive() %>%
ggplot(aes(x = Date)) +
geom_line(aes(y = Carloads, group = 1), color = 'black', size = 1.5) +
#axis transformation (more complicated since it's reactive but the concept is the same)
geom_line(aes(y = (Relative.To.SPY - min(Relative.To.SPY, na.rm = T)) *
((max(Carloads, na.rm = T) - min(Carloads, na.rm = T)) /
(max(Relative.To.SPY, na.rm = T) - min(Relative.To.SPY, na.rm = T))) +
min(Carloads, na.rm = T),
group = 1), color = 'blue', size = 1.5) +
scale_y_continuous(
name = 'Total Carloads',
labels = scientific,
#axis transformation (more complicated since it's reactive but the concept is the same)
sec.axis = sec_axis(~./((max(df_reactive()$Carloads, na.rm = T) - min(df_reactive()$Carloads, na.rm = T)) /
(max(df_reactive()$Relative.To.SPY, na.rm = T) - min(df_reactive()$Relative.To.SPY, na.rm = T))) +
min(df_reactive()$Relative.To.SPY, na.rm = T) -
min(df_reactive()$Carloads, na.rm = T) /
((max(df_reactive()$Carloads, na.rm = T) - min(df_reactive()$Carloads, na.rm = T)) /
(max(df_reactive()$Relative.To.SPY, na.rm = T) - min(df_reactive()$Relative.To.SPY, na.rm = T))),
name = 'Stock Price / S&P500')
) +
scale_x_date(breaks = '3 months') +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
ggtitle('Carloads (Black) vs. Stock Price/S&P500 (Blue)') +
annotate('rect', xmin = as.Date('2017-03-01'), xmax = as.Date('2019-07-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-01-01'), xmax = as.Date('2020-02-29'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-06-01'), xmax = max(df_reactive()$Date),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2019-08-01'), xmax = as.Date('2019-12-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkred') +
annotate('rect', xmin = as.Date('2020-03-01'), xmax = as.Date('2020-05-31'),
ymin = min(df_reactive()$Carloads, na.rm = T), ymax = max(df_reactive()$Carloads, na.rm = T),
alpha = 0.2, fill = 'darkred')
)
#USCAD section: US rails vs. Canadian rails Chart 1 (relative carloads vs. relative stock price )
output$uscad.analysis1 = renderPlot(
df_USCAD %>%
ggplot(aes(x = Date)) +
geom_line(aes(y = Relative.Carloads, group = 1), color = 'black', size = 1.5) +
#axis transformation
geom_line(aes(y = (Relative.Price - min(Relative.Price, na.rm = T)) * scale_uc1 + translation_uc1,
group = 1), color = 'blue', size = 1.5) +
scale_y_continuous(
name = 'US carloads / Canadian carloads',
#axis transformation
sec.axis = sec_axis(~./scale_uc1 + min(df_USCAD$Relative.Price, na.rm = T) - translation_uc1 / scale_uc1,
name = 'US Rail Stock Price / Canadian Rail Stock Price')
) +
scale_x_date(breaks = '3 months') +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
ggtitle('Relative Carloads (Black) vs. Relative Stock Price (Blue)') +
annotate('rect', xmin = as.Date('2017-03-01'), xmax = as.Date('2019-07-31'),
ymin = min(df_USCAD$Relative.Carloads), ymax = max(df_USCAD$Relative.Carloads),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-01-01'), xmax = as.Date('2020-02-29'),
ymin = min(df_USCAD$Relative.Carloads), ymax = max(df_USCAD$Relative.Carloads),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-06-01'), xmax = max(df_USCAD$Date),
ymin = min(df_USCAD$Relative.Carloads), ymax = max(df_USCAD$Relative.Carloads),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2019-08-01'), xmax = as.Date('2019-12-31'),
ymin = min(df_USCAD$Relative.Carloads), ymax = max(df_USCAD$Relative.Carloads),
alpha = 0.2, fill = 'darkred') +
annotate('rect', xmin = as.Date('2020-03-01'), xmax = as.Date('2020-05-31'),
ymin = min(df_USCAD$Relative.Carloads), ymax = max(df_USCAD$Relative.Carloads),
alpha = 0.2, fill = 'darkred')
)
#USCAD section: US rails vs. Canadian rails Chart 1 (relative YoY vs. relative stock price )
output$uscad.analysis2 = renderPlot(
df_USCAD %>%
ggplot(aes(x = Date)) +
geom_line(aes(y = Relative.YoY, group = 1), color = 'black', size = 1.5) +
#axis transformation
geom_line(aes(y = (Relative.Price - min(Relative.Price, na.rm = T)) * scale_uc2 + translation_uc2,
group = 1), color = 'blue', size = 1.5) +
scale_y_continuous(
name = 'YoY change in US carloads - YoY change in Canadian carloads',
labels = percent_format(accuracy = 0.1),
#axis transformation
sec.axis = sec_axis(~./scale_uc2 + min(df_USCAD$Relative.Price, na.rm = T) - translation_uc2 / scale_uc2,
name = 'US Rail Stock Price / Canadian Rail Stock Price')
) +
scale_x_date(breaks = '3 months') +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold')) +
ggtitle('YoY US Carloads less YoY Canadian Carloads (Black) vs. Relative Stock Price (Blue)') +
annotate('rect', xmin = as.Date('2017-03-01'), xmax = as.Date('2019-07-31'),
ymin = min(df_USCAD$Relative.YoY, na.rm = T), ymax = max(df_USCAD$Relative.YoY, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-01-01'), xmax = as.Date('2020-02-29'),
ymin = min(df_USCAD$Relative.YoY, na.rm = T), ymax = max(df_USCAD$Relative.YoY, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2020-06-01'), xmax = max(df_USCAD$Date),
ymin = min(df_USCAD$Relative.YoY, na.rm = T), ymax = max(df_USCAD$Relative.YoY, na.rm = T),
alpha = 0.2, fill = 'darkgreen') +
annotate('rect', xmin = as.Date('2019-08-01'), xmax = as.Date('2019-12-31'),
ymin = min(df_USCAD$Relative.YoY, na.rm = T), ymax = max(df_USCAD$Relative.YoY, na.rm = T),
alpha = 0.2, fill = 'darkred') +
annotate('rect', xmin = as.Date('2020-03-01'), xmax = as.Date('2020-05-31'),
ymin = min(df_USCAD$Relative.YoY, na.rm = T), ymax = max(df_USCAD$Relative.YoY, na.rm = T),
alpha = 0.2, fill = 'darkred')
)
#Conclusion section: Rail vs. SPY over time
colnames(stocks)
output$railhistory = renderPlot(
stocks_final %>%
ggplot(aes(x = Date, y = Relative.To.SPY)) +
geom_line(aes(color = Name), size = 1.5) +
scale_y_continuous(
name = 'Stock price relative to S&P500') +
scale_x_date(breaks = '3 months') +
theme_bw() +
theme(panel.grid.major.y = element_blank(), panel.grid.major.x = element_blank(),
panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = -45, face = 'bold'), plot.title = element_text(size = 25, face = 'bold'),
axis.text.y = element_text(face = 'bold'), axis.title.y = element_text(face = 'bold'),
axis.title.x = element_text(face = 'bold'), legend.text = element_text(face = 'bold'),
legend.title = element_text(face = 'bold')) +
labs(color = 'Stock') +
scale_color_brewer(palette = 'Accent') +
ggtitle('Rail Stock Prices Relative to S&P 500 Over Time')
)
}
dashboardPage(
dashboardHeader(title = "Rail carloads vs. Stock prices", titleWidth = 300),
dashboardSidebar(width = 300,
sidebarUserPanel("Hong C. Kim",
image = ''),
sidebarMenu(
menuItem("Intro", tabName = "intro", icon = icon("book")),
menuItem("Class 1 Rails", tabName = "rails", icon = icon("train")),
menuItem("US Rails vs. Canadian Rails", tabName = "uscad", icon = icon("balance-scale")),
menuItem("Conclusion", tabName = "conclusion", icon = icon("sticky-note")),
menuItem("About Me", tabName = "aboutme", icon = icon("question-circle")))
),
dashboardBody(
shinyDashboardThemes(
theme = 'purple_gradient'
),
tabItems(
tabItem(tabName = "intro",
fluidRow(h2("Background on Rails"),
box("If Warren Buffet were stuck on a deserted island and could only have
access to one economic indicator, he was quoted to choose rail carloads (he owns
Burlington Northern, one of the largest railroads in the US). While
rails only constitute mid-single-digit percentage of the US transportation
spend, because they move such diverse types of freight from coal to grain to
automobiles, the amount of carloads moved by rails represent an excellent indicator
of the US economy. The frequency of data availability also makes it attractive
since class 1 rails (largest railroads in the US) are required to report weekly
how many carloads they move each week.",
br(),
plotOutput('rails'),
width = 12
),
br()
),
fluidRow(h2("Economic Relationship"),
box("Consistent with Buffet's claims, the plot below shows a clear relationship between
rail carloads and ISM Manufacturing Index, which is one of the most important metrics
for the US economy. ISM is a monthly survey that measures purchasing managers' sentiment
at more than 300 companies. ISM >= 50 implies US economy is healthy while ISM < 50 implies
US economy is weak.",
plotOutput('CarloadISM'),
br(),
"The chart below shows that ISM is a great leading indicator of US GDP.",
plotOutput('ISMGDP'),
br(),
"Directly plotting rail carloads vs. GDP shows a less clear relationship as below but
this is due to the short timeframe (only 13 quarterly data used below). With more data,
there will be a clear relationship.",
plotOutput('CarloadGDP'),
width = 12),
br()
),
fluidRow(h2("Goal"),
box("Given the importance of rails to the US economy, it is no surprise that the rail stocks
have a wide investor base.", tags$u(tags$b("The goal of this project is to identify trading
opportunities based on the relationship between carloads and rail stock prices under
different macro environments.")), "This is a common analysis hedge funds do on the rails
and this web app will automate the analysis.",
width = 12),
br()
)
),
tabItem(tabName = "rails",
fluidRow(box(column(width = 6,
selectizeInput("rail_selected",
"Which Rail? (BNSF is private so will only show carloads)",
unique(df_main$Name))
),
column(width = 6,
selectizeInput("ma",
"Moving Average (1 = default, 4 = 1 month,
12 = 1 quarter, 52 = 1 year)",
c(1, 4, 12, 52))
),
width = 12
),
box(useShinyjs(),
checkboxInput('hideshow', 'Show IYT and XLI Analysis', value = F),
width = 12
)
),
fluidRow(h2("Single Rail Analysis"),
"Green area: ISM >= 50, Red area: ISM < 50",
box(plotOutput('RelToSelf'), width = 12)),
fluidRow(h2("Vs. IYT (transportation index)"),
"Green area: ISM >= 50, Red area: ISM < 50",
box(plotOutput('RelToIYT'), width = 12)),
fluidRow(h2("Vs. XLI (industrials index)"),
"Green area: ISM >= 50, Red area: ISM < 50",
box(plotOutput('RelToXLI'), width = 12)),
fluidRow(h2("Vs. S&P500"),
"Green area: ISM >= 50, Red area: ISM < 50",
box(plotOutput('RelToSPY'), width = 12))
),
tabItem(tabName = "uscad",
fluidRow(h2("US rails (CSX, NSC, UNP) vs. Canadian rails (CP, CNI)"),
"Green area: ISM >= 50, Red area: ISM < 50",
br(),
"Caveat: Canadian rail volumes only reflect the US portion.",
box(plotOutput('uscad.analysis1'), width = 12),
br()),
fluidRow(box(plotOutput('uscad.analysis2'), width = 12))
),
tabItem(tabName = "conclusion",
fluidRow(h2("Conclusion"),
box("Before making any conclusion, it's important to keep in mind that it's hard to lose
from buying the rails per the chart below.",
br(),
plotOutput('railhistory'),
"Over the past 3 years, most rails have outperformed the S&P500 index. In fact, the
sector has outperformed the S&P500 index 16 out of the last 17 years. This is because
Class 1 rails are essentially duopolies (2 in Canada, 2 in the Eastern US, and 2 in the
Western US) with great pricing power, consistent margin expansion, and solid free cash
flow generation. That said, this project will still aid in picking the best time to
buy the rails and narrow down which one has the highest chance of becoming a successful
investment.",
br(),
br(),
tags$ul(
tags$li("Starting with big picture, the very first conclusion is that rail stocks are
more likely to work when ISM is >= 50, which makes sense since they will move
less carloads and make less money when ISM is below 50."),
tags$li("Narrowing down the opportunity set a bit, US rails seem to be a better value
right now than the Canadian rails as of 10/18/20."),
tags$li("On an individual rail basis, as of 10/18/20",
tags$ul(
tags$li("CP and CNI seem to have ran ahead of what's justified by their carloads."),
tags$li("Between the 2 Eastern rails, CSX seems to be a better value than NSC."),
tags$li("In the west, both UNP and KSU seem rich but KSU is a better value."),
tags$li(tags$u(tags$b("Out of all the rails, CSX seems to be the best value.")))
)
)
),
"Final caveat is that while carloads explain the majority of rail stock price movement
in the long run, other exogenous factors could drive the stock prices in the near term.",
width = 12))
),
tabItem(tabName = "aboutme",
fluidRow(h2("Hong C. Kim"),
box(column(width = 3,
img(src = '')),
column(width = 9,
"Hong is a data science fellow at New York City Data Science Academy (NYCDSA)
with expected graduation date of December 2020. His domain expertise lies in
the US equity market, where he spent 7 years in the hedge fund industry investing
in the industrials sector (specifically automobile and transportation companies).
He hopes to complement his investing skillsets with data science to become a better
investor and find unique ways to outperform the ever more competitive stock market.",
br(),
br(),
tags$b("LinkedIn: "),
tags$u(tags$a(href = "https://www.linkedin.com/in/hong-c-kim/",
"https://www.linkedin.com/in/hong-c-kim/")),
br(),
tags$b("GitHub: "),
tags$u(tags$a(href = "https://github.com/hckim1991",
"https://github.com/hckim1991")),
br(),
tags$b("Email: "),
tags$u("hk486@cornell.edu")
),
width = 12)
)
)
)
)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment