Last active
July 26, 2021 17:48
-
-
Save euclidjda/a7de9feec2eaac90e73e to your computer and use it in GitHub Desktop.
Generate chart of value vs. growth investing recoveries since 1945 based on Kenneth F. French's data library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load libraries | |
library(ggplot2) | |
library(stringr) | |
library(scales) | |
library(ggthemes) | |
library(PerformanceAnalytics) | |
library(RColorBrewer) | |
# The URL for the data | |
url.name <- "http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp" | |
file.name <- "25_Portfolios_5x5_CSV.zip" | |
full.url <- paste(url.name, file.name, sep="/") | |
# You can change the starting date of the analysis by changing these variables | |
# Beware, however, that the source datafile starts in 1926 | |
start.year <- 1945 | |
start.month <- 1 | |
# You can change the ending date of the analysis by changing these variables | |
# Beware, however, that the source datafile may only be updated annually | |
# and therefore the most recent monthly may not be available | |
end.year <- 2016 | |
end.month <- 11 | |
window.width <- 5*12 # The rolling window width. 5 Years in this case | |
look.forward <- 8*12 # How far to look forward for the end of a value recovery | |
# Download the data and unzip it | |
temp.file <- tempfile() | |
download.file(full.url, temp.file) | |
file.list <- unzip(temp.file, list=TRUE) | |
# Parse the data | |
french.data <- read.csv(unzip(temp.file, | |
files=as.character(file.list[1,1])), | |
skip=19, | |
header=TRUE, | |
stringsAsFactors=FALSE) | |
names(french.data)[[1]] <- "DATE" | |
# Now we want to remove all the data below the end date | |
ds.year <- as.numeric(substr(french.data$DATE[[1]],1,4)) | |
ds.month <- as.numeric(substr(french.data$DATE[[1]],5,6)) | |
num.rows <- 12*(end.year-ds.year)+(end.month-ds.month)+1 | |
french.data <- head(french.data,num.rows) | |
date.seq <- as.Date(paste(french.data$DATE,"01",sep=""),"%Y%m%d") | |
french.data$DATE <- date.seq | |
# Transform the data so that the return cells are numeric decimal format | |
for (i in 2:ncol(french.data)) french.data[,i] <- as.numeric(str_trim(french.data[,i])) | |
for (i in 2:ncol(french.data)) french.data[,i] <- french.data[,i]/100 | |
# Now we calculate the the HML sequence from the factor sequences | |
# Column names are: DATE SMALL.LoBM ME1.BM2 SMALL.HiBM BIG.LoBM ME2.BM2 BIG.HiBM | |
fd <- french.data | |
french.data$Hi.Lo <- | |
0.2*( fd$SMALL.HiBM + fd$ME2.BM5 + fd$ME3.BM5 + fd$ME4.BM5 + fd$BIG.HiBM ) - | |
0.2*( fd$SMALL.LoBM + fd$ME2.BM1 + fd$ME3.BM1 + fd$ME4.BM1 + fd$BIG.LoBM ) | |
# Now create a time series of the HML data that we can pass off to apply.rolling | |
# and other PerformanceAnalytics functions | |
ts.data <- data.frame(french.data$Hi.Lo) | |
row.names(ts.data) <- date.seq | |
# Now calculate the a time series of rolling 5-year annualized returns | |
z <- apply.rolling(ts.data, width=window.width, FUN = "Return.annualized") | |
french.data$Hi.Lo.5YrRolling <- z[[1]] | |
rows <- nrow(french.data) | |
# The following loop programatically finds the value recoveries and fills | |
# the lists start.indexes and end.indexes with the start and end index of these | |
# recoveries in the french.data data frame. | |
# Beware that the results of this search are sensitive to the parameters | |
# window.width and look.forward and may change significantly if these | |
# are altered from the default values of 60 months and 96 months, respectively. | |
start.indexes <- c() | |
end.indexes <- c() | |
i <- max( (start.year-ds.year)*12+(start.month-ds.month), window.width) | |
while ( i < rows ) { | |
if (french.data$Hi.Lo.5YrRolling[i]<0) { | |
# A new periods starts when 5YrRolling turns negative | |
j <- i+1 | |
start.idx <- i | |
end.idx <- j | |
min.val <- french.data$Hi.Lo.5YrRolling[i] | |
max.val <- french.data$Hi.Lo.5YrRolling[i] | |
while ( j < i + look.forward) { | |
if ( j < rows ) { | |
if (french.data$Hi.Lo.5YrRolling[j]>max.val) { | |
max.val <- french.data$Hi.Lo.5YrRolling[j] | |
end.idx <- j | |
} | |
} else { | |
max.val <- french.data$Hi.Lo.5YrRolling[rows] | |
end.idx <- rows | |
} | |
j <- j+1 | |
} | |
start.indexes <- c(start.indexes,start.idx) | |
end.indexes <- c(end.indexes,end.idx) | |
# print info on the value recovery we found | |
str <- paste("Start=",french.data$DATE[start.idx], | |
"End=",french.data$DATE[end.idx], | |
"EndValue=",french.data$Hi.Lo.5YrRolling[end.idx], | |
"Duration=",(end.idx-start.idx)/12) | |
print(str) | |
# Restart search from where value recovery ends | |
i <- end.idx + 1 | |
} else { | |
i <- i+1 | |
} | |
} | |
# This last loop creates a long format data frame of all the value recoveries | |
# so that it is suitable for plotting with ggplot2 | |
series <- c() | |
months <- c() | |
values <- c() | |
for (i in 1:length(start.indexes)) { | |
start.idx <- start.indexes[i] | |
end.idx <- end.indexes[i] | |
str <- paste(format(french.data$DATE[start.idx], "%Y"), | |
format(french.data$DATE[end.idx] , "%Y"), sep="-") | |
cur.value <- 0 | |
series <- c(series,str) | |
months <- c(months,1) | |
values <- c(values,cur.value) | |
# for each month in the recovery we calculate the cur.value of | |
# the cumulative compounded return | |
for (j in start.idx:end.idx) { | |
cur.value <- (cur.value+1)*(french.data$Hi.Lo[j]+1)-1 | |
series <- c(series,str) | |
months <- c(months,j-start.idx+2) | |
values <- c(values,cur.value) | |
} | |
} | |
# Create the data frame with the three columns series, months, and value | |
recov.data <- data.frame(series,months,values) | |
# Create a suitable color palette | |
colors <- rev( brewer.pal(7,"Blues") )[1:5] | |
colors <- c(colors, '#DD592D' ) | |
# The most current line size will be 2 while the rest are 1 | |
sizes <- rep( 1, length(start.indexes)-1 ) | |
sizes <- c(sizes, 2) | |
# Plot the data | |
g <- ggplot(recov.data,aes(x=months,y=values,size=series,group=series,colour=series)) | |
g <- g + geom_hline(aes(yintercept=0.0),color="grey",linetype=1) | |
g <- g + geom_line() | |
g <- g + scale_size_manual(values=sizes) | |
g <- g + scale_colour_manual(values=colors ) | |
g <- g + scale_x_discrete( "", breaks=c( 1, 25, 50, 75 ) ) | |
g <- g + scale_y_continuous( "", labels=percent ) | |
g <- g + guides(fill=FALSE) | |
g + theme_bw() + theme(panel.background=element_blank() , | |
panel.border=element_blank() ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment