Skip to content

Instantly share code, notes, and snippets.

@chrisconlan
Last active January 15, 2017 02:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chrisconlan/dfc624b6c158ac2a43caaef6a91480a8 to your computer and use it in GitHub Desktop.
Save chrisconlan/dfc624b6c158ac2a43caaef6a91480a8 to your computer and use it in GitHub Desktop.
####################################################################
# Compute n-over-n returns for various n on S&P 500 data. #
# By: Chris Conlan #
# http://chrisconlan.com/sp500-long-haul-figures-to-keep-in-mind/ #
####################################################################
# Attach zoo up front; it supplies the ordered series container used below
library(zoo)

# Read the raw price history (edit the path to match where the CSV lives)
sp <- read.csv(file = "~/script_and_data/sp500.csv", stringsAsFactors = FALSE)

# Holding periods to analyze, expressed in years
yvec <- c(0.0833, 0.25, 0.5, 1, 2, 3, 5, 8, 10, 12, 15, 20)

# Lay out the graphics device as 3 rows by 1 column
par(mfrow = c(3, 1))

# Replace the data frame with a zoo series of closing prices, ordered by the
# parsed Date column; every other column is discarded
sp <- zoo(
  sp$Close,
  order.by = strptime(sp$Date, format = "%Y-%m-%d", tz = "EST")
)

# Accumulator for the summary table; starts out empty
out_mat <- NULL
# Breakeven probability in percent: the share of returns lying above the
# midpoint between the last non-positive and the first positive order
# statistic. Returns 100 when every return is positive and 0 when none is.
breakeven_prob <- function(returns) {
  n_obs <- length(returns)
  n_nonpos <- sum(returns <= 0)
  if (n_nonpos == 0) {
    return(100)
  }
  if (n_nonpos == n_obs) {
    return(0)
  }
  100 * (1 - (n_nonpos + 0.5) / n_obs)
}

# Closing prices as a plain numeric vector, in the series' index order
prices <- as.numeric(sp)
n_prices <- length(prices)

# One summary row per horizon; bound into a matrix once, after the loop,
# instead of growing the matrix with rbind() on every iteration
out_rows <- vector("list", length(yvec))

for (k in seq_along(yvec)) {
  yrs <- yvec[k]

  # Horizon length in observations, at 252 trading days per year
  lag_dist <- round(252 * yrs)

  # A zero-day lag or one that spans the whole series leaves nothing to compare
  if (lag_dist < 1 || lag_dist >= n_prices) {
    warning("Skipping ", yrs, "-year horizon: a lag of ", lag_dist,
            " observations is not usable with ", n_prices, " prices",
            call. = FALSE)
    next
  }

  # Time series of n-over-n returns (%): each price divided by the price
  # lag_dist observations earlier. Plain indexing is used rather than lag()
  # so the result does not depend on which package's lag() is on top.
  yoy <- (tail(prices, -lag_dist) / head(prices, -lag_dist) - 1) * 100
  ret_dates <- index(sp)[-seq_len(lag_dist)]

  ###################################################
  # Compute empirical cdf and various summary stats #
  ###################################################
  cdf_yoy <- ecdf(yoy)
  cdf_out <- quantile(yoy, c(0.01, 0.05, 0.10, 0.25, 0.5, 0.75, 0.90, 0.95, 0.99))
  mu <- mean(yoy)
  med <- median(yoy)
  sig <- sd(yoy)
  bep <- breakeven_prob(yoy)

  ###################################################
  # Organize final data matrix (table seen in blog) #
  ###################################################
  # The names on this vector become the column names of the final table
  out_rows[[k]] <- c(yrs = yrs, cdf_out, mean = mu, median = med,
                     std_dev = sig, BEprob = bep)

  # Title and axis label shared by all three plots for this horizon
  main_title <- paste0(yrs, "-Year Returns on the S&P 500 (Measured Daily)")
  ret_label <- paste0(yrs, "-Year Return (%)")

  ###############################################
  # Plot simple time series of n-over-n returns #
  ###############################################
  # The y-range is forced to include zero so the breakeven line is visible
  plot(y = yoy, x = ret_dates, type = 'l',
       main = main_title,
       ylab = ret_label,
       xlab = "",
       ylim = range(yoy, 0))
  abline(h = 0, col = 1)

  #####################################
  # Plot histogram (or empirical PDF) #
  #####################################
  # NOTE: freq = FALSE draws densities; the axis label is kept as published
  hist(yoy, freq = FALSE, breaks = 50,
       main = main_title,
       xlab = ret_label,
       ylab = "Frequency")
  grid()
  abline(v = 0, lwd = 2)
  abline(v = mu, col = 2, lwd = 2)
  abline(v = med, col = 4, lwd = 2)

  # Create vector of strings for legend table (used throughout)
  leg_str <- paste0(
    c("Mean = ", "Median = ", "Std Dev = ", "Breakeven Prob. = "),
    round(c(mu, med, sig, bep), 2),
    c("%", "%", "", "%")
  )
  # Colour 0 / width 0 suppress the line sample for the last two entries
  legend('topright', legend = leg_str, col = c(2, 4, 0, 0), lwd = c(2, 2, 0, 0))

  ######################
  # Plot empirical CDF #
  ######################
  plot(cdf_yoy,
       main = main_title,
       xlab = ret_label,
       ylab = "Cumulative Frequency")

  # Horizontal guides at every decile, plus solid lines at 0 and 1
  abline(h = seq(0.1, 0.9, 0.1), col = 8, lwd = 0.7)
  abline(h = 0)
  abline(h = 1)

  # Vertical guides: about 25 across the data range, spaced at a multiple of
  # 10, falling back to 10 when the range is too narrow to round up
  interv <- round((max(yoy) - min(yoy)) / 25, -1)
  if (interv == 0) {
    interv <- 10
  }
  abline(v = seq(round(min(yoy), -1), round(max(yoy), -1), interv),
         col = 8, lwd = 0.7)

  abline(v = 0, lwd = 2)
  abline(v = mu, col = 2, lwd = 2)
  abline(v = med, col = 4, lwd = 2)
  legend('bottomright', legend = leg_str, col = c(2, 4, 0, 0), lwd = c(2, 2, 0, 0))
}

# Summary table: one row per analyzed horizon (skipped horizons leave NULL
# entries, which rbind() ignores); column names come from the row vectors
out_mat <- do.call(rbind, out_rows)
# Render the summary matrix as a markdown table via knitr
library(knitr)
# Clear any row labels so the table shows only the named columns
rownames(out_mat) <- NULL
kable(x = out_mat)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment