Skip to content

Instantly share code, notes, and snippets.

@dmarx
Created March 16, 2014 01:31
Show Gist options
  • Save dmarx/9577136 to your computer and use it in GitHub Desktop.
Save dmarx/9577136 to your computer and use it in GitHub Desktop.
library(TTR)
GetClosePrices <- function(stocks, from=20090206, to=20140206){
  # Downloads the closing prices of each requested ticker and returns them
  # as a single xts object with one column per retrieved symbol.
  #
  # stocks:  A character vector of ticker symbols.
  # from/to: Dates in YYYYMMDD format, from < to.
  #
  # Returns: An xts object whose columns are named after the symbols that
  #          were actually retrieved. A symbol is skipped (and a message
  #          printed) when its download raises an error or a warning, or
  #          when it yields no Close prices. If nothing could be retrieved
  #          an empty xts() is returned. NULL is returned (after printing
  #          "PROBLEMO!!") if the merged column count does not match the
  #          number of retrieved symbols.
  stocks <- as.character(stocks)

  # Collect each symbol's series first and merge afterwards. Checking
  # dim(df)[2] on a still-empty xts() inside the loop can yield a
  # zero-length `if` condition (depending on the xts version), which used
  # to abort the whole run whenever the first symbol failed.
  series <- vector("list", length(stocks))
  kept <- logical(length(stocks))

  for(i in seq_along(stocks)){
    sym <- stocks[[i]]
    print(sym)
    prices <- tryCatch({
      getYahooData(sym, from, to)$Close
    },
    error=function(cond) {
      print(paste("Error encountered retrieving symbol:",sym))
      print(cond)
      NULL
    },
    warning=function(cond) {
      print(paste("Warning encountered retrieving symbol:",sym))
      print(cond)
      NULL
    })
    if(length(prices) > 0){
      series[[i]] <- prices
      kept[i] <- TRUE
    }
  }

  symbols <- stocks[kept]
  # Fold the series into an initially empty xts, i.e. the same sequence of
  # pairwise merges as merging inside the loop. With no series at all,
  # Reduce() simply returns the empty xts().
  df <- Reduce(merge, series[kept], xts())

  print(length(symbols))
  print(dim(df))

  if(length(symbols) > 0){
    # Every retrieved symbol must have contributed exactly one column.
    if(NCOL(df) != length(symbols)){
      print("PROBLEMO!!")
      return(NULL)
    }
    colnames(df) <- symbols
  }
  df
}
WeekYear <- function(x, format="%Y-%m-%d"){
  # Encodes each date in x as the integer YYYYWW, where WW is the "%W"
  # week of the year (weeks start on Monday; days before the first Monday
  # of the year fall in week 00).
  #
  # x:      Dates, or strings parseable with `format`.
  # format: strptime() format used to parse x.
  #
  # Approach from http://grokbase.com/t/r/r-help/124yxpntwm/r-extracting-week-number-starting-from-a-specific-date
  parsed <- strptime(x, format=format)
  # `format` the argument is a string here; R still resolves the call
  # below to the format() function.
  year_week <- format(parsed, "%Y%W")
  as.integer(year_week)
}
Calc_r_ji <- function(stocks){
  # Computes week-over-week relative price changes.
  #
  # stocks: A time-indexed price object (one column per stock) that
  #         supports index() and row subsetting, e.g. the xts returned by
  #         GetClosePrices.
  #
  # Returns: A data.frame with one row per week after the first and one
  #          column per stock, holding
  #          (price_this_week - price_prev_week) / price_prev_week,
  #          where each week's price is taken on the first row that falls
  #          in that week.
  wy <- WeekYear(index(stocks))

  # The first trading day of each week is the first occurrence of each
  # distinct year-week code.
  firstday_ix <- which(!duplicated(wy))

  # Coerce to a data.frame so the element-wise arithmetic below works on
  # plain columns.
  d_ji <- data.frame(stocks[firstday_ix])
  n <- nrow(d_ji)

  # drop = FALSE keeps the data.frame shape even for a single stock;
  # without it a one-column input collapses to a bare vector and callers
  # such as colMeans()/cov() fail.
  current <- d_ji[-1, , drop = FALSE]
  previous <- d_ji[-n, , drop = FALSE]
  (current - previous) / previous
}
GetStats <- function(stock_names=all_stocknames2
                     ,start_date=20120206
                     ,end_date=20140206){
  # Downloads prices for the given tickers and computes the mean and
  # covariance of their weekly returns.
  #
  # stock_names:         Ticker symbols, passed to GetClosePrices.
  # start_date/end_date: Date range in YYYYMMDD format.
  #
  # Returns: A list with
  #   mu     - column means of the weekly returns,
  #   sigma  - covariance matrix of the weekly returns,
  #   r_ji   - the weekly returns themselves (see Calc_r_ji),
  #   prices - the filtered price series the returns were computed from.
  prices <- GetClosePrices(stock_names, start_date, end_date)
  if(length(prices) == 0){
    # GetClosePrices may hand back NULL or an empty object; fail with a
    # clear message rather than an obscure error further down.
    stop("GetClosePrices returned no price data", call. = FALSE)
  }

  # Trim stocks down to only those that have the most days in common.
  # This method assumes that all rows that are NA are in common, which
  # won't strictly be true, so the end number of rows will be somewhat
  # less than the anticipated total, but this will still give us a lot
  # of data to work with.
  colnas <- vapply(prices, function(x) sum(is.na(x)), numeric(1))
  na_counts <- table(colnas)
  # which.max() returns a single entry even when several NA counts are
  # equally common (the smallest such count, because table() orders its
  # levels ascending). Comparing against several counts at once would
  # silently recycle and select the wrong columns.
  num_na <- as.integer(names(na_counts)[which.max(na_counts)])
  prices <- prices[, colnas == num_na]
  prices <- na.omit(prices)

  r_ji <- Calc_r_ji(prices)
  mu <- colMeans(r_ji, na.rm=TRUE)
  sigma <- cov(r_ji, use="pairwise.complete.obs")
  list(mu=mu, sigma=sigma, r_ji=r_ji, prices=prices)
}
# Run the whole pipeline with GetStats' default arguments. The default
# stock_names is `all_stocknames2`, which must already exist when this runs.
stockData <- GetStats()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment