## dmarx/stock portfolio optimization.r

## stock portfolio optimization.r
library(TTR)

GetClosePrices <- function(stocks, from = 20090206, to = 20140206) {
  # Fetches data for each ticker with getYahooData() and merges the Close
  # series into a single object (started from xts()), one column per
  # symbol that was retrieved.
  #
  # stocks:  A character vector of ticker symbols
  # from/to: Dates in YYYYMMDD format, from < to.
  #
  # Returns the merged object with columns named after the retrieved
  # symbols, or NULL (after printing "PROBLEMO!!") if the number of merged
  # columns ever disagrees with the number of accepted symbols.
  #
  # Any error or warning raised while fetching or merging a symbol is
  # printed and that symbol is skipped; the loop carries on with the next.
  df <- xts()
  symbols <- character(0)
  for (sym in stocks) {
    print(sym)
    tryCatch({
      data <- getYahooData(sym, from, to)
      prices <- data$Close
      if (length(prices) > 0) {
        df <- merge(df, prices)
        symbols <- c(symbols, sym)
      }
    },
    error = function(cond) {
      print(paste("Error encountered retrieving symbol:", sym))
      print(cond)
    },
    warning = function(cond) {
      print(paste("Warning encountered retrieving symbol:", sym))
      print(cond)
    })

    # Until the first successful merge `df` may have no dim attribute
    # (NOTE(review): depends on the xts version -- confirm). In that case
    # dim(df)[2] is NULL, the comparison has length zero and `if` stops with
    # "argument is of length zero". Treat a dimensionless `df` as 0 columns
    # so a failed first symbol does not abort the whole download.
    n_cols <- if (is.null(dim(df))) 0L else ncol(df)
    if (length(symbols) != n_cols) {
      print("PROBLEMO!!")
      return(NULL)
    }
  }
  print(length(symbols))
  print(dim(df))
  # With nothing retrieved there are no columns to name.
  if (length(symbols) > 0) {
    colnames(df) <- symbols
  }
  df
}


WeekYear <- function(x, format = "%Y-%m-%d") {
  # Turns dates into an integer year-week code built from "%Y%W"
  # (four-digit year followed by the two-digit %W week number).
  # from http://grokbase.com/t/r/r-help/124yxpntwm/r-extracting-week-number-starting-from-a-specific-date
  #
  # x:      Values to parse; handed to strptime().
  # format: strptime() format string describing x.
  parsed <- strptime(x, format = format)
  # The `format` argument is a string, so the call below still resolves to
  # the base formatting function; base:: just makes that explicit.
  year_week <- base::format(parsed, "%Y%W")
  as.integer(year_week)
}

Calc_r_ji <- function(stocks) {
  # Computes week-over-week simple returns from a price series.
  #
  # stocks: A time-indexed price object (index(stocks) must give its dates),
  #         one column per stock. Presumably the xts object produced by
  #         GetClosePrices, with rows in date order -- verify against caller.
  #
  # Returns a data.frame with one column per stock and one row per pair of
  # consecutive weeks, holding (p_this - p_prev) / p_prev, where the price
  # taken for a week is that of its first row.

  # Key every row by a Monday-based running week number: days since
  # 1970-01-01 (a Thursday) shifted by 3 so that each block of 7 starts on
  # a Monday. A "%Y%W" key restarts at 1 January, which splits the week
  # containing New Year into two groups and yields a partial-week return;
  # a running counter keeps that week in one piece.
  dates <- as.Date(format(index(stocks), format = "%Y-%m-%d"))
  week_id <- (as.integer(dates) + 3L) %/% 7L

  # First row of each week.
  firstday_ix <- which(!duplicated(week_id))

  # Coerce to data.frame so the arithmetic below is column-wise.
  # drop = FALSE keeps a single-stock input as a one-column data.frame
  # instead of collapsing it to a bare vector.
  d_ji <- data.frame(stocks[firstday_ix])
  n <- nrow(d_ji)
  current <- d_ji[-1, , drop = FALSE]
  previous <- d_ji[-n, , drop = FALSE]
  (current - previous) / previous
}


GetStats <- function(stock_names = all_stocknames2,
                     start_date = 20120206,
                     end_date = 20140206) {
  # Downloads closing prices, keeps the largest group of stocks that share
  # the same number of missing days, and summarises their weekly returns.
  #
  # stock_names: Character vector of ticker symbols. The default,
  #              all_stocknames2, is not defined in this part of the file
  #              and has to be available when the default is used.
  # start_date/end_date: Dates in YYYYMMDD format, passed to GetClosePrices.
  #
  # Returns a list with
  #   mu:     column means of the weekly returns,
  #   sigma:  covariance matrix of the weekly returns,
  #   r_ji:   the weekly returns from Calc_r_ji,
  #   prices: the filtered prices with rows containing NA removed.
  prices <- GetClosePrices(stock_names, start_date, end_date)
  if (is.null(prices) || is.null(dim(prices)) || ncol(prices) == 0) {
    stop("No price data could be retrieved for the requested symbols.",
         call. = FALSE)
  }

  # Trim stocks down to only those that have the most days in common.
  # This method assumes that all rows that are NA are in common, which
  # won't strictly be true, so the end number of rows will be somewhat
  # less than the anticipated total, but this will still give us a lot
  # of data to work with.
  colnas <- colSums(is.na(as.matrix(prices)))
  na_counts <- table(colnas)
  # which.max() yields exactly one entry even when several NA counts are
  # equally common (a vector here would be recycled in the comparison below
  # and pick the wrong columns). The table is ordered by increasing NA
  # count, so a tie goes to the group with fewer missing days.
  num_na <- as.integer(names(na_counts)[which.max(na_counts)])
  prices <- prices[, colnas == num_na]
  prices <- na.omit(prices)

  r_ji <- Calc_r_ji(prices)
  mu <- colMeans(r_ji, na.rm = TRUE)
  sigma <- cov(r_ji, use = "pairwise.complete.obs")
  list(mu = mu, sigma = sigma, r_ji = r_ji, prices = prices)
}

# Run the pipeline with GetStats()'s default tickers and date range and
# store the returned list (mu, sigma, r_ji, prices) in stockData.
stockData <- GetStats()
	library(TTR)

	GetClosePrices <- function(stocks, from = 20090206, to = 20140206) {
	  # Fetches data for each ticker with getYahooData() and merges the Close
	  # series into a single object (started from xts()), one column per
	  # symbol that was retrieved.
	  #
	  # stocks: A character vector of ticker symbols
	  # from/to: Dates in YYYYMMDD format, from < to.
	  #
	  # Returns the merged object with columns named after the retrieved
	  # symbols, or NULL (after printing "PROBLEMO!!") if the number of merged
	  # columns ever disagrees with the number of accepted symbols.
	  #
	  # Any error or warning raised while fetching or merging a symbol is
	  # printed and that symbol is skipped; the loop carries on with the next.
	  df <- xts()
	  symbols <- character(0)
	  for (sym in stocks) {
	    print(sym)
	    tryCatch({
	      data <- getYahooData(sym, from, to)
	      prices <- data$Close
	      if (length(prices) > 0) {
	        df <- merge(df, prices)
	        symbols <- c(symbols, sym)
	      }
	    },
	    error = function(cond) {
	      print(paste("Error encountered retrieving symbol:", sym))
	      print(cond)
	    },
	    warning = function(cond) {
	      print(paste("Warning encountered retrieving symbol:", sym))
	      print(cond)
	    })

	    # Until the first successful merge `df` may have no dim attribute
	    # (NOTE(review): depends on the xts version -- confirm). In that case
	    # dim(df)[2] is NULL, the comparison has length zero and `if` stops with
	    # "argument is of length zero". Treat a dimensionless `df` as 0 columns
	    # so a failed first symbol does not abort the whole download.
	    n_cols <- if (is.null(dim(df))) 0L else ncol(df)
	    if (length(symbols) != n_cols) {
	      print("PROBLEMO!!")
	      return(NULL)
	    }
	  }
	  print(length(symbols))
	  print(dim(df))
	  # With nothing retrieved there are no columns to name.
	  if (length(symbols) > 0) {
	    colnames(df) <- symbols
	  }
	  df
	}


	WeekYear <- function(x, format = "%Y-%m-%d") {
	  # Turns dates into an integer year-week code built from "%Y%W"
	  # (four-digit year followed by the two-digit %W week number).
	  # from http://grokbase.com/t/r/r-help/124yxpntwm/r-extracting-week-number-starting-from-a-specific-date
	  #
	  # x:      Values to parse; handed to strptime().
	  # format: strptime() format string describing x.
	  parsed <- strptime(x, format = format)
	  # The `format` argument is a string, so the call below still resolves to
	  # the base formatting function; base:: just makes that explicit.
	  year_week <- base::format(parsed, "%Y%W")
	  as.integer(year_week)
	}

	Calc_r_ji <- function(stocks) {
	  # Computes week-over-week simple returns from a price series.
	  #
	  # stocks: A time-indexed price object (index(stocks) must give its dates),
	  #         one column per stock. Presumably the xts object produced by
	  #         GetClosePrices, with rows in date order -- verify against caller.
	  #
	  # Returns a data.frame with one column per stock and one row per pair of
	  # consecutive weeks, holding (p_this - p_prev) / p_prev, where the price
	  # taken for a week is that of its first row.

	  # Key every row by a Monday-based running week number: days since
	  # 1970-01-01 (a Thursday) shifted by 3 so that each block of 7 starts on
	  # a Monday. A "%Y%W" key restarts at 1 January, which splits the week
	  # containing New Year into two groups and yields a partial-week return;
	  # a running counter keeps that week in one piece.
	  dates <- as.Date(format(index(stocks), format = "%Y-%m-%d"))
	  week_id <- (as.integer(dates) + 3L) %/% 7L

	  # First row of each week.
	  firstday_ix <- which(!duplicated(week_id))

	  # Coerce to data.frame so the arithmetic below is column-wise.
	  # drop = FALSE keeps a single-stock input as a one-column data.frame
	  # instead of collapsing it to a bare vector.
	  d_ji <- data.frame(stocks[firstday_ix])
	  n <- nrow(d_ji)
	  current <- d_ji[-1, , drop = FALSE]
	  previous <- d_ji[-n, , drop = FALSE]
	  (current - previous) / previous
	}


	GetStats <- function(stock_names = all_stocknames2,
	                     start_date = 20120206,
	                     end_date = 20140206) {
	  # Downloads closing prices, keeps the largest group of stocks that share
	  # the same number of missing days, and summarises their weekly returns.
	  #
	  # stock_names: Character vector of ticker symbols. The default,
	  #              all_stocknames2, is not defined in this part of the file
	  #              and has to be available when the default is used.
	  # start_date/end_date: Dates in YYYYMMDD format, passed to GetClosePrices.
	  #
	  # Returns a list with
	  #   mu:     column means of the weekly returns,
	  #   sigma:  covariance matrix of the weekly returns,
	  #   r_ji:   the weekly returns from Calc_r_ji,
	  #   prices: the filtered prices with rows containing NA removed.
	  prices <- GetClosePrices(stock_names, start_date, end_date)
	  if (is.null(prices) || is.null(dim(prices)) || ncol(prices) == 0) {
	    stop("No price data could be retrieved for the requested symbols.",
	         call. = FALSE)
	  }

	  # Trim stocks down to only those that have the most days in common.
	  # This method assumes that all rows that are NA are in common, which
	  # won't strictly be true, so the end number of rows will be somewhat
	  # less than the anticipated total, but this will still give us a lot
	  # of data to work with.
	  colnas <- colSums(is.na(as.matrix(prices)))
	  na_counts <- table(colnas)
	  # which.max() yields exactly one entry even when several NA counts are
	  # equally common (a vector here would be recycled in the comparison below
	  # and pick the wrong columns). The table is ordered by increasing NA
	  # count, so a tie goes to the group with fewer missing days.
	  num_na <- as.integer(names(na_counts)[which.max(na_counts)])
	  prices <- prices[, colnas == num_na]
	  prices <- na.omit(prices)

	  r_ji <- Calc_r_ji(prices)
	  mu <- colMeans(r_ji, na.rm = TRUE)
	  sigma <- cov(r_ji, use = "pairwise.complete.obs")
	  list(mu = mu, sigma = sigma, r_ji = r_ji, prices = prices)
	}

	# Second top-level run of the pipeline (the same call also appears earlier
	# in this file), again with GetStats()'s default tickers and date range.
	stockData <- GetStats()