njcray/mvo-example.r

## mvo-example.r
#
# Some example R code for processing stock data
#
# A lot of error checking and problem reporting has been omitted in favour of keeping the
# basic ideas to the fore.
#


#---- Get a list of the FTSE-100 companies ----
# e.g. by scraping a suitable web page

require(XML)
require(RCurl)

pagescrape = readHTMLTable(getURL('https://en.wikipedia.org/wiki/FTSE_100_Index'),header=TRUE)
n.rows     = unlist(lapply(pagescrape,function(tab)if(!is.null(dim(tab)[1]))dim(tab)[1] else 0))
stocktable = pagescrape[[which.max(n.rows)]] # grab biggest table on the page

stocktable$Ticker = sub('^(.*)\\.(.+)$','\\1-\\2',stocktable$Ticker) # cope with BT.A
ftse100 = sub('^(.*?)\\.?$','\\1.L',stocktable$Ticker) # add .L to all stock names

#-------------------

# Now grab the historical data for the ftse100 stocks
# A small amount of error detection and skipping is included here because the 'getSymbols' interface
# is likely to fail on some stocks. In real use you will need to resolve any problems which arise
# but for demo purposes we will just work with whatever we can get data for.
#

# Get historical stock data from Yahoo inside a 'try' function so that any errors do not stop execution.

require(quantmod)
invisible(lapply(ftse100,function(x)try(getSymbols(x,auto.assign=TRUE,src='yahoo',env=.GlobalEnv,from='2015-01-01'))))

# Build the list of daily returns. If any are missing after the download we will just skip them

require(TTR)
roclist = do.call(cbind,lapply(ftse100,function(x){if(exists(x)) ROC(Cl(get(x)),type='discrete') else NULL}))
names(roclist) = sub('.Close','',names(roclist))
roclist = tail(roclist, 256) # only keep last 256 days of data

#
# Some tidying up due to problems with Yahoo's data feed.
# It's a bit frightening how many stocks have missing data
#
# This tidying up is OK for demo purposes, but better solutions might be needed for production use.
#
roclist = roclist[, colSums(is.na(roclist)) < nrow(roclist)*0.1] # remove stocks with insufficient data
roclist = na.omit(roclist) # remove any days with missing data

#---- Here is how to compute *daily* means and variances. -----

mean.return       = colMeans(roclist,na.rm=T)
covariance.matrix = cov(roclist,use="pairwise.complete.obs")

View(mean.return)
View(covariance.matrix)


#---- Here is now to create a portfolio by specifying the weight for each stock ----

# The example here would be an equal-weighted portfolio of every stock we managed to download
# sum(portfolio) should alwayss be 1

portfolio        = rep(1/ncol(roclist),ncol(roclist))
names(portfolio) = names(roclist)


#
#---- Here is how to compute an approximate yearly return and volatility of the portfolio, assuming 225 working days in year ----
#
return.of.portfolio     = drop(t(portfolio) %*% mean.return) * 225
volatility.of.portfolio = sqrt(drop(t(portfolio) %*% covariance.matrix %*% portfolio)) * sqrt(225)


#
#---- This is a very simple Sharpe Ratio optimisation example ----
#
# Sharpe Ratio = (u-r)/s where r is the minimum acceptable return (per annum)
# Default r=0.25 represents a return of 25% p.a.
#
# Note that for readability this function has been written to work with yearly return and volatility data.
# For better performance, there is no need to multiply by all these constants during the optimization.
# The matrix multiplications can be be done using 'crossprod' for a tiny performance boost.
#
# Also note that we do not supply a 'gradient function' to 'optim' for this demo.
# If you are trying to optimise using maybe 1000 stocks it is worth creating the equivalent gradient function
# and supplying it to 'optim'.
#
# The projection is applied inside the SR function because this is the easiest way to explain
# the simplex bounds constraint to the 'optim' function.
#
# Note that during the optimisation we do not need to recompute the mean.return and covariance.matrix
# arrays. They are computed once. Only the array of proportions in 'portfolio' needs to change.
#

Sharpe.Ratio = function(portfolio,mean.return,covariance.matrix,r=0.25){

	portfolio = portfolio/sum(portfolio) # project onto the unit simplex given by sum(portfolio) = 1

	u = drop(t(portfolio) %*% mean.return) * 225
	s = sqrt(drop(t(portfolio) %*% covariance.matrix %*% portfolio)) * sqrt(225)

	sr = (u-r)/s
}


#
# The 'optim' function will find the arguments (par) to a function (fn) which can maximise it (for fnscale=-1)
#

o = optim(
	par = rep(1/ncol(roclist),ncol(roclist)),   # initial portfolio - we use equal weight here
	fn = Sharpe.Ratio,   # function to optimise
	gr = NULL, # gradient of function. Not supplied in this demo.
	mean.return=mean.return, covariance.matrix=covariance.matrix, r=0.25, # the other arguments to 'fn'
	control = list(trace=1,REPORT=10,fnscale=-1,maxit=100,factr=1e7), # see documentation for info about control parameters
	lower = 0, upper = 1.05, # upper and lower bounds for the parameters
	method = "L-BFGS-B" # This method is a very fast quasi-Newton optimisation
	)

if(o$convergence != 0) warning('Optimisation failed to converge')

#
# Let's have a look at the portfolio it has found
#
max.portfolio = o$par/sum(o$par)
names(max.portfolio) = names(roclist)

print(round(max.portfolio[max.portfolio>0],2))
	#
	# Some example R code for processing stock data
	#
	# A lot of error checking and problem reporting has been omitted in favour of keeping the
	# basic ideas to the fore.
	#


	#---- Get a list of the FTSE-100 companies ----
	# e.g. by scraping a suitable web page

	require(XML)
	require(RCurl)

	pagescrape = readHTMLTable(getURL('https://en.wikipedia.org/wiki/FTSE_100_Index'),header=TRUE)
	n.rows = unlist(lapply(pagescrape,function(tab)if(!is.null(dim(tab)[1]))dim(tab)[1] else 0))
	stocktable = pagescrape[[which.max(n.rows)]] # grab biggest table on the page

	stocktable$Ticker = sub('^(.*)\\.(.+)$','\\1-\\2',stocktable$Ticker) # cope with BT.A
	ftse100 = sub('^(.*?)\\.?$','\\1.L',stocktable$Ticker) # add .L to all stock names

	#-------------------

	# Now grab the historical data for the ftse100 stocks
	# A small amount of error detection and skipping is included here because the 'getSymbols' interface
	# is likely to fail on some stocks. In real use you will need to resolve any problems which arise
	# but for demo purposes we will just work with whatever we can get data for.
	#

	# Get historical stock data from Yahoo inside a 'try' function so that any errors do not stop execution.

	require(quantmod)
	invisible(lapply(ftse100,function(x)try(getSymbols(x,auto.assign=TRUE,src='yahoo',env=.GlobalEnv,from='2015-01-01'))))

	# Build the list of daily returns. If any are missing after the download we will just skip them

	require(TTR)
	roclist = do.call(cbind,lapply(ftse100,function(x){if(exists(x)) ROC(Cl(get(x)),type='discrete') else NULL}))
	names(roclist) = sub('.Close','',names(roclist))
	roclist = tail(roclist, 256) # only keep last 256 days of data

	#
	# Some tidying up due to problems with Yahoo's data feed.
	# It's a bit frightening how many stocks have missing data
	#
	# This tidying up is OK for demo purposes, but better solutions might be needed for production use.
	#
	roclist = roclist[, colSums(is.na(roclist)) < nrow(roclist)*0.1] # remove stocks with insufficient data
	roclist = na.omit(roclist) # remove any days with missing data

	#---- Here is how to compute daily means and variances. -----

	mean.return = colMeans(roclist,na.rm=T)
	covariance.matrix = cov(roclist,use="pairwise.complete.obs")

	View(mean.return)
	View(covariance.matrix)


	#---- Here is now to create a portfolio by specifying the weight for each stock ----

	# The example here would be an equal-weighted portfolio of every stock we managed to download
	# sum(portfolio) should alwayss be 1

	portfolio = rep(1/ncol(roclist),ncol(roclist))
	names(portfolio) = names(roclist)


	#
	#---- Here is how to compute an approximate yearly return and volatility of the portfolio, assuming 225 working days in year ----
	#
	return.of.portfolio = drop(t(portfolio) %% mean.return) 225
	volatility.of.portfolio = sqrt(drop(t(portfolio) %% covariance.matrix %% portfolio)) * sqrt(225)


	#
	#---- This is a very simple Sharpe Ratio optimisation example ----
	#
	# Sharpe Ratio = (u-r)/s where r is the minimum acceptable return (per annum)
	# Default r=0.25 represents a return of 25% p.a.
	#
	# Note that for readability this function has been written to work with yearly return and volatility data.
	# For better performance, there is no need to multiply by all these constants during the optimization.
	# The matrix multiplications can be be done using 'crossprod' for a tiny performance boost.
	#
	# Also note that we do not supply a 'gradient function' to 'optim' for this demo.
	# If you are trying to optimise using maybe 1000 stocks it is worth creating the equivalent gradient function
	# and supplying it to 'optim'.
	#
	# The projection is applied inside the SR function because this is the easiest way to explain
	# the simplex bounds constraint to the 'optim' function.
	#
	# Note that during the optimisation we do not need to recompute the mean.return and covariance.matrix
	# arrays. They are computed once. Only the array of proportions in 'portfolio' needs to change.
	#

	Sharpe.Ratio = function(portfolio,mean.return,covariance.matrix,r=0.25){

	portfolio = portfolio/sum(portfolio) # project onto the unit simplex given by sum(portfolio) = 1

	u = drop(t(portfolio) %% mean.return) 225
	s = sqrt(drop(t(portfolio) %% covariance.matrix %% portfolio)) * sqrt(225)

	sr = (u-r)/s
	}


	#
	# The 'optim' function will find the arguments (par) to a function (fn) which can maximise it (for fnscale=-1)
	#

	o = optim(
	par = rep(1/ncol(roclist),ncol(roclist)), # initial portfolio - we use equal weight here
	fn = Sharpe.Ratio, # function to optimise
	gr = NULL, # gradient of function. Not supplied in this demo.
	mean.return=mean.return, covariance.matrix=covariance.matrix, r=0.25, # the other arguments to 'fn'
	control = list(trace=1,REPORT=10,fnscale=-1,maxit=100,factr=1e7), # see documentation for info about control parameters
	lower = 0, upper = 1.05, # upper and lower bounds for the parameters
	method = "L-BFGS-B" # This method is a very fast quasi-Newton optimisation
	)

	if(o$convergence != 0) warning('Optimisation failed to converge')

	#
	# Let's have a look at the portfolio it has found
	#
	max.portfolio = o$par/sum(o$par)
	names(max.portfolio) = names(roclist)

	print(round(max.portfolio[max.portfolio>0],2))