jaymon0703/random_strategy_builder.R

## random_strategy_builder.R
require(blotter)

# Remove portfolio and account data if run previously
try(rm("portfolio.txnsim_rnorm_port","account.txnsim_rnorm_acct",pos=.blotter), silent = TRUE)

# load the example data
currency("USD")
stock("GSPC",currency="USD",multiplier=1)
getSymbols('^GSPC', src='yahoo', index.class=c("POSIXt","POSIXct"),from='1998-01-01')

# Initialize the Portfolio
initPortf("txnsim_rnorm_port",symbols="GSPC",initDate="1998-01-02")
initAcct("txnsim_rnorm_acct",portfolios="txnsim_rnorm_port",initDate="1998-01-02", initEq=10000)

# Store strategy calendar duration
calendardur <- nrow(GSPC)
calendardur
targetdur <- calendardur # for now...TODO - add levels which will possibly take targetdur over calendardur
# Randomise time in market
# First flat periods (qty = 0, so no need to randomise these)
flatdur <- round(runif(1, 0, calendardur*0.3),0)
flatdur
flatdur_mean <- 90 # choose a flat duration mean that makes sense to your random use case
flatdur_stddev <- 20 # choose a flat duration std dev that makes sense to your random use case
flatrows <- round(flatdur/rnorm(n = 1, mean = flatdur_mean, sd = flatdur_stddev))
flatdur_vec <- round(rnorm(n = flatrows, mean = flatdur_mean, sd = flatdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
flatdur_vec
sum(flatdur_vec)
avgdur <- sum(flatdur_vec)/flatrows

flatqty <- replicate(n = flatrows, 0) # no need to randomise qty

# Now long periods (adding randomisation for position sizes)
lsratio <- 0.5
longdur <- round((targetdur - flatdur) * lsratio,0)
longdur
longdur_mean <- 45 # choose a flat duration mean that makes sense to your random use case
longdur_stddev <- 15 # choose a flat duration std dev that makes sense to your random use case
longrows <- round(longdur/rnorm(n = 1, mean = longdur_mean, sd = longdur_stddev))
longdur_vec <- round(rnorm(n = longrows, mean = longdur_mean, sd = longdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
longdur_vec
sum(longdur_vec)
avgdur <- sum(longdur_vec)/longrows

longqty <- round(rnorm(n = longrows, mean = 100, sd = 10), 0)
longqty

# Now short periods (adding randomisation for position sizes)
lsratio <- lsratio
shortdur <- targetdur - flatdur - longdur
shortdur
shortdur_mean <- 45 # choose a flat duration mean that makes sense to your random use case
shortdur_stddev <- 15 # choose a flat duration std dev that makes sense to your random use case
shortrows <- round(shortdur/rnorm(n = 1, mean = shortdur_mean, sd = shortdur_stddev))
shortdur_vec <- round(rnorm(n = shortrows, mean = shortdur_mean, sd = shortdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
shortdur_vec
sum(shortdur_vec)
avgdur <- sum(shortdur_vec)/shortrows

shortqty <- round(rnorm(n = shortrows, mean = -100, sd = 10), 0)
shortqty

# TODO: randomise levels

subsample <- function(svector, targetdur, replacement=TRUE, ...,duration, qty) {
  #`trades` already exists in function scope

  dur <- 0 # initialize duration counter
  tdf <- data.frame() #initialize output data.frame
  nsamples <- round(length(svector) * fudgefactor, 0)
  while (dur < targetdur) {
    s <- sample(svector, nsamples, replace = replacement)
    # sdf <- data.frame(duration = trades[s,'duration'],
    #                   quantity = trades[s,'quantity'])
    sdf <- data.frame(duration = duration[s],
                      quantity = qty[s])
    if (is.null(tdf$duration)) {
      tdf <- sdf
    } else {
      tdf <- rbind(tdf, sdf)
    }
    dur <- sum(tdf$duration)
    nsamples <- round(((targetdur - dur) / avgdur) * fudgefactor, 0)
    nsamples <- ifelse(nsamples == 0, 1, nsamples)
    # print(nsamples) # for debugging
    dur
  }
  # could truncate data frame here to correct total duration
  # the row which takes our duration over the target
  xsrow <- last(which(cumsum(as.numeric(tdf$duration)) < (targetdur))) + 1
  if (xsrow == nrow(tdf)) {
    # the last row sampled takes us over targetdur
    adjxsrow <- sum(tdf$duration) - targetdur
    tdf$duration[xsrow] <- tdf$duration[xsrow] - adjxsrow
  } else if (xsrow < nrow(tdf)) {
    # the last iteration of the while loop added more than one row
    # which took our duration over the target
    tdf <- tdf[-seq.int(xsrow + 1, nrow(tdf), 1), ]
    adjxsrow <- sum(tdf$duration) - targetdur
    tdf$duration[xsrow] <- tdf$duration[xsrow] - adjxsrow
  }

  tdf  # return target data frame
} # end subsample

#sample long, short, flat periods
if(flatdur > 0){
  a <- flatdur/sum(flatdur_vec)
  fudgefactor <- ceiling(a * 100) / 100
  flatdf  <- subsample(svector = flatrows, targetdur = flatdur, duration = flatdur_vec, qty = flatqty)
} else {
  flatdf <- NULL
}
if(longdur > 0){ # ie. there are long round turn trades in the strategy
  a <- longdur/sum(longdur_vec)
  fudgefactor <- ceiling(a * 100) / 100
  longdf  <- subsample(svector = longrows, targetdur = longdur, duration = longdur_vec, qty = longqty)
} else {
  longdf <- NULL
}
if(shortdur > 0){ # ie. there are short round turn trades in the strategy
  a <- shortdur/sum(shortdur_vec)
  fudgefactor <- ceiling(a * 100) / 100
  shortdf <- subsample(svector = shortrows, targetdur = shortdur, duration = shortdur_vec, qty = shortqty)
} else {
  shortdf <- NULL
}
# Lines 129-162 are directly from txnsim...as we are not levelling for now, there is no need to concern ourselves with layers
# #browser()
# # make the first layer
# # 1. start with flat periods
# firstlayer <- flatdf
# # 2. segment trades for first layer
# targetlongdur <- structure(round((calendardur-flatdur)*lsratio),units='secs',class='difftime')
# if(!is.null(longdf)){ # ie. there are long round turn trades in the strategy
#   targetlongrow <- last(which(cumsum(as.numeric(longdf$duration))<targetlongdur))
#   firstlayer    <- rbind(firstlayer,longdf[1:targetlongrow,])
# } else {
#   targetlongrow <- 0
# }
# # firstlayer    <- rbind(firstlayer,longdf[1:targetlongrow,])
# if(!is.null(shortdf)){ # ie. there are short round turn trades in the strategy
#   targetshortrow <- last( which( cumsum(as.numeric(shortdf$duration))<(calendardur-sum(firstlayer$duration)) ) )
#   firstlayer     <- rbind(firstlayer,shortdf[1:targetshortrow,])
# } else {
#   targetshortrow <- 0
# }
# firstlayer    <- firstlayer[sample(nrow(firstlayer),replace=FALSE),]
# # firstlayer should be just slightly longer than calendardur, we'll truncate later
#
# tdf <- firstlayer # establish target data.frame
#
# # build a vector of start times
# start <- first(trades$start) + cumsum(as.numeric(tdf$duration))
# # add the first start time back in
# start <- c(first(trades$start), start)
# # take off the last end time, since we won't put in a closing trade
# start <- start[-length(start)]
# # add start column to tdf
# tdf$start <- start
# # rearrange columns for consistency
# tdf <- tdf[, c("start", "duration", "quantity")]

# Since we now have flatdf, longdf and shortdf we are in a position to combine the dataframes
# to make a single dataframe of durations and quantities for our random strategy which we will
# add transaction based on. To start, we will randomly select a flat pertiod and then sample
# the remaining rows. This is to mirror production backtests which generally require lead time
# before rules are activated and positions opened.
startrow <- sample(flatrows, size = 1)
# idx <- c(startrow, as.numeric(row.names((flatdf[-startrow,]))))
startdf <- flatdf[startrow,]
flatdf <- flatdf[-startrow,]
mergedf <- rbind(flatdf, longdf, shortdf)
mergedf <- mergedf[order(sample(nrow(mergedf), size = nrow(mergedf))),]
mergedf <- rbind(startdf, mergedf)
mergedf

# Add transactions
# First get prices
dargs <- list()
if (!is.null(dargs$env)) {
  env <- dargs$env
} else {
  env <- .GlobalEnv
}
if (!is.null(dargs$prefer)) {
  prefer <- dargs$prefer
} else {
  prefer <- NULL
}
# prices <- get('GSPC')
prices <- getPrice(get("GSPC", pos = env), prefer = prefer)[, 1]
txns <- list()

dur_cumsum <- cumsum(mergedf$duration)
# Now for tranactions
for (r in 1:nrow(mergedf)) {
  # opening trade
  open  <- data.frame(
    if(r == 1) {
      start = index(prices[dur_cumsum[r] - mergedf[r,1] + 1])
      } else {
        start = index(prices[dur_cumsum[r] - mergedf[r,1]])
        },
    TxnQty = mergedf[r, "quantity"],
    TxnPrice = as.numeric(prices[start])
    )
    colnames(open) <- c("start","TxnQty","TxnPrice")
  # closing trade
  close <-
    data.frame(
      start = index(prices[dur_cumsum[r]]),
      TxnQty = -1 * mergedf[r, "quantity"],
      TxnPrice = as.numeric(prices[dur_cumsum[r]])
    )
  txns[[r]] <- rbind(open, close)
} # end loop over rows
txns <- do.call(rbind, txns)
txns <- xts(txns[, c("TxnQty", "TxnPrice")], order.by = txns[, 1])
txns <- txns[which(txns$TxnQty != 0), ]
txns

# portname <- "random_trader"
symbol <- "GSPC"
# initPortf(portname, "GSPC")
addTxns(Portfolio = "txnsim_rnorm_port",
        Symbol = symbol,
        TxnData = txns)
updatePortf(Portfolio = "txnsim_rnorm_port")
chart.Posn("random_trader", "GSPC")

ex.txnsim <- function(Portfolio
                      ,n=10
                      ,replacement=FALSE
                      , tradeDef='increased.to.reduced'
                      , chart=FALSE
)
{
  out <- txnsim(Portfolio,n,replacement, tradeDef = tradeDef)
  if(isTRUE(chart)) {
    portnames <- blotter:::txnsim.portnames(Portfolio, replacement, n)
    for (i in 1:n){
      p<- portnames[i]
      symbols<-names(getPortfolio(p)$symbols)
      for(symbol in symbols) {
        dev.new()
        chart.Posn(p,symbol)
      }
    }
  }
  invisible(out)
} # end ex.txnsim

rnorm.wr <- ex.txnsim('txnsim_rnorm_port',1000, replacement = TRUE, chart = FALSE)
plot(rnorm.wr)
rnorm.wr$pvalues
hist(rnorm.wr)
	require(blotter)

	# Remove portfolio and account data if run previously
	try(rm("portfolio.txnsim_rnorm_port","account.txnsim_rnorm_acct",pos=.blotter), silent = TRUE)

	# load the example data
	currency("USD")
	stock("GSPC",currency="USD",multiplier=1)
	getSymbols('^GSPC', src='yahoo', index.class=c("POSIXt","POSIXct"),from='1998-01-01')

	# Initialize the Portfolio
	initPortf("txnsim_rnorm_port",symbols="GSPC",initDate="1998-01-02")
	initAcct("txnsim_rnorm_acct",portfolios="txnsim_rnorm_port",initDate="1998-01-02", initEq=10000)

	# Store strategy calendar duration
	calendardur <- nrow(GSPC)
	calendardur
	targetdur <- calendardur # for now...TODO - add levels which will possibly take targetdur over calendardur
	# Randomise time in market
	# First flat periods (qty = 0, so no need to randomise these)
	flatdur <- round(runif(1, 0, calendardur*0.3),0)
	flatdur
	flatdur_mean <- 90 # choose a flat duration mean that makes sense to your random use case
	flatdur_stddev <- 20 # choose a flat duration std dev that makes sense to your random use case
	flatrows <- round(flatdur/rnorm(n = 1, mean = flatdur_mean, sd = flatdur_stddev))
	flatdur_vec <- round(rnorm(n = flatrows, mean = flatdur_mean, sd = flatdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
	flatdur_vec
	sum(flatdur_vec)
	avgdur <- sum(flatdur_vec)/flatrows

	flatqty <- replicate(n = flatrows, 0) # no need to randomise qty

	# Now long periods (adding randomisation for position sizes)
	lsratio <- 0.5
	longdur <- round((targetdur - flatdur) * lsratio,0)
	longdur
	longdur_mean <- 45 # choose a flat duration mean that makes sense to your random use case
	longdur_stddev <- 15 # choose a flat duration std dev that makes sense to your random use case
	longrows <- round(longdur/rnorm(n = 1, mean = longdur_mean, sd = longdur_stddev))
	longdur_vec <- round(rnorm(n = longrows, mean = longdur_mean, sd = longdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
	longdur_vec
	sum(longdur_vec)
	avgdur <- sum(longdur_vec)/longrows

	longqty <- round(rnorm(n = longrows, mean = 100, sd = 10), 0)
	longqty

	# Now short periods (adding randomisation for position sizes)
	lsratio <- lsratio
	shortdur <- targetdur - flatdur - longdur
	shortdur
	shortdur_mean <- 45 # choose a flat duration mean that makes sense to your random use case
	shortdur_stddev <- 15 # choose a flat duration std dev that makes sense to your random use case
	shortrows <- round(shortdur/rnorm(n = 1, mean = shortdur_mean, sd = shortdur_stddev))
	shortdur_vec <- round(rnorm(n = shortrows, mean = shortdur_mean, sd = shortdur_stddev),0) # compute vector of elements for flatdur, using fudgefactor to ensure extends at least beyond flatdur
	shortdur_vec
	sum(shortdur_vec)
	avgdur <- sum(shortdur_vec)/shortrows

	shortqty <- round(rnorm(n = shortrows, mean = -100, sd = 10), 0)
	shortqty

	# TODO: randomise levels

	subsample <- function(svector, targetdur, replacement=TRUE, ...,duration, qty) {
	#`trades` already exists in function scope

	dur <- 0 # initialize duration counter
	tdf <- data.frame() #initialize output data.frame
	nsamples <- round(length(svector) * fudgefactor, 0)
	while (dur < targetdur) {
	s <- sample(svector, nsamples, replace = replacement)
	# sdf <- data.frame(duration = trades[s,'duration'],
	# quantity = trades[s,'quantity'])
	sdf <- data.frame(duration = duration[s],
	quantity = qty[s])
	if (is.null(tdf$duration)) {
	tdf <- sdf
	} else {
	tdf <- rbind(tdf, sdf)
	}
	dur <- sum(tdf$duration)
	nsamples <- round(((targetdur - dur) / avgdur) * fudgefactor, 0)
	nsamples <- ifelse(nsamples == 0, 1, nsamples)
	# print(nsamples) # for debugging
	dur
	}
	# could truncate data frame here to correct total duration
	# the row which takes our duration over the target
	xsrow <- last(which(cumsum(as.numeric(tdf$duration)) < (targetdur))) + 1
	if (xsrow == nrow(tdf)) {
	# the last row sampled takes us over targetdur
	adjxsrow <- sum(tdf$duration) - targetdur
	tdf$duration[xsrow] <- tdf$duration[xsrow] - adjxsrow
	} else if (xsrow < nrow(tdf)) {
	# the last iteration of the while loop added more than one row
	# which took our duration over the target
	tdf <- tdf[-seq.int(xsrow + 1, nrow(tdf), 1), ]
	adjxsrow <- sum(tdf$duration) - targetdur
	tdf$duration[xsrow] <- tdf$duration[xsrow] - adjxsrow
	}

	tdf # return target data frame
	} # end subsample

	#sample long, short, flat periods
	if(flatdur > 0){
	a <- flatdur/sum(flatdur_vec)
	fudgefactor <- ceiling(a * 100) / 100
	flatdf <- subsample(svector = flatrows, targetdur = flatdur, duration = flatdur_vec, qty = flatqty)
	} else {
	flatdf <- NULL
	}
	if(longdur > 0){ # ie. there are long round turn trades in the strategy
	a <- longdur/sum(longdur_vec)
	fudgefactor <- ceiling(a * 100) / 100
	longdf <- subsample(svector = longrows, targetdur = longdur, duration = longdur_vec, qty = longqty)
	} else {
	longdf <- NULL
	}
	if(shortdur > 0){ # ie. there are short round turn trades in the strategy
	a <- shortdur/sum(shortdur_vec)
	fudgefactor <- ceiling(a * 100) / 100
	shortdf <- subsample(svector = shortrows, targetdur = shortdur, duration = shortdur_vec, qty = shortqty)
	} else {
	shortdf <- NULL
	}
	# Lines 129-162 are directly from txnsim...as we are not levelling for now, there is no need to concern ourselves with layers
	# #browser()
	# # make the first layer
	# # 1. start with flat periods
	# firstlayer <- flatdf
	# # 2. segment trades for first layer
	# targetlongdur <- structure(round((calendardur-flatdur)*lsratio),units='secs',class='difftime')
	# if(!is.null(longdf)){ # ie. there are long round turn trades in the strategy
	# targetlongrow <- last(which(cumsum(as.numeric(longdf$duration))<targetlongdur))
	# firstlayer <- rbind(firstlayer,longdf[1:targetlongrow,])
	# } else {
	# targetlongrow <- 0
	# }
	# # firstlayer <- rbind(firstlayer,longdf[1:targetlongrow,])
	# if(!is.null(shortdf)){ # ie. there are short round turn trades in the strategy
	# targetshortrow <- last( which( cumsum(as.numeric(shortdf$duration))<(calendardur-sum(firstlayer$duration)) ) )
	# firstlayer <- rbind(firstlayer,shortdf[1:targetshortrow,])
	# } else {
	# targetshortrow <- 0
	# }
	# firstlayer <- firstlayer[sample(nrow(firstlayer),replace=FALSE),]
	# # firstlayer should be just slightly longer than calendardur, we'll truncate later
	#
	# tdf <- firstlayer # establish target data.frame
	#
	# # build a vector of start times
	# start <- first(trades$start) + cumsum(as.numeric(tdf$duration))
	# # add the first start time back in
	# start <- c(first(trades$start), start)
	# # take off the last end time, since we won't put in a closing trade
	# start <- start[-length(start)]
	# # add start column to tdf
	# tdf$start <- start
	# # rearrange columns for consistency
	# tdf <- tdf[, c("start", "duration", "quantity")]

	# Since we now have flatdf, longdf and shortdf we are in a position to combine the dataframes
	# to make a single dataframe of durations and quantities for our random strategy which we will
	# add transaction based on. To start, we will randomly select a flat pertiod and then sample
	# the remaining rows. This is to mirror production backtests which generally require lead time
	# before rules are activated and positions opened.
	startrow <- sample(flatrows, size = 1)
	# idx <- c(startrow, as.numeric(row.names((flatdf[-startrow,]))))
	startdf <- flatdf[startrow,]
	flatdf <- flatdf[-startrow,]
	mergedf <- rbind(flatdf, longdf, shortdf)
	mergedf <- mergedf[order(sample(nrow(mergedf), size = nrow(mergedf))),]
	mergedf <- rbind(startdf, mergedf)
	mergedf

	# Add transactions
	# First get prices
	dargs <- list()
	if (!is.null(dargs$env)) {
	env <- dargs$env
	} else {
	env <- .GlobalEnv
	}
	if (!is.null(dargs$prefer)) {
	prefer <- dargs$prefer
	} else {
	prefer <- NULL
	}
	# prices <- get('GSPC')
	prices <- getPrice(get("GSPC", pos = env), prefer = prefer)[, 1]
	txns <- list()

	dur_cumsum <- cumsum(mergedf$duration)
	# Now for tranactions
	for (r in 1:nrow(mergedf)) {
	# opening trade
	open <- data.frame(
	if(r == 1) {
	start = index(prices[dur_cumsum[r] - mergedf[r,1] + 1])
	} else {
	start = index(prices[dur_cumsum[r] - mergedf[r,1]])
	},
	TxnQty = mergedf[r, "quantity"],
	TxnPrice = as.numeric(prices[start])
	)
	colnames(open) <- c("start","TxnQty","TxnPrice")
	# closing trade
	close <-
	data.frame(
	start = index(prices[dur_cumsum[r]]),
	TxnQty = -1 * mergedf[r, "quantity"],
	TxnPrice = as.numeric(prices[dur_cumsum[r]])
	)
	txns[[r]] <- rbind(open, close)
	} # end loop over rows
	txns <- do.call(rbind, txns)
	txns <- xts(txns[, c("TxnQty", "TxnPrice")], order.by = txns[, 1])
	txns <- txns[which(txns$TxnQty != 0), ]
	txns

	# portname <- "random_trader"
	symbol <- "GSPC"
	# initPortf(portname, "GSPC")
	addTxns(Portfolio = "txnsim_rnorm_port",
	Symbol = symbol,
	TxnData = txns)
	updatePortf(Portfolio = "txnsim_rnorm_port")
	chart.Posn("random_trader", "GSPC")

	ex.txnsim <- function(Portfolio
	,n=10
	,replacement=FALSE
	, tradeDef='increased.to.reduced'
	, chart=FALSE
	)
	{
	out <- txnsim(Portfolio,n,replacement, tradeDef = tradeDef)
	if(isTRUE(chart)) {
	portnames <- blotter:::txnsim.portnames(Portfolio, replacement, n)
	for (i in 1:n){
	p<- portnames[i]
	symbols<-names(getPortfolio(p)$symbols)
	for(symbol in symbols) {
	dev.new()
	chart.Posn(p,symbol)
	}
	}
	}
	invisible(out)
	} # end ex.txnsim

	rnorm.wr <- ex.txnsim('txnsim_rnorm_port',1000, replacement = TRUE, chart = FALSE)
	plot(rnorm.wr)
	rnorm.wr$pvalues
	hist(rnorm.wr)