Skip to content

Instantly share code, notes, and snippets.

@flare9x
Created January 24, 2019 13:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save flare9x/d2ffa653a08bcdf795e46cb8cd20a2e9 to your computer and use it in GitHub Desktop.
Save flare9x/d2ffa653a08bcdf795e46cb8cd20a2e9 to your computer and use it in GitHub Desktop.
Back Test FDA Events
# Scrape Historical FDA events
# Clean Data - Extract Date from text
# Clean up Catalyst Column - remove date, white space
# Sort by ascending date order
# Note 2x non ticker symbols, these are removed
require(XML)
require(dplyr)
require(PerformanceAnalytics)
require(data.table)
require(xts)
require(RCurl)
require(dplyr)
require(lubridate)
# Import html data to data.frame
url <- "https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar"
urldata <- getURL(url)
fda.table <- as.data.frame(readHTMLTable(urldata, stringsAsFactors = FALSE))
# Rename Columns
colnames(fda.table)[1] <- "Ticker"
colnames(fda.table)[2] <- "Drug"
colnames(fda.table)[3] <- "Stage"
colnames(fda.table)[4] <- "Catalyst"
# Extrat Date from character text string
# Extracting first 10 elements as order of date positioning is consistent throughout the table
fda.table$Catalyst.Date <- substring(fda.table$Catalyst, 1, last = 10)
# Convert string Date to Date format
# Use lubridate package
fda.table$Catalyst.Date <- mdy(fda.table$Catalyst.Date)
# Sort data by date ascending order
# Use dplyr
fda.table <- arrange(fda.table, Catalyst.Date)
nrow(fda.table)
# Clean up text in Catalyst Column
# Remove white space and drop the date
#fda.table$Catalyst <- gsub("\\d+/\\d+/\\d+\\n\\s+(.+)$", "\\1", fda.table$Catalyst)
#\\W match a non-word character \\d+ match one or more numbers
#Put those two in brackets which means match either one of those.
#+ match either of the above pronouns more than once
#([a-zA-Z].*) match any letter (upper or lowercase), and then the remainder of the string (.* = 0 or more of any character) and capture it in a group.
#We return the group with \\1
fda.table$Catalyst <- gsub("^[\\W\\d+]+([a-zA-Z].*)","\\1",fda.table$Catalyst,perl=TRUE)
# Remove 2x non ticker symbols at bottom two rows of data frame
n <- dim(fda.table)[1]
fda.table <- fda.table[1:(n-2),]
# Subset List of Tickers
# Note used in function below
tickers <- fda.table$Ticker
#tickers <- unique(tickers) # Remove duplicate tickers
# Subset list of Dates
# Note used in function below
dates <- fda.table$Catalyst.Date
# Filter date frame to remove duplicates
#moo <- fda.table[!duplicated(fda.table$Ticker), ]
# Export symbol column as .txt file
#write.table(tickers,"D:/R Projects/biotech_catalyst.txt",append=TRUE,quote=FALSE,row.names=FALSE,col.names=FALSE)
# Back Test Script
# 10.19.2017
# Andrew Bannerman
require(lubridate)
require(dplyr)
require(magrittr)
require(TTR)
require(zoo)
require(data.table)
require(xts)
require(PerformanceAnalytics)
# Data Dir
input.data.dir <- "D:/R Projects/Biotech Catalyst Study/data"
png.output.dir <- "D:/R Projects/Biotech Catalyst Study/Biotech_catalyst_rsi2_chart_output"
row.names = tickers
#files = list.files(path = "C:/R Projects/Data/etf_list_nasdaq_dot_com/etf_13min", pattern = ".", full.names = TRUE)
# Initialize the output dataframe to which we will rowbind the results
cum_ret <- data.frame() # Cumualtive / Annualized Return
data_output_df <- data.frame() # Daily Differences, Running 5 day count until FDA date
# Back test function
genCHART = function(x){
next.png <- tickers[i]
date.list <- dates[i]
input.file <- paste0("D:/R Projects/Biotech Catalyst Study/data/",next.png,".csv")
next.file <- read.table(input.file,header=TRUE, sep=",", stringsAsFactors=FALSE)
new.df <- data.frame(next.file)
# For TS Data
#colnames(new.df)[1] <- "Date"
#colnames(new.df)[2] <- "Open"
#colnames(new.df)[3] <- "High"
#colnames(new.df)[4] <- "Low"
#colnames(new.df)[5] <- "Close"
# Convert Values To Numeric
cols <-c(3:8) # note TS is 2:5 and needs column names above
new.df[,cols] %<>% lapply(function(x) as.numeric(as.character(x)))
#Convert Date Column [1]
new.df$Date <- ymd(new.df$Date) #mdy for .txt, mdt)hms for TS intraday
# Merge Date and Time Column to column Date
# new.df$Date <- paste(new.df$Date, new.df$Time)
#new.df$Date <- as.POSIXct(new.df$Date, format="%Y-%m-%d %H:%M")
# Use TTR package to create rolling SMA n day moving average
# Create function and loop in order to repeat the desired number of SMAs for example 2:30
getSMA <- function(numdays) {
function(new.df) {
SMA(new.df[,"Close"], numdays) # Calls TTR package to create SMA
}
}
# Create a matrix to put the SMAs in
sma.matrix <- matrix(nrow=nrow(new.df), ncol=0)
# Loop for filling it
for (i in 2:3) {
sma.matrix <- cbind(sma.matrix, getSMA(i)(new.df))
}
# Rename columns
colnames(sma.matrix) <- sapply(2:3, function(n)paste("close.sma.n", n, sep=""))
# Bind to existing dataframe
new.df <- cbind(new.df, sma.matrix)
# Use TTR package to create rolling Standard Deviation
# Create function and loop in order to repeat the desired number of Stdev for example 2:30
getSD <- function(numdays) {
function(new.df) {
runSD(new.df$Close, numdays, cumulative = FALSE) # Calls TTR package to create SMA
}
}
# Create a matrix to put the SD's in
sd.matrix <- matrix(nrow=nrow(new.df), ncol=0)
# Loop for filling it
for (i in 2:3) {
sd.matrix <- cbind(sd.matrix, getSD(i)(new.df))
}
# Rename columns
colnames(sd.matrix) <- sapply(2:3, function(n)paste("close.sd.n", n, sep=""))
# Bind to existing dataframe
new.df <- cbind(new.df, sd.matrix)
# Use TTR package to create n day SMA RSI
getRSI.SMA <- function(numdays) {
function(new.df) {
RSI(new.df$Close, n=numdays, maType = SMA) # Calls TTR package to create RSI
}
}
# Create a matrix to put the RSI SMA's in
rsi.sma.matrix <- matrix(nrow=nrow(new.df), ncol=0)
# Loop for filling it
for (i in 2:3) {
rsi.sma.matrix <- cbind(rsi.sma.matrix, getRSI.SMA(i)(new.df))
}
# Rename columns
colnames(rsi.sma.matrix) <- sapply(2:3, function(n)paste("rsi.sma.n", n, sep=""))
# Bind to existing dataframe
new.df <- cbind(new.df, rsi.sma.matrix)
# Use TTR package to create n day EMA RSI
getRSI.EMA <- function(numdays) {
function(new.df) {
RSI(new.df$Close, n=numdays, maType = EMA) # Calls TTR package to create RSI
}
}
# Create a matrix to put the RSI EMAs's in
rsi.ema.matrix <- matrix(nrow=nrow(new.df), ncol=0)
# Loop for filling it
for (i in 2:3) {
rsi.ema.matrix <- cbind(rsi.ema.matrix, getRSI.EMA(i)(new.df))
}
# Rename columns
colnames(rsi.ema.matrix) <- sapply(2:3, function(n)paste("rsi.ema.n", n, sep=""))
# Bind to existing dataframe
new.df <- cbind(new.df, rsi.ema.matrix)
# Use base R to work out the rolling z-score (Close - roll mean) / stdev
#new.df$close.zscore.n2 <- apply(new.df[,c('Close','close.sma.n2', 'close.sd.n2')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n3 <- apply(new.df[,c('Close','close.sma.n3', 'close.sd.n3')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n4 <- apply(new.df[,c('Close','close.sma.n4', 'close.sd.n4')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n5 <- apply(new.df[,c('Close','close.sma.n5', 'close.sd.n5')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n6 <- apply(new.df[,c('Close','close.sma.n6', 'close.sd.n6')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n7 <- apply(new.df[,c('Close','close.sma.n7', 'close.sd.n7')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n8 <- apply(new.df[,c('Close','close.sma.n8', 'close.sd.n8')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n9 <- apply(new.df[,c('Close','close.sma.n9', 'close.sd.n9')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n10 <- apply(new.df[,c('Close','close.sma.n10', 'close.sd.n10')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n11 <- apply(new.df[,c('Close','close.sma.n11', 'close.sd.n11')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n12 <- apply(new.df[,c('Close','close.sma.n12', 'close.sd.n12')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n13 <- apply(new.df[,c('Close','close.sma.n13', 'close.sd.n13')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n14 <- apply(new.df[,c('Close','close.sma.n14', 'close.sd.n14')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n15 <- apply(new.df[,c('Close','close.sma.n15', 'close.sd.n15')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n16 <- apply(new.df[,c('Close','close.sma.n16', 'close.sd.n16')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n17 <- apply(new.df[,c('Close','close.sma.n17', 'close.sd.n17')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n18 <- apply(new.df[,c('Close','close.sma.n18', 'close.sd.n18')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n19 <- apply(new.df[,c('Close','close.sma.n19', 'close.sd.n19')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n20 <- apply(new.df[,c('Close','close.sma.n20', 'close.sd.n20')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n21 <- apply(new.df[,c('Close','close.sma.n21', 'close.sd.n21')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n22 <- apply(new.df[,c('Close','close.sma.n22', 'close.sd.n22')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n23 <- apply(new.df[,c('Close','close.sma.n23', 'close.sd.n23')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n24 <- apply(new.df[,c('Close','close.sma.n24', 'close.sd.n24')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n25 <- apply(new.df[,c('Close','close.sma.n25', 'close.sd.n25')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n26 <- apply(new.df[,c('Close','close.sma.n26', 'close.sd.n26')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n27 <- apply(new.df[,c('Close','close.sma.n27', 'close.sd.n27')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n28 <- apply(new.df[,c('Close','close.sma.n28', 'close.sd.n28')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n29 <- apply(new.df[,c('Close','close.sma.n29', 'close.sd.n29')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n30 <- apply(new.df[,c('Close','close.sma.n30', 'close.sd.n30')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n31 <- apply(new.df[,c('Close','close.sma.n31', 'close.sd.n31')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n32 <- apply(new.df[,c('Close','close.sma.n32', 'close.sd.n32')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n33 <- apply(new.df[,c('Close','close.sma.n33', 'close.sd.n33')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n34 <- apply(new.df[,c('Close','close.sma.n34', 'close.sd.n34')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n35 <- apply(new.df[,c('Close','close.sma.n35', 'close.sd.n35')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n36 <- apply(new.df[,c('Close','close.sma.n36', 'close.sd.n36')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n37 <- apply(new.df[,c('Close','close.sma.n37', 'close.sd.n37')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n38 <- apply(new.df[,c('Close','close.sma.n38', 'close.sd.n38')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n39 <- apply(new.df[,c('Close','close.sma.n39', 'close.sd.n39')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n40 <- apply(new.df[,c('Close','close.sma.n40', 'close.sd.n40')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n41 <- apply(new.df[,c('Close','close.sma.n41', 'close.sd.n41')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n42 <- apply(new.df[,c('Close','close.sma.n42', 'close.sd.n42')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n43 <- apply(new.df[,c('Close','close.sma.n43', 'close.sd.n43')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n44 <- apply(new.df[,c('Close','close.sma.n44', 'close.sd.n44')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n45 <- apply(new.df[,c('Close','close.sma.n45', 'close.sd.n45')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n46 <- apply(new.df[,c('Close','close.sma.n46', 'close.sd.n46')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n47 <- apply(new.df[,c('Close','close.sma.n47', 'close.sd.n47')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n48 <- apply(new.df[,c('Close','close.sma.n48', 'close.sd.n48')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n49 <- apply(new.df[,c('Close','close.sma.n49', 'close.sd.n49')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n50 <- apply(new.df[,c('Close','close.sma.n50', 'close.sd.n50')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n51 <- apply(new.df[,c('Close','close.sma.n51', 'close.sd.n51')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n52 <- apply(new.df[,c('Close','close.sma.n52', 'close.sd.n52')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n53 <- apply(new.df[,c('Close','close.sma.n53', 'close.sd.n53')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n54 <- apply(new.df[,c('Close','close.sma.n54', 'close.sd.n54')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n55 <- apply(new.df[,c('Close','close.sma.n55', 'close.sd.n55')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n56 <- apply(new.df[,c('Close','close.sma.n56', 'close.sd.n56')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n57 <- apply(new.df[,c('Close','close.sma.n57', 'close.sd.n57')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n58 <- apply(new.df[,c('Close','close.sma.n58', 'close.sd.n58')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n59 <- apply(new.df[,c('Close','close.sma.n59', 'close.sd.n59')], 1, function(x) { (x[1]-x[2])/x[3] } )
#new.df$close.zscore.n60 <- apply(new.df[,c('Close','close.sma.n60', 'close.sd.n60')], 1, function(x) { (x[1]-x[2])/x[3] } )
# Use TTR package to create rolling MACD
fast <- 3 #3
slow <- 40 #40
signal <- 5 # 5 tests good
new.df$ema.fast <- EMA(new.df[,"Close"], fast)
new.df$ema.slow <- EMA(new.df[,"Close"], slow)
new.df$macd <- apply(new.df[,c('ema.fast','ema.slow')], 1, function(x) { (x[1]-x[2])} )
new.df$signal <- EMA(new.df[,"macd"], signal)
# Calculate quartiles, where close is relation to range (Close - High) / (High - Low)
#new.df$quartile <- apply(new.df[,c('Close', 'Low', 'High')], 1, function(x) { (x[1]-x[2])/(x[3]-x[2])} )
# Compute daily price differences
# We replicate NA 1 time in order to maintain correct positioning of differences
# Within the data frame
new.df$close.diff <- c(rep(NA, 1), diff(new.df$Close, lag = 1, differences = 1, arithmetic = TRUE, na.pad = TRUE))
# Convert all NA to 0
new.df[is.na(new.df)] <- 0
# Calculate Returns from open to close
new.df$ocret <- apply(new.df[,c('Open', 'Close')], 1, function(x) { (x[2]-x[1])/x[1]} )
# Calculate Close-to-Close returns
new.df$clret <- ROC(new.df$Close, type = c("discrete"))
new.df$clret[1] <- 0
# Subset Date
# Pick date of event subtract 6 months
start.date <- date.list - 6*30
#date.list <- date.list + 3*30
new.df <- subset(new.df, Date >= as.Date(start.date) & Date <= as.Date(date.list))
# Name indicators #
#train.indicator <- train.set$close.zscore.n10
#test.indicator <- test.set$close.zscore.n10
macd <- new.df$macd
signal <- new.df$signal
rsi.sma <- new.df$rsi.sma.n2
rsi.ema <- new.df$rsi.ema.n2
# Enter buy / sell rules
#new.df$enter.momo <- ifelse(macd > signal, 1,0)
#new.df$exit.momo <- ifelse(macd < signal, 1,0)
#new.df$enter.mean.rev <- ifelse(macd < signal, 1,0)
#new.df$exit.mean.rev <- ifelse(macd > signal, 1,0)
new.df$enter.momo <- ifelse(rsi.sma > 70, 1,0)
new.df$exit.momo <- ifelse(rsi.sma < 85, 1,0)
new.df$enter.mean.rev <- ifelse(rsi.sma < 20, 1,0)
new.df$exit.mean.rev <- ifelse(rsi.sma > 70, 1,0)
#plot(new.df$rsi.sma.n2,type="l")
# Momo
new.df <- new.df %>%
dplyr::mutate(sig.momo = ifelse(enter.momo == 1, 1,
ifelse(exit.momo ==1, 0, 0)))
# Mean Rev
new.df <- new.df %>%
dplyr::mutate(sig.mean.rev = ifelse(enter.mean.rev == 1, 1,
ifelse(exit.mean.rev == 1, 0, 0)))
# lag signal by one forward day to signal entry next day
new.df$sig.momo <- lag(new.df$sig.momo,1) # Note k=1 implies a move *forward*
new.df$sig.mean.rev <- lag(new.df$sig.mean.rev,1) # Note k=1 implies a move *forward*
new.df[is.na(new.df)] <- 0 # Set NA to 0
# Calculate equity curves
# Momentum
new.df <- new.df %>%
dplyr::mutate(RunID = rleid(sig.momo)) %>%
group_by(RunID) %>%
dplyr::mutate(equity.curve.momo = ifelse(sig.momo == 0, 0,
ifelse(row_number() == 1, ocret, clret))) %>%
ungroup() %>%
select(-RunID)
# Mean Rev
new.df <- new.df %>%
dplyr::mutate(RunID = rleid(sig.mean.rev)) %>%
group_by(RunID) %>%
dplyr::mutate(equity.curve.mean.rev = ifelse(sig.mean.rev == 0, 0,
ifelse(row_number() == 1, ocret, clret))) %>%
ungroup() %>%
select(-RunID)
# Pull select columns from data frame to make XTS whilst retaining formats
xts1 = xts(new.df$equity.curve.momo, order.by=as.POSIXct(new.df$Date, format="%Y-%m-%d %H:%M"))
xts2 = xts(new.df$equity.curve.mean.rev, order.by=as.POSIXct(new.df$Date, format="%Y-%m-%d %H:%M"))
xts3 = xts(new.df$clret, order.by=as.POSIXct(new.df$Date, format="%Y-%m-%d %H:%M"))
# Join XTS together
compare <- cbind(xts1,xts2,xts3)
# Use the PerformanceAnalytics package for trade statistics
colnames(compare) <- c("Momentum","Mean Reversion","Buy And Hold")
charts.PerformanceSummary(compare,main="Cumulative Returns", wealth.index=TRUE, colorset=rainbow12equal)
#performance.table <- rbind(maxDrawdown(compare), CalmarRatio(compare),table.DownsideRisk(compare))
#drawdown.table <- rbind(table.Drawdowns(compare))
#table.AnnualizedReturns(compare)
#dev.off()
#logRets <- log(cumprod(1+compare))
#chart.TimeSeries(logRets, legend.loc='topleft', colorset=rainbow12equal)
# Place time in a vector for .png file name differentiation
time <- as.character(Sys.time())
time <- gsub("[^[:alnum:]]", "", time)
#print(performance.table)
#print(drawdown.table)
# Save plot out put to folder
mypath <- file.path(png.output.dir,paste(time,next.png[1],".png"))
# mypath <- file.path(png.output.dir,paste(next.png[1], ".png", sep = ""))
png(file=mypath,width= 1400, height=1000)
mytitle = paste0(next.png[1])
#png(file=sprintf("Rplot_%s_%s",mypath,".png", format(Sys.time(), '%d%m%Y_%H_%M'), '%03d'))
#charts.PerformanceSummary(compare,main=mytitle,wealth.index=TRUE,colorset=rainbow12equal)
chart.CumReturns(compare,main=mytitle,wealth.index=TRUE,colorset=rainbow12equal)
dev.off()
#png(filename = "C:/R Projects/moo.png") #width= 1000, height=800)
#chart.CumReturns(compare,wealth.index=TRUE,colorset=rainbow12equal)
#chart.Drawdown(compare,geometric =TRUE,colorset=rainbow12equal)
#dev.off()
#charts.PerformanceSummary(compare,main="Cumulative Returns"=TRUE,colorset=rainbow12equal)
# Prepare Data For Cumulative and compound returns
# Calculate annualized and cumulatuve return
# Make prices vector
# Momoentum
momo.prices <- new.df[,"equity.curve.momo", drop = FALSE]
colnames(new.df)
# Denote n the number of time periods:
#n <- nrow(momo.prices)
# Calculate close to close returns
# lead in with rep,NA,1 to maintain length of vector comparible to data frame
#momo.close_ret <- c(rep(NA, 1),(momo.prices[2:n, 1] - momo.prices[1:(n-1), 1])/momo.prices[1:(n-1), 1])
#close_ret[1] <- 0
# Compute continuously returns (log returns)
#close_ccret <- log(prices[2:n, 1]) - log(prices[1:(n-1), 1])
# Compute gross returns
momo.close_gret <- 1 + momo.prices # use close to close ret
# Compute future values
momo.close_fv <- cumprod(momo.close_gret)
# Obtain first and last values
momo.ret.last <- tail(momo.close_fv, n=1)
momo.ret.first <- head(momo.close_fv, n=1)
momo.cum.ret <- (momo.ret.last-momo.ret.first)/momo.ret.first
# Get First And Last row to calculate time between
momo.ret.first.row <- head(new.df$Date, n=1)
momo.ret.last.row <- tail(new.df$Date, n=1)
# Time diff
#trading.years.between <- as.numeric(difftime(as.Date(ret.last.row), as.Date(ret.first.row), unit="weeks"))/52.25
# Find time diff
momo.ret.time <- momo.ret.last.row - momo.ret.first.row
momo.ret.trading.years.between <- momo.ret.time/365 #252 trading days or 365
momo.ret.trading.years.between <- as.numeric(momo.ret.trading.years.between, units="days") # Extract numerical value from time difference 'Time difference of 2837.208 days'
# Annualized return
# (1 + % diff of final) / (last balance to the power of 1/time first and last balance) -1
momo.ret.annual.return <- (1+momo.cum.ret) ^ (1/momo.ret.trading.years.between) -1
# Prepare Data For Cumulative and compound returns
# Calculate annualized and cumulatuve return
# Make prices vector
# Mean Reversion
mean.rev.prices <- new.df[,"equity.curve.mean.rev", drop = FALSE]
# Denote n the number of time periods:
#n <- nrow(mean.rev.prices)
# Calculate close to close returns
# lead in with rep,NA,1 to maintain length of vector comparible to data frame
#momo.close_ret <- c(rep(NA, 1),(momo.prices[2:n, 1] - momo.prices[1:(n-1), 1])/momo.prices[1:(n-1), 1])
#close_ret[1] <- 0
# Compute continuously returns (log returns)
#close_ccret <- log(prices[2:n, 1]) - log(prices[1:(n-1), 1])
# Compute gross returns
mean.rev.close_gret <- 1 + mean.rev.prices # use close to close ret
# Compute future values
mean.rev.close_fv <- cumprod(mean.rev.close_gret)
# Obtain first and last values
mean.rev.ret.last <- tail(mean.rev.close_fv, n=1)
mean.rev.ret.first <- head(mean.rev.close_fv, n=1)
mean.rev.cum.ret <- (mean.rev.ret.last-mean.rev.ret.first)/mean.rev.ret.first
# Get First And Last row to calculate time between
mean.rev.ret.first.row <- head(new.df$Date, n=1)
mean.rev.ret.last.row <- tail(new.df$Date, n=1)
# Time diff
#trading.years.between <- as.numeric(difftime(as.Date(ret.last.row), as.Date(ret.first.row), unit="weeks"))/52.25
# Find time diff
mean.rev.ret.time <- mean.rev.ret.last.row - momo.ret.first.row
mean.rev.ret.trading.years.between <- mean.rev.ret.time/365 #252 trading days or 365
mean.rev.ret.trading.years.between <- as.numeric(mean.rev.ret.trading.years.between, units="days") # Extract numerical value from time difference 'Time difference of 2837.208 days'
# Annualized return
# (1 + % diff of final) / (last balance to the power of 1/time first and last balance) -1
mean.rev.ret.annual.return <- (1+mean.rev.cum.ret) ^ (1/mean.rev.ret.trading.years.between) -1
# Buy And Hold Annualized / Cumulative
# Extract by and hold equity curve
buy.hold.prices <- new.df[,"clret", drop = FALSE]
# Denote n the number of time periods:
#n <- nrow(mean.rev.prices)
# Calculate close to close returns
# lead in with rep,NA,1 to maintain length of vector comparible to data frame
#momo.close_ret <- c(rep(NA, 1),(momo.prices[2:n, 1] - momo.prices[1:(n-1), 1])/momo.prices[1:(n-1), 1])
#close_ret[1] <- 0
# Compute continuously returns (log returns)
#close_ccret <- log(prices[2:n, 1]) - log(prices[1:(n-1), 1])
# Compute gross returns
buy.hold.close.gret <- 1 + buy.hold.prices # use close to close ret
# Compute future values
buy.hold.close_fv <- cumprod(buy.hold.close.gret)
# Obtain first and last values
buy.hold.ret.last <- tail(buy.hold.close_fv, n=1)
buy.hold.ret.first <- head(buy.hold.close_fv, n=1)
buy.hold.cum.ret <- (buy.hold.ret.last-buy.hold.ret.first)/buy.hold.ret.first
# Get First And Last row to calculate time between
buy.hold.ret.first.row <- head(new.df$Date, n=1)
buy.hold.ret.last.row <- tail(new.df$Date, n=1)
# Time diff
#trading.years.between <- as.numeric(difftime(as.Date(ret.last.row), as.Date(ret.first.row), unit="weeks"))/52.25
# Find time diff
buy.hold.ret.time <- buy.hold.ret.last.row - momo.ret.first.row
buy.hold.ret.trading.years.between <- buy.hold.ret.time/365 #252 trading days or 365
buy.hold.ret.trading.years.between <- as.numeric(buy.hold.ret.trading.years.between, units="days") # Extract numerical value from time difference 'Time difference of 2837.208 days'
# Annualized return
# (1 + % diff of final) / (last balance to the power of 1/time first and last balance) -1
buy.hold.ret.annual.return <- (1+buy.hold.cum.ret) ^ (1/buy.hold.ret.trading.years.between) -1
# Count days 6 months before FDA date (6*30 days prior)
new.df$rep <- rep(1:NROW(new.df), each = 1, len = NROW(new.df))
# Group by day rep number
#final <- new.df %>%
# group_by(rep) %>%
#summarize(n=n(),mn=mean(close.diff),sd=sd(close.diff))
# Create the output of the function : a named data.frame
#out_df <- data.frame("Momo Annualized Return" = momo.ret.annual.return[,1],
# "Momo Cumulative Return" = momo.cum.ret[,1],
# "Mean Reversion Annualized Return" = mean.rev.ret.annual.return[,1],
# "Mean Reversion Cumulative Return" = mean.rev.cum.ret[,1],
# "Buy And Hold Annualized Return" = buy.hold.ret.annual.return[,1],
# "Buy And Hold Cumulative Return" = buy.hold.cum.ret[,1])
# Create data output of rep and close.diff columns rbind
data_output_df <- data.frame("Ticker" = new.df$Ticker,
"Rep" = new.df$rep,
#"Close Differences" = new.df$close.diff )
"Close Ret" = new.df$clret)
# Sanity Check performanceAnalystics to confirm cum / annual return
#Return.annualized(xts1)
#Return.cumulative(xts1,geometric = TRUE)
#return(out_df)
#return(data_output_df)
}
# Study results in data frame
# Note this used out_df that is initialized top of script
# Make row names a column
cum_ret.results <- tibble::rownames_to_column(cum_ret,"Symbol")
#cum_ret1$decile <- ntile(cum_ret$Momo.Cumulative.Return,10)
# Sort Results
cum_ret.results <- arrange(cum_ret.results,desc(Momo.Cumulative.Return)) # Sort by Momentum cumulative return
head(cum_ret.results,50) # Print console top 50
cum_ret.results <- arrange(cum_ret.results,desc(Mean.Reversion.Cumulative.Return)) # Sort by Mean Reversion cumulative return
NROW(cum_ret.results)
head(cum_ret.results,50) # Print console top 50
cum_ret.results <- arrange(cum_ret.results,desc(Buy.And.Hold.Cumulative.Return)) # Sort by Buy and Hold cumulative return
head(cum_ret.results,50) # Print console top 50
# Split by month... 1,2,3,4,5,6
# Take mean daily change per time period for each month
# See if anything notable per time
# to do
# Group by event type
#to do
# Some Statistics
# Positive cumulative, neg, total, min max
pos <- sum(cum_ret.results$Buy.And.Hold.Cumulative.Return >0, na.rm = TRUE)
neg <- sum(cum_ret.results$Buy.And.Hold.Cumulative.Return <0, na.rm = TRUE)
total <- pos+neg
pos/750
sum(cum_ret.results$Buy.And.Hold.Cumulative.Return ==0, na.rm = TRUE)
mean(cum_ret.results$Buy.And.Hold.Cumulative.Return)
max(cum_ret.results$Buy.And.Hold.Cumulative.Return)
# Subset by cumulative return
cum.pos <- filter(cum_ret.results, Buy.And.Hold.Cumulative.Return >0)
str(cum.pos)
mean(cum.pos$Buy.And.Hold.Cumulative.Return)
plot(cum.pos$Buy.And.Hold.Cumulative.Return)
tail(data_output_df)
# Perform mean of all results
mean <- data_output_df %<>%
group_by(Rep) %>%
summarise(mean=mean(Close.Differences,na.rm = TRUE))
# Plot
library(ggplot2)
ggplot(mean, aes(Rep, mean)) +
geom_col()+
theme_classic()+
scale_x_continuous(breaks = seq(0, 127, by = 7))+
ggtitle("Mean Daily Spread By Trading Day Pre/Post FDA Event - Day 63 = FDA Event", subtitle = "Biopharmcatlyst") +
labs(x="Day",y="Mean Daily Spread")+
theme(plot.title = element_text(hjust=0.5),plot.subtitle =element_text(hjust=0.5))
# Loop for running function over all files in fda list (note files to be held on hard drive)
for (i in 1:length(tickers)){
tryCatch({
temp <- genCHART(tickers[[i]])
#rownames(temp) <- paste0("Daily_",row.names[i])
#cum_ret <- rbind.data.frame(cum_ret, temp)
data_output_df <- rbind.data.frame(data_output_df,temp)
ptm0 <- proc.time()
Sys.sleep(0.1)
ptm1=proc.time() - ptm0
time=as.numeric(ptm1[3])
cat('\n','Iteration',i,'took', time, "seconds to complete")
}, error = function(e) { print(paste("i =", i, "failed:")) })
}
#Save file
# Write output to file
#write.csv(cum_ret.results,file="D:/R Projects/Biotech Catalyst Study/fda.first.output.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment