Created
January 24, 2019 13:09
-
-
Save flare9x/d2ffa653a08bcdf795e46cb8cd20a2e9 to your computer and use it in GitHub Desktop.
Back Test FDA Events
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape Historical FDA events | |
# Clean Data - Extract Date from text | |
# Clean up Catalyst Column - remove date, white space | |
# Sort by ascending date order | |
# Note 2x non ticker symbols, these are removed | |
require(XML) | |
require(dplyr) | |
require(PerformanceAnalytics) | |
require(data.table) | |
require(xts) | |
require(RCurl) | |
require(dplyr) | |
require(lubridate) | |
# Import html data to data.frame | |
url <- "https://www.biopharmcatalyst.com/calendars/historical-catalyst-calendar" | |
urldata <- getURL(url) | |
fda.table <- as.data.frame(readHTMLTable(urldata, stringsAsFactors = FALSE)) | |
# Rename Columns | |
colnames(fda.table)[1] <- "Ticker" | |
colnames(fda.table)[2] <- "Drug" | |
colnames(fda.table)[3] <- "Stage" | |
colnames(fda.table)[4] <- "Catalyst" | |
# Extrat Date from character text string | |
# Extracting first 10 elements as order of date positioning is consistent throughout the table | |
fda.table$Catalyst.Date <- substring(fda.table$Catalyst, 1, last = 10) | |
# Convert string Date to Date format | |
# Use lubridate package | |
fda.table$Catalyst.Date <- mdy(fda.table$Catalyst.Date) | |
# Sort data by date ascending order | |
# Use dplyr | |
fda.table <- arrange(fda.table, Catalyst.Date) | |
nrow(fda.table) | |
# Clean up text in Catalyst Column | |
# Remove white space and drop the date | |
#fda.table$Catalyst <- gsub("\\d+/\\d+/\\d+\\n\\s+(.+)$", "\\1", fda.table$Catalyst) | |
#\\W match a non-word character \\d+ match one or more numbers | |
#Put those two in brackets which means match either one of those. | |
#+ match either of the above pronouns more than once | |
#([a-zA-Z].*) match any letter (upper or lowercase), and then the remainder of the string (.* = 0 or more of any character) and capture it in a group. | |
#We return the group with \\1 | |
fda.table$Catalyst <- gsub("^[\\W\\d+]+([a-zA-Z].*)","\\1",fda.table$Catalyst,perl=TRUE) | |
# Remove 2x non ticker symbols at bottom two rows of data frame | |
n <- dim(fda.table)[1] | |
fda.table <- fda.table[1:(n-2),] | |
# Subset List of Tickers | |
# Note used in function below | |
tickers <- fda.table$Ticker | |
#tickers <- unique(tickers) # Remove duplicate tickers | |
# Subset list of Dates | |
# Note used in function below | |
dates <- fda.table$Catalyst.Date | |
# Filter date frame to remove duplicates | |
#moo <- fda.table[!duplicated(fda.table$Ticker), ] | |
# Export symbol column as .txt file | |
#write.table(tickers,"D:/R Projects/biotech_catalyst.txt",append=TRUE,quote=FALSE,row.names=FALSE,col.names=FALSE) | |
# Back Test Script | |
# 10.19.2017 | |
# Andrew Bannerman | |
require(lubridate) | |
require(dplyr) | |
require(magrittr) | |
require(TTR) | |
require(zoo) | |
require(data.table) | |
require(xts) | |
require(PerformanceAnalytics) | |
# Data Dir | |
input.data.dir <- "D:/R Projects/Biotech Catalyst Study/data" | |
png.output.dir <- "D:/R Projects/Biotech Catalyst Study/Biotech_catalyst_rsi2_chart_output" | |
row.names = tickers | |
#files = list.files(path = "C:/R Projects/Data/etf_list_nasdaq_dot_com/etf_13min", pattern = ".", full.names = TRUE) | |
# Initialize the output dataframe to which we will rowbind the results | |
cum_ret <- data.frame() # Cumualtive / Annualized Return | |
data_output_df <- data.frame() # Daily Differences, Running 5 day count until FDA date | |
# Back test function | |
genCHART = function(x){ | |
next.png <- tickers[i] | |
date.list <- dates[i] | |
input.file <- paste0("D:/R Projects/Biotech Catalyst Study/data/",next.png,".csv") | |
next.file <- read.table(input.file,header=TRUE, sep=",", stringsAsFactors=FALSE) | |
new.df <- data.frame(next.file) | |
# For TS Data | |
#colnames(new.df)[1] <- "Date" | |
#colnames(new.df)[2] <- "Open" | |
#colnames(new.df)[3] <- "High" | |
#colnames(new.df)[4] <- "Low" | |
#colnames(new.df)[5] <- "Close" | |
# Convert Values To Numeric | |
cols <-c(3:8) # note TS is 2:5 and needs column names above | |
new.df[,cols] %<>% lapply(function(x) as.numeric(as.character(x))) | |
#Convert Date Column [1] | |
new.df$Date <- ymd(new.df$Date) #mdy for .txt, mdt)hms for TS intraday | |
# Merge Date and Time Column to column Date | |
# new.df$Date <- paste(new.df$Date, new.df$Time) | |
#new.df$Date <- as.POSIXct(new.df$Date, format="%Y-%m-%d %H:%M") | |
# Use TTR package to create rolling SMA n day moving average | |
# Create function and loop in order to repeat the desired number of SMAs for example 2:30 | |
getSMA <- function(numdays) { | |
function(new.df) { | |
SMA(new.df[,"Close"], numdays) # Calls TTR package to create SMA | |
} | |
} | |
# Create a matrix to put the SMAs in | |
sma.matrix <- matrix(nrow=nrow(new.df), ncol=0) | |
# Loop for filling it | |
for (i in 2:3) { | |
sma.matrix <- cbind(sma.matrix, getSMA(i)(new.df)) | |
} | |
# Rename columns | |
colnames(sma.matrix) <- sapply(2:3, function(n)paste("close.sma.n", n, sep="")) | |
# Bind to existing dataframe | |
new.df <- cbind(new.df, sma.matrix) | |
# Use TTR package to create rolling Standard Deviation | |
# Create function and loop in order to repeat the desired number of Stdev for example 2:30 | |
getSD <- function(numdays) { | |
function(new.df) { | |
runSD(new.df$Close, numdays, cumulative = FALSE) # Calls TTR package to create SMA | |
} | |
} | |
# Create a matrix to put the SD's in | |
sd.matrix <- matrix(nrow=nrow(new.df), ncol=0) | |
# Loop for filling it | |
for (i in 2:3) { | |
sd.matrix <- cbind(sd.matrix, getSD(i)(new.df)) | |
} | |
# Rename columns | |
colnames(sd.matrix) <- sapply(2:3, function(n)paste("close.sd.n", n, sep="")) | |
# Bind to existing dataframe | |
new.df <- cbind(new.df, sd.matrix) | |
# Use TTR package to create n day SMA RSI | |
getRSI.SMA <- function(numdays) { | |
function(new.df) { | |
RSI(new.df$Close, n=numdays, maType = SMA) # Calls TTR package to create RSI | |
} | |
} | |
# Create a matrix to put the RSI SMA's in | |
rsi.sma.matrix <- matrix(nrow=nrow(new.df), ncol=0) | |
# Loop for filling it | |
for (i in 2:3) { | |
rsi.sma.matrix <- cbind(rsi.sma.matrix, getRSI.SMA(i)(new.df)) | |
} | |
# Rename columns | |
colnames(rsi.sma.matrix) <- sapply(2:3, function(n)paste("rsi.sma.n", n, sep="")) | |
# Bind to existing dataframe | |
new.df <- cbind(new.df, rsi.sma.matrix) | |
# Use TTR package to create n day EMA RSI | |
getRSI.EMA <- function(numdays) { | |
function(new.df) { | |
RSI(new.df$Close, n=numdays, maType = EMA) # Calls TTR package to create RSI | |
} | |
} | |
# Create a matrix to put the RSI EMAs's in | |
rsi.ema.matrix <- matrix(nrow=nrow(new.df), ncol=0) | |
# Loop for filling it | |
for (i in 2:3) { | |
rsi.ema.matrix <- cbind(rsi.ema.matrix, getRSI.EMA(i)(new.df)) | |
} | |
# Rename columns | |
colnames(rsi.ema.matrix) <- sapply(2:3, function(n)paste("rsi.ema.n", n, sep="")) | |
# Bind to existing dataframe | |
new.df <- cbind(new.df, rsi.ema.matrix) | |
# Use base R to work out the rolling z-score (Close - roll mean) / stdev | |
#new.df$close.zscore.n2 <- apply(new.df[,c('Close','close.sma.n2', 'close.sd.n2')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n3 <- apply(new.df[,c('Close','close.sma.n3', 'close.sd.n3')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n4 <- apply(new.df[,c('Close','close.sma.n4', 'close.sd.n4')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n5 <- apply(new.df[,c('Close','close.sma.n5', 'close.sd.n5')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n6 <- apply(new.df[,c('Close','close.sma.n6', 'close.sd.n6')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n7 <- apply(new.df[,c('Close','close.sma.n7', 'close.sd.n7')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n8 <- apply(new.df[,c('Close','close.sma.n8', 'close.sd.n8')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n9 <- apply(new.df[,c('Close','close.sma.n9', 'close.sd.n9')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n10 <- apply(new.df[,c('Close','close.sma.n10', 'close.sd.n10')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n11 <- apply(new.df[,c('Close','close.sma.n11', 'close.sd.n11')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n12 <- apply(new.df[,c('Close','close.sma.n12', 'close.sd.n12')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n13 <- apply(new.df[,c('Close','close.sma.n13', 'close.sd.n13')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n14 <- apply(new.df[,c('Close','close.sma.n14', 'close.sd.n14')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n15 <- apply(new.df[,c('Close','close.sma.n15', 'close.sd.n15')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n16 <- apply(new.df[,c('Close','close.sma.n16', 'close.sd.n16')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n17 <- apply(new.df[,c('Close','close.sma.n17', 'close.sd.n17')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n18 <- apply(new.df[,c('Close','close.sma.n18', 'close.sd.n18')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n19 <- apply(new.df[,c('Close','close.sma.n19', 'close.sd.n19')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n20 <- apply(new.df[,c('Close','close.sma.n20', 'close.sd.n20')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n21 <- apply(new.df[,c('Close','close.sma.n21', 'close.sd.n21')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n22 <- apply(new.df[,c('Close','close.sma.n22', 'close.sd.n22')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n23 <- apply(new.df[,c('Close','close.sma.n23', 'close.sd.n23')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n24 <- apply(new.df[,c('Close','close.sma.n24', 'close.sd.n24')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n25 <- apply(new.df[,c('Close','close.sma.n25', 'close.sd.n25')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n26 <- apply(new.df[,c('Close','close.sma.n26', 'close.sd.n26')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n27 <- apply(new.df[,c('Close','close.sma.n27', 'close.sd.n27')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n28 <- apply(new.df[,c('Close','close.sma.n28', 'close.sd.n28')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n29 <- apply(new.df[,c('Close','close.sma.n29', 'close.sd.n29')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n30 <- apply(new.df[,c('Close','close.sma.n30', 'close.sd.n30')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n31 <- apply(new.df[,c('Close','close.sma.n31', 'close.sd.n31')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n32 <- apply(new.df[,c('Close','close.sma.n32', 'close.sd.n32')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n33 <- apply(new.df[,c('Close','close.sma.n33', 'close.sd.n33')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n34 <- apply(new.df[,c('Close','close.sma.n34', 'close.sd.n34')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n35 <- apply(new.df[,c('Close','close.sma.n35', 'close.sd.n35')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n36 <- apply(new.df[,c('Close','close.sma.n36', 'close.sd.n36')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n37 <- apply(new.df[,c('Close','close.sma.n37', 'close.sd.n37')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n38 <- apply(new.df[,c('Close','close.sma.n38', 'close.sd.n38')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n39 <- apply(new.df[,c('Close','close.sma.n39', 'close.sd.n39')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n40 <- apply(new.df[,c('Close','close.sma.n40', 'close.sd.n40')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n41 <- apply(new.df[,c('Close','close.sma.n41', 'close.sd.n41')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n42 <- apply(new.df[,c('Close','close.sma.n42', 'close.sd.n42')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n43 <- apply(new.df[,c('Close','close.sma.n43', 'close.sd.n43')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n44 <- apply(new.df[,c('Close','close.sma.n44', 'close.sd.n44')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n45 <- apply(new.df[,c('Close','close.sma.n45', 'close.sd.n45')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n46 <- apply(new.df[,c('Close','close.sma.n46', 'close.sd.n46')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n47 <- apply(new.df[,c('Close','close.sma.n47', 'close.sd.n47')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n48 <- apply(new.df[,c('Close','close.sma.n48', 'close.sd.n48')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n49 <- apply(new.df[,c('Close','close.sma.n49', 'close.sd.n49')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n50 <- apply(new.df[,c('Close','close.sma.n50', 'close.sd.n50')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n51 <- apply(new.df[,c('Close','close.sma.n51', 'close.sd.n51')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n52 <- apply(new.df[,c('Close','close.sma.n52', 'close.sd.n52')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n53 <- apply(new.df[,c('Close','close.sma.n53', 'close.sd.n53')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n54 <- apply(new.df[,c('Close','close.sma.n54', 'close.sd.n54')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n55 <- apply(new.df[,c('Close','close.sma.n55', 'close.sd.n55')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n56 <- apply(new.df[,c('Close','close.sma.n56', 'close.sd.n56')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n57 <- apply(new.df[,c('Close','close.sma.n57', 'close.sd.n57')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n58 <- apply(new.df[,c('Close','close.sma.n58', 'close.sd.n58')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n59 <- apply(new.df[,c('Close','close.sma.n59', 'close.sd.n59')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
#new.df$close.zscore.n60 <- apply(new.df[,c('Close','close.sma.n60', 'close.sd.n60')], 1, function(x) { (x[1]-x[2])/x[3] } ) | |
# Use TTR package to create rolling MACD | |
fast <- 3 #3 | |
slow <- 40 #40 | |
signal <- 5 # 5 tests good | |
new.df$ema.fast <- EMA(new.df[,"Close"], fast) | |
new.df$ema.slow <- EMA(new.df[,"Close"], slow) | |
new.df$macd <- apply(new.df[,c('ema.fast','ema.slow')], 1, function(x) { (x[1]-x[2])} ) | |
new.df$signal <- EMA(new.df[,"macd"], signal) | |
# Calculate quartiles, where close is relation to range (Close - High) / (High - Low) | |
#new.df$quartile <- apply(new.df[,c('Close', 'Low', 'High')], 1, function(x) { (x[1]-x[2])/(x[3]-x[2])} ) | |
# Compute daily price differences | |
# We replicate NA 1 time in order to maintain correct positioning of differences | |
# Within the data frame | |
new.df$close.diff <- c(rep(NA, 1), diff(new.df$Close, lag = 1, differences = 1, arithmetic = TRUE, na.pad = TRUE)) | |
# Convert all NA to 0 | |
new.df[is.na(new.df)] <- 0 | |
# Calculate Returns from open to close | |
new.df$ocret <- apply(new.df[,c('Open', 'Close')], 1, function(x) { (x[2]-x[1])/x[1]} ) | |
# Calculate Close-to-Close returns | |
new.df$clret <- ROC(new.df$Close, type = c("discrete")) | |
new.df$clret[1] <- 0 | |
# Subset Date | |
# Pick date of event subtract 6 months | |
start.date <- date.list - 6*30 | |
#date.list <- date.list + 3*30 | |
new.df <- subset(new.df, Date >= as.Date(start.date) & Date <= as.Date(date.list)) | |
# Name indicators # | |
#train.indicator <- train.set$close.zscore.n10 | |
#test.indicator <- test.set$close.zscore.n10 | |
macd <- new.df$macd | |
signal <- new.df$signal | |
rsi.sma <- new.df$rsi.sma.n2 | |
rsi.ema <- new.df$rsi.ema.n2 | |
# Enter buy / sell rules | |
#new.df$enter.momo <- ifelse(macd > signal, 1,0) | |
#new.df$exit.momo <- ifelse(macd < signal, 1,0) | |
#new.df$enter.mean.rev <- ifelse(macd < signal, 1,0) | |
#new.df$exit.mean.rev <- ifelse(macd > signal, 1,0) | |
new.df$enter.momo <- ifelse(rsi.sma > 70, 1,0) | |
new.df$exit.momo <- ifelse(rsi.sma < 85, 1,0) | |
new.df$enter.mean.rev <- ifelse(rsi.sma < 20, 1,0) | |
new.df$exit.mean.rev <- ifelse(rsi.sma > 70, 1,0) | |
#plot(new.df$rsi.sma.n2,type="l") | |
# Momo | |
new.df <- new.df %>% | |
dplyr::mutate(sig.momo = ifelse(enter.momo == 1, 1, | |
ifelse(exit.momo ==1, 0, 0))) | |
# Mean Rev | |
new.df <- new.df %>% | |
dplyr::mutate(sig.mean.rev = ifelse(enter.mean.rev == 1, 1, | |
ifelse(exit.mean.rev == 1, 0, 0))) | |
# lag signal by one forward day to signal entry next day | |
new.df$sig.momo <- lag(new.df$sig.momo,1) # Note k=1 implies a move *forward* | |
new.df$sig.mean.rev <- lag(new.df$sig.mean.rev,1) # Note k=1 implies a move *forward* | |
new.df[is.na(new.df)] <- 0 # Set NA to 0 | |
# Calculate equity curves | |
# Momentum | |
new.df <- new.df %>% | |
dplyr::mutate(RunID = rleid(sig.momo)) %>% | |
group_by(RunID) %>% | |
dplyr::mutate(equity.curve.momo = ifelse(sig.momo == 0, 0, | |
ifelse(row_number() == 1, ocret, clret))) %>% | |
ungroup() %>% | |
select(-RunID) | |
# Mean Rev | |
new.df <- new.df %>% | |
dplyr::mutate(RunID = rleid(sig.mean.rev)) %>% | |
group_by(RunID) %>% | |
dplyr::mutate(equity.curve.mean.rev = ifelse(sig.mean.rev == 0, 0, | |
ifelse(row_number() == 1, ocret, clret))) %>% | |
ungroup() %>% | |
select(-RunID) | |
# Pull select columns from data frame to make XTS whilst retaining formats | |
xts1 = xts(new.df$equity.curve.momo, order.by=as.POSIXct(new.df$Date, format="%Y-%m-%d %H:%M")) | |
xts2 = xts(new.df$equity.curve.mean.rev, order.by=as.POSIXct(new.df$Date, format="%Y-%m-%d %H:%M")) | |
xts3 = xts(new.df$clret, order.by=as.POSIXct(new.df$Date, format="%Y-%m-%d %H:%M")) | |
# Join XTS together | |
compare <- cbind(xts1,xts2,xts3) | |
# Use the PerformanceAnalytics package for trade statistics | |
colnames(compare) <- c("Momentum","Mean Reversion","Buy And Hold") | |
charts.PerformanceSummary(compare,main="Cumulative Returns", wealth.index=TRUE, colorset=rainbow12equal) | |
#performance.table <- rbind(maxDrawdown(compare), CalmarRatio(compare),table.DownsideRisk(compare)) | |
#drawdown.table <- rbind(table.Drawdowns(compare)) | |
#table.AnnualizedReturns(compare) | |
#dev.off() | |
#logRets <- log(cumprod(1+compare)) | |
#chart.TimeSeries(logRets, legend.loc='topleft', colorset=rainbow12equal) | |
# Place time in a vector for .png file name differentiation | |
time <- as.character(Sys.time()) | |
time <- gsub("[^[:alnum:]]", "", time) | |
#print(performance.table) | |
#print(drawdown.table) | |
# Save plot out put to folder | |
mypath <- file.path(png.output.dir,paste(time,next.png[1],".png")) | |
# mypath <- file.path(png.output.dir,paste(next.png[1], ".png", sep = "")) | |
png(file=mypath,width= 1400, height=1000) | |
mytitle = paste0(next.png[1]) | |
#png(file=sprintf("Rplot_%s_%s",mypath,".png", format(Sys.time(), '%d%m%Y_%H_%M'), '%03d')) | |
#charts.PerformanceSummary(compare,main=mytitle,wealth.index=TRUE,colorset=rainbow12equal) | |
chart.CumReturns(compare,main=mytitle,wealth.index=TRUE,colorset=rainbow12equal) | |
dev.off() | |
#png(filename = "C:/R Projects/moo.png") #width= 1000, height=800) | |
#chart.CumReturns(compare,wealth.index=TRUE,colorset=rainbow12equal) | |
#chart.Drawdown(compare,geometric =TRUE,colorset=rainbow12equal) | |
#dev.off() | |
#charts.PerformanceSummary(compare,main="Cumulative Returns"=TRUE,colorset=rainbow12equal) | |
# Prepare Data For Cumulative and compound returns | |
# Calculate annualized and cumulatuve return | |
# Make prices vector | |
# Momoentum | |
momo.prices <- new.df[,"equity.curve.momo", drop = FALSE] | |
colnames(new.df) | |
# Denote n the number of time periods: | |
#n <- nrow(momo.prices) | |
# Calculate close to close returns | |
# lead in with rep,NA,1 to maintain length of vector comparible to data frame | |
#momo.close_ret <- c(rep(NA, 1),(momo.prices[2:n, 1] - momo.prices[1:(n-1), 1])/momo.prices[1:(n-1), 1]) | |
#close_ret[1] <- 0 | |
# Compute continuously returns (log returns) | |
#close_ccret <- log(prices[2:n, 1]) - log(prices[1:(n-1), 1]) | |
# Compute gross returns | |
momo.close_gret <- 1 + momo.prices # use close to close ret | |
# Compute future values | |
momo.close_fv <- cumprod(momo.close_gret) | |
# Obtain first and last values | |
momo.ret.last <- tail(momo.close_fv, n=1) | |
momo.ret.first <- head(momo.close_fv, n=1) | |
momo.cum.ret <- (momo.ret.last-momo.ret.first)/momo.ret.first | |
# Get First And Last row to calculate time between | |
momo.ret.first.row <- head(new.df$Date, n=1) | |
momo.ret.last.row <- tail(new.df$Date, n=1) | |
# Time diff | |
#trading.years.between <- as.numeric(difftime(as.Date(ret.last.row), as.Date(ret.first.row), unit="weeks"))/52.25 | |
# Find time diff | |
momo.ret.time <- momo.ret.last.row - momo.ret.first.row | |
momo.ret.trading.years.between <- momo.ret.time/365 #252 trading days or 365 | |
momo.ret.trading.years.between <- as.numeric(momo.ret.trading.years.between, units="days") # Extract numerical value from time difference 'Time difference of 2837.208 days' | |
# Annualized return | |
# (1 + % diff of final) / (last balance to the power of 1/time first and last balance) -1 | |
momo.ret.annual.return <- (1+momo.cum.ret) ^ (1/momo.ret.trading.years.between) -1 | |
# Prepare Data For Cumulative and compound returns | |
# Calculate annualized and cumulatuve return | |
# Make prices vector | |
# Mean Reversion | |
mean.rev.prices <- new.df[,"equity.curve.mean.rev", drop = FALSE] | |
# Denote n the number of time periods: | |
#n <- nrow(mean.rev.prices) | |
# Calculate close to close returns | |
# lead in with rep,NA,1 to maintain length of vector comparible to data frame | |
#momo.close_ret <- c(rep(NA, 1),(momo.prices[2:n, 1] - momo.prices[1:(n-1), 1])/momo.prices[1:(n-1), 1]) | |
#close_ret[1] <- 0 | |
# Compute continuously returns (log returns) | |
#close_ccret <- log(prices[2:n, 1]) - log(prices[1:(n-1), 1]) | |
# Compute gross returns | |
mean.rev.close_gret <- 1 + mean.rev.prices # use close to close ret | |
# Compute future values | |
mean.rev.close_fv <- cumprod(mean.rev.close_gret) | |
# Obtain first and last values | |
mean.rev.ret.last <- tail(mean.rev.close_fv, n=1) | |
mean.rev.ret.first <- head(mean.rev.close_fv, n=1) | |
mean.rev.cum.ret <- (mean.rev.ret.last-mean.rev.ret.first)/mean.rev.ret.first | |
# Get First And Last row to calculate time between | |
mean.rev.ret.first.row <- head(new.df$Date, n=1) | |
mean.rev.ret.last.row <- tail(new.df$Date, n=1) | |
# Time diff | |
#trading.years.between <- as.numeric(difftime(as.Date(ret.last.row), as.Date(ret.first.row), unit="weeks"))/52.25 | |
# Find time diff | |
mean.rev.ret.time <- mean.rev.ret.last.row - momo.ret.first.row | |
mean.rev.ret.trading.years.between <- mean.rev.ret.time/365 #252 trading days or 365 | |
mean.rev.ret.trading.years.between <- as.numeric(mean.rev.ret.trading.years.between, units="days") # Extract numerical value from time difference 'Time difference of 2837.208 days' | |
# Annualized return | |
# (1 + % diff of final) / (last balance to the power of 1/time first and last balance) -1 | |
mean.rev.ret.annual.return <- (1+mean.rev.cum.ret) ^ (1/mean.rev.ret.trading.years.between) -1 | |
# Buy And Hold Annualized / Cumulative | |
# Extract by and hold equity curve | |
buy.hold.prices <- new.df[,"clret", drop = FALSE] | |
# Denote n the number of time periods: | |
#n <- nrow(mean.rev.prices) | |
# Calculate close to close returns | |
# lead in with rep,NA,1 to maintain length of vector comparible to data frame | |
#momo.close_ret <- c(rep(NA, 1),(momo.prices[2:n, 1] - momo.prices[1:(n-1), 1])/momo.prices[1:(n-1), 1]) | |
#close_ret[1] <- 0 | |
# Compute continuously returns (log returns) | |
#close_ccret <- log(prices[2:n, 1]) - log(prices[1:(n-1), 1]) | |
# Compute gross returns | |
buy.hold.close.gret <- 1 + buy.hold.prices # use close to close ret | |
# Compute future values | |
buy.hold.close_fv <- cumprod(buy.hold.close.gret) | |
# Obtain first and last values | |
buy.hold.ret.last <- tail(buy.hold.close_fv, n=1) | |
buy.hold.ret.first <- head(buy.hold.close_fv, n=1) | |
buy.hold.cum.ret <- (buy.hold.ret.last-buy.hold.ret.first)/buy.hold.ret.first | |
# Get First And Last row to calculate time between | |
buy.hold.ret.first.row <- head(new.df$Date, n=1) | |
buy.hold.ret.last.row <- tail(new.df$Date, n=1) | |
# Time diff | |
#trading.years.between <- as.numeric(difftime(as.Date(ret.last.row), as.Date(ret.first.row), unit="weeks"))/52.25 | |
# Find time diff | |
buy.hold.ret.time <- buy.hold.ret.last.row - momo.ret.first.row | |
buy.hold.ret.trading.years.between <- buy.hold.ret.time/365 #252 trading days or 365 | |
buy.hold.ret.trading.years.between <- as.numeric(buy.hold.ret.trading.years.between, units="days") # Extract numerical value from time difference 'Time difference of 2837.208 days' | |
# Annualized return | |
# (1 + % diff of final) / (last balance to the power of 1/time first and last balance) -1 | |
buy.hold.ret.annual.return <- (1+buy.hold.cum.ret) ^ (1/buy.hold.ret.trading.years.between) -1 | |
# Count days 6 months before FDA date (6*30 days prior) | |
new.df$rep <- rep(1:NROW(new.df), each = 1, len = NROW(new.df)) | |
# Group by day rep number | |
#final <- new.df %>% | |
# group_by(rep) %>% | |
#summarize(n=n(),mn=mean(close.diff),sd=sd(close.diff)) | |
# Create the output of the function : a named data.frame | |
#out_df <- data.frame("Momo Annualized Return" = momo.ret.annual.return[,1], | |
# "Momo Cumulative Return" = momo.cum.ret[,1], | |
# "Mean Reversion Annualized Return" = mean.rev.ret.annual.return[,1], | |
# "Mean Reversion Cumulative Return" = mean.rev.cum.ret[,1], | |
# "Buy And Hold Annualized Return" = buy.hold.ret.annual.return[,1], | |
# "Buy And Hold Cumulative Return" = buy.hold.cum.ret[,1]) | |
# Create data output of rep and close.diff columns rbind | |
data_output_df <- data.frame("Ticker" = new.df$Ticker, | |
"Rep" = new.df$rep, | |
#"Close Differences" = new.df$close.diff ) | |
"Close Ret" = new.df$clret) | |
# Sanity Check performanceAnalystics to confirm cum / annual return | |
#Return.annualized(xts1) | |
#Return.cumulative(xts1,geometric = TRUE) | |
#return(out_df) | |
#return(data_output_df) | |
} | |
# Study results in data frame | |
# Note this used out_df that is initialized top of script | |
# Make row names a column | |
cum_ret.results <- tibble::rownames_to_column(cum_ret,"Symbol") | |
#cum_ret1$decile <- ntile(cum_ret$Momo.Cumulative.Return,10) | |
# Sort Results | |
cum_ret.results <- arrange(cum_ret.results,desc(Momo.Cumulative.Return)) # Sort by Momentum cumulative return | |
head(cum_ret.results,50) # Print console top 50 | |
cum_ret.results <- arrange(cum_ret.results,desc(Mean.Reversion.Cumulative.Return)) # Sort by Mean Reversion cumulative return | |
NROW(cum_ret.results) | |
head(cum_ret.results,50) # Print console top 50 | |
cum_ret.results <- arrange(cum_ret.results,desc(Buy.And.Hold.Cumulative.Return)) # Sort by Buy and Hold cumulative return | |
head(cum_ret.results,50) # Print console top 50 | |
# Split by month... 1,2,3,4,5,6 | |
# Take mean daily change per time period for each month | |
# See if anything notable per time | |
# to do | |
# Group by event type | |
#to do | |
# Some Statistics | |
# Positive cumulative, neg, total, min max | |
pos <- sum(cum_ret.results$Buy.And.Hold.Cumulative.Return >0, na.rm = TRUE) | |
neg <- sum(cum_ret.results$Buy.And.Hold.Cumulative.Return <0, na.rm = TRUE) | |
total <- pos+neg | |
pos/750 | |
sum(cum_ret.results$Buy.And.Hold.Cumulative.Return ==0, na.rm = TRUE) | |
mean(cum_ret.results$Buy.And.Hold.Cumulative.Return) | |
max(cum_ret.results$Buy.And.Hold.Cumulative.Return) | |
# Subset by cumulative return | |
cum.pos <- filter(cum_ret.results, Buy.And.Hold.Cumulative.Return >0) | |
str(cum.pos) | |
mean(cum.pos$Buy.And.Hold.Cumulative.Return) | |
plot(cum.pos$Buy.And.Hold.Cumulative.Return) | |
tail(data_output_df) | |
# Perform mean of all results | |
mean <- data_output_df %<>% | |
group_by(Rep) %>% | |
summarise(mean=mean(Close.Differences,na.rm = TRUE)) | |
# Plot | |
library(ggplot2) | |
ggplot(mean, aes(Rep, mean)) + | |
geom_col()+ | |
theme_classic()+ | |
scale_x_continuous(breaks = seq(0, 127, by = 7))+ | |
ggtitle("Mean Daily Spread By Trading Day Pre/Post FDA Event - Day 63 = FDA Event", subtitle = "Biopharmcatlyst") + | |
labs(x="Day",y="Mean Daily Spread")+ | |
theme(plot.title = element_text(hjust=0.5),plot.subtitle =element_text(hjust=0.5)) | |
# Loop for running function over all files in fda list (note files to be held on hard drive) | |
for (i in 1:length(tickers)){ | |
tryCatch({ | |
temp <- genCHART(tickers[[i]]) | |
#rownames(temp) <- paste0("Daily_",row.names[i]) | |
#cum_ret <- rbind.data.frame(cum_ret, temp) | |
data_output_df <- rbind.data.frame(data_output_df,temp) | |
ptm0 <- proc.time() | |
Sys.sleep(0.1) | |
ptm1=proc.time() - ptm0 | |
time=as.numeric(ptm1[3]) | |
cat('\n','Iteration',i,'took', time, "seconds to complete") | |
}, error = function(e) { print(paste("i =", i, "failed:")) }) | |
} | |
#Save file | |
# Write output to file | |
#write.csv(cum_ret.results,file="D:/R Projects/Biotech Catalyst Study/fda.first.output.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment