craha22/nascar_stock_port.R

## nascar_stock_port.R
#Performance of a NASCAR portfolio, where weights and stocks are determined by positioning of winners and their sponsors
# https://craha.wordpress.com/2018/04/24/the-nascar-investment-strategy/
library(rvest)
library(tidyverse)
library(XML)
library(tidyquant)
library(plyr)
library(alphavantager)

#Starting URL
URL0 <- 'http://racing-reference.info/race/2018_Food_City_500/W'
#Base URL
baseURL <- 'http://racing-reference.info'
#Object that will hold finishing position and two columns of sposors (some have multiple per car)
master_sponsors <- NULL

for (i in 1:100) {
  file_html <- read_html(URL0)
  tables <- html_nodes(file_html, "table")
  raceTable <- html_table(tables[7])
  #Split on the first slash in sponsor to create two fields and select the finishing position and sponsors
  sponsorsPos <- raceTable[[1]] %>%
    select(Fin, `Sponsor / Owner`) %>%
    mutate(Sponsor=stringr::str_split_fixed(`Sponsor / Owner`,'\\(', n=2)[,1]) %>%
    mutate(Sponsor1=stringr::str_trim(str_split_fixed(`Sponsor`,'/', n=2)[,1])) %>%
    mutate(Sponsor2=stringr::str_trim(str_split_fixed(`Sponsor`,'/', n=2)[,2])) %>%
    select(Fin, Sponsor1, Sponsor2)
  #The date is in the page title, lets get it out
  url_title <- html_nodes(file_html, "title")
  race_date <- str_extract(as.character(url_title),'\\d\\d/\\d\\d/\\d\\d\\d\\d')
  sponsorsPos <- cbind(sponsorsPos, rep(race_date, nrow(sponsorsPos)))
  #append to the master list of positions and sponsors with dates
  master_sponsors <- rbind(master_sponsors, sponsorsPos)
  #Get the URL of the previous race
  a_text <- html_nodes(file_html, "a") %>% html_text()
  url_str <- html_attr(html_nodes(file_html,'a')[which(a_text == "Previous race results")],'href')
  URL0 <- paste(baseURL,url_str, sep = '')
}

#Examine the counts of unique sponsors, note the variation!
agg_sponsors <- c(master_sponsors$Sponsor1, master_sponsors$Sponsor2)
sponsor_table <- table(agg_sponsors)

#General list to map terms to stock symbols
terms2stocks <- rbind(
  c('Shell', "RDS-A"),
  c('Lowe', "LOW"),
  c("Monster", "MNST"),
  c('Pennzoil', "RDS-A"),
  c('Target', 'TGT'),
  c('Miller Lite', 'TAP'),
  c('Coors', 'TAP'),
  c('Miller', 'TAP'),
  c('Caterpillar', 'CAT'),
  c('Mobil', 'XOM'),
  c('NAPA', 'GPC'),
  c('Dr. Pepper', 'DPS'),
  c("McDonald's", 'MCD'),
  c("Cessna", "TXT"),
  c("Dollar General", 'DG'),
  c('Fastenal', 'FAST'),
  c('Kroger', 'KR'),
  c('Stanley', 'SWK'),
  c('FedEx', 'FDX'),
  c('Ford', "F"),
  c('DeWalt', 'SWK'),
  c('Hulu', 'FOXA'),
  c('Coca-Cola', 'KO'),
  c('Arris', 'ARRS'),
  c('Cottonelle', 'KMB'),
  c('Rush', 'RUSHA'),
  c('Grainger', 'GWW'),
  c('GoDaddy', 'GDDY'),
  c('3M', 'MMM'),
  c('MMM', 'MMM'),
  c('CSX', 'CSX'),
  c("Aaron's", 'AAN'),
  c('Beechcraft', 'TXT'),
  c('Busch', 'BUD'),
  c('Camping World', 'CWH'),
  c('Moen', 'FBHS'),
  c('Clorox', 'CLX'),
  c('Scott', 'KMB'),
  c('Budweiser', 'BUD'),
  c('Cheez-It', 'K'),
  c('Kingsford', 'CLX'),
  c('Kobalt', 'LOW'),
  c('Dow', 'DWDP'),
  c('SiriusXM', 'SIRI'),
  c('Valvoline', 'VVV'),
  c('Allstate', 'AAL'),
  c('Dew', 'PEP'),
  c('Comcast', 'CMCSA'),
  c('Cars 3', 'DIS'),
  c('Cheerios', 'GIS'),
  c('Milwaukee', 'TTI'),
  c('Burger King', 'QSR'),
  c('Energizer', 'ENR'),
  c('Kleenex', 'KMB'),
  c('Pepsi', 'PEP'),
  c('Walmart', 'WMT'),
  c('Cat', 'CAT')
)
terms2stocks <- as.data.frame(terms2stocks)
colnames(terms2stocks) <- c('Term', 'Stock')
terms2stocks$Term <- as.character(terms2stocks$Term)

#search for terms in the sponsor field. If there's a match, grab it
tickers <- NULL
for (i in 1:nrow(master_sponsors)) {
  test_row <- master_sponsors[i,]
  if(any(str_detect(test_row$Sponsor1, terms2stocks$Term))) {
    ticker <- as.character(terms2stocks[str_detect(test_row$Sponsor1, terms2stocks$Term),2]  )[1]
  } else if(any(str_detect(test_row$Sponsor2, terms2stocks$Term))){
    ticker <- as.character(terms2stocks[str_detect(test_row$Sponsor2, terms2stocks$Term),2] )[1]
  } else {
    ticker <- "NA"
  }
  tickers <- c(tickers, ticker)
}

masterSponsors <- cbind(master_sponsors,tickers)

#generate an object that has date, rank, and stock symbol
orders <- ddply(masterSponsors[masterSponsors$tickers != "NA",],.(`rep(race_date, nrow(sponsorsPos))`),transform,Order = rank(Fin,ties.method = "first")) %>%
  select(`rep(race_date, nrow(sponsorsPos))`, tickers, Order)

colnames(orders) <- c('Date', 'Ticker', 'Rank')
tickers_in_port <- as.character(unique(orders$Ticker))

#Fetch Data from Alphavantage
APIKEY <- "###"
av_api_key(APIKEY)
List_Stocks <- list()
for (i in 43:length(tickers_in_port)) {
  Sys.sleep(2)
  tryCatch({
    #assign(Stock_List[i], av_get(symbol = Stock_List[i], av_fun = "TIME_SERIES_DAILY", outputsize='full'))
    List_Stocks[[i]] <- av_get(symbol = tickers_in_port[i], av_fun = "TIME_SERIES_WEEKLY", outputsize='full')
    Sys.sleep(2)
    print(tickers_in_port[i])
  }, error = function(e) {cat("ERROR :",conditionMessage(e), "\n")})
}

#Get next period returns for each stock
List_Stocks_Rets <- lapply(List_Stocks, function(x) return(x %>%
                                                             tq_mutate(select=close, mutate_fun = dailyReturn) %>%
                                                             mutate(next_ret=lead(daily.returns))))

names(List_Stocks_Rets) <- tickers_in_port
orders$return <- 0
#Map the next returns to the orders object
for(j in 1:length(tickers_in_port)){
  test_stock_name <- tickers_in_port[j]
  test_stock <- List_Stocks_Rets[test_stock_name][[1]] #MCD
  stock_dates <- as.character(orders[orders$Ticker==test_stock_name,1])
  for (i in 1:length(stock_dates)){
    date_ret <- test_stock %>%
      filter(timestamp >= as.Date(stock_dates[i], format = '%m/%d/%Y')) %>%
      filter(timestamp < as.Date(stock_dates[i], format = '%m/%d/%Y')+7) %>%
      select(timestamp, next_ret)
    orders$return[orders$Date == stock_dates[i] & orders$Ticker == test_stock_name] <- date_ret[,2]
  }
}

race_dates <- as.character(unique(orders$Date))
orders$weights <- 0
#calculate weights for each return based on race positions
for (i in 1:length(race_dates)) {
  race_orders <- subset(orders, Date==race_dates[i])
  ranks <- race_orders$Rank
  ranks <- exp(-1 * ranks)
  ranks <- ranks/sum(ranks)
  race_orders$weights <- ranks
  orders$weights[orders$Date == race_dates[i]] <- ranks
}


orders$return <- as.numeric(orders$return)
orders$Date <- as.Date(orders$Date, format = '%m/%d/%Y')
orders$weighted_ret <- orders$return * orders$weights
#Get the final performance of your strategy and explore
pnl <- aggregate(orders$weighted_ret, by=list(orders$Date), sum, na.rm=TRUE)

colnames(pnl) <- c("Date", "Return")
pnl$year <- year(pnl$Date)


plot(pnl$Date[pnl$year==2015], cumsum(pnl$Return[pnl$year==2015]), type = 'l', ylab="Cumulative Returns", xlab="Date")
title("2015 Season")
plot(pnl$Date[pnl$year==2016], cumsum(pnl$Return[pnl$year==2016]), type = 'l', ylab="Cumulative Returns", xlab="Date")
title("2016 Season")
plot(pnl$Date[pnl$year==2017], cumsum(pnl$Return[pnl$year==2017]), type = 'l', ylab="Cumulative Returns", xlab="Date")
title("2017 Season")
plot(pnl$Date[pnl$year==2018], cumsum(pnl$Return[pnl$year==2018]), type = 'l', ylab="Cumulative Returns", xlab="Date")
title("2018 Season")
	#Performance of a NASCAR portfolio, where weights and stocks are determined by positioning of winners and their sponsors
	# https://craha.wordpress.com/2018/04/24/the-nascar-investment-strategy/
	library(rvest)
	library(tidyverse)
	library(XML)
	library(tidyquant)
	library(plyr)
	library(alphavantager)

	#Starting URL
	URL0 <- 'http://racing-reference.info/race/2018_Food_City_500/W'
	#Base URL
	baseURL <- 'http://racing-reference.info'
	#Object that will hold finishing position and two columns of sposors (some have multiple per car)
	master_sponsors <- NULL

	for (i in 1:100) {
	file_html <- read_html(URL0)
	tables <- html_nodes(file_html, "table")
	raceTable <- html_table(tables[7])
	#Split on the first slash in sponsor to create two fields and select the finishing position and sponsors
	sponsorsPos <- raceTable[[1]] %>%
	select(Fin, `Sponsor / Owner`) %>%
	mutate(Sponsor=stringr::str_split_fixed(`Sponsor / Owner`,'\\(', n=2)[,1]) %>%
	mutate(Sponsor1=stringr::str_trim(str_split_fixed(`Sponsor`,'/', n=2)[,1])) %>%
	mutate(Sponsor2=stringr::str_trim(str_split_fixed(`Sponsor`,'/', n=2)[,2])) %>%
	select(Fin, Sponsor1, Sponsor2)
	#The date is in the page title, lets get it out
	url_title <- html_nodes(file_html, "title")
	race_date <- str_extract(as.character(url_title),'\\d\\d/\\d\\d/\\d\\d\\d\\d')
	sponsorsPos <- cbind(sponsorsPos, rep(race_date, nrow(sponsorsPos)))
	#append to the master list of positions and sponsors with dates
	master_sponsors <- rbind(master_sponsors, sponsorsPos)
	#Get the URL of the previous race
	a_text <- html_nodes(file_html, "a") %>% html_text()
	url_str <- html_attr(html_nodes(file_html,'a')[which(a_text == "Previous race results")],'href')
	URL0 <- paste(baseURL,url_str, sep = '')
	}

	#Examine the counts of unique sponsors, note the variation!
	agg_sponsors <- c(master_sponsors$Sponsor1, master_sponsors$Sponsor2)
	sponsor_table <- table(agg_sponsors)

	#General list to map terms to stock symbols
	terms2stocks <- rbind(
	c('Shell', "RDS-A"),
	c('Lowe', "LOW"),
	c("Monster", "MNST"),
	c('Pennzoil', "RDS-A"),
	c('Target', 'TGT'),
	c('Miller Lite', 'TAP'),
	c('Coors', 'TAP'),
	c('Miller', 'TAP'),
	c('Caterpillar', 'CAT'),
	c('Mobil', 'XOM'),
	c('NAPA', 'GPC'),
	c('Dr. Pepper', 'DPS'),
	c("McDonald's", 'MCD'),
	c("Cessna", "TXT"),
	c("Dollar General", 'DG'),
	c('Fastenal', 'FAST'),
	c('Kroger', 'KR'),
	c('Stanley', 'SWK'),
	c('FedEx', 'FDX'),
	c('Ford', "F"),
	c('DeWalt', 'SWK'),
	c('Hulu', 'FOXA'),
	c('Coca-Cola', 'KO'),
	c('Arris', 'ARRS'),
	c('Cottonelle', 'KMB'),
	c('Rush', 'RUSHA'),
	c('Grainger', 'GWW'),
	c('GoDaddy', 'GDDY'),
	c('3M', 'MMM'),
	c('MMM', 'MMM'),
	c('CSX', 'CSX'),
	c("Aaron's", 'AAN'),
	c('Beechcraft', 'TXT'),
	c('Busch', 'BUD'),
	c('Camping World', 'CWH'),
	c('Moen', 'FBHS'),
	c('Clorox', 'CLX'),
	c('Scott', 'KMB'),
	c('Budweiser', 'BUD'),
	c('Cheez-It', 'K'),
	c('Kingsford', 'CLX'),
	c('Kobalt', 'LOW'),
	c('Dow', 'DWDP'),
	c('SiriusXM', 'SIRI'),
	c('Valvoline', 'VVV'),
	c('Allstate', 'AAL'),
	c('Dew', 'PEP'),
	c('Comcast', 'CMCSA'),
	c('Cars 3', 'DIS'),
	c('Cheerios', 'GIS'),
	c('Milwaukee', 'TTI'),
	c('Burger King', 'QSR'),
	c('Energizer', 'ENR'),
	c('Kleenex', 'KMB'),
	c('Pepsi', 'PEP'),
	c('Walmart', 'WMT'),
	c('Cat', 'CAT')
	)
	terms2stocks <- as.data.frame(terms2stocks)
	colnames(terms2stocks) <- c('Term', 'Stock')
	terms2stocks$Term <- as.character(terms2stocks$Term)

	#search for terms in the sponsor field. If there's a match, grab it
	tickers <- NULL
	for (i in 1:nrow(master_sponsors)) {
	test_row <- master_sponsors[i,]
	if(any(str_detect(test_row$Sponsor1, terms2stocks$Term))) {
	ticker <- as.character(terms2stocks[str_detect(test_row$Sponsor1, terms2stocks$Term),2] )[1]
	} else if(any(str_detect(test_row$Sponsor2, terms2stocks$Term))){
	ticker <- as.character(terms2stocks[str_detect(test_row$Sponsor2, terms2stocks$Term),2] )[1]
	} else {
	ticker <- "NA"
	}
	tickers <- c(tickers, ticker)
	}

	masterSponsors <- cbind(master_sponsors,tickers)

	#generate an object that has date, rank, and stock symbol
	orders <- ddply(masterSponsors[masterSponsors$tickers != "NA",],.(`rep(race_date, nrow(sponsorsPos))`),transform,Order = rank(Fin,ties.method = "first")) %>%
	select(`rep(race_date, nrow(sponsorsPos))`, tickers, Order)

	colnames(orders) <- c('Date', 'Ticker', 'Rank')
	tickers_in_port <- as.character(unique(orders$Ticker))

	#Fetch Data from Alphavantage
	APIKEY <- "###"
	av_api_key(APIKEY)
	List_Stocks <- list()
	for (i in 43:length(tickers_in_port)) {
	Sys.sleep(2)
	tryCatch({
	#assign(Stock_List[i], av_get(symbol = Stock_List[i], av_fun = "TIME_SERIES_DAILY", outputsize='full'))
	List_Stocks[[i]] <- av_get(symbol = tickers_in_port[i], av_fun = "TIME_SERIES_WEEKLY", outputsize='full')
	Sys.sleep(2)
	print(tickers_in_port[i])
	}, error = function(e) {cat("ERROR :",conditionMessage(e), "\n")})
	}

	#Get next period returns for each stock
	List_Stocks_Rets <- lapply(List_Stocks, function(x) return(x %>%
	tq_mutate(select=close, mutate_fun = dailyReturn) %>%
	mutate(next_ret=lead(daily.returns))))

	names(List_Stocks_Rets) <- tickers_in_port
	orders$return <- 0
	#Map the next returns to the orders object
	for(j in 1:length(tickers_in_port)){
	test_stock_name <- tickers_in_port[j]
	test_stock <- List_Stocks_Rets[test_stock_name][[1]] #MCD
	stock_dates <- as.character(orders[orders$Ticker==test_stock_name,1])
	for (i in 1:length(stock_dates)){
	date_ret <- test_stock %>%
	filter(timestamp >= as.Date(stock_dates[i], format = '%m/%d/%Y')) %>%
	filter(timestamp < as.Date(stock_dates[i], format = '%m/%d/%Y')+7) %>%
	select(timestamp, next_ret)
	orders$return[orders$Date == stock_dates[i] & orders$Ticker == test_stock_name] <- date_ret[,2]
	}
	}

	race_dates <- as.character(unique(orders$Date))
	orders$weights <- 0
	#calculate weights for each return based on race positions
	for (i in 1:length(race_dates)) {
	race_orders <- subset(orders, Date==race_dates[i])
	ranks <- race_orders$Rank
	ranks <- exp(-1 * ranks)
	ranks <- ranks/sum(ranks)
	race_orders$weights <- ranks
	orders$weights[orders$Date == race_dates[i]] <- ranks
	}


	orders$return <- as.numeric(orders$return)
	orders$Date <- as.Date(orders$Date, format = '%m/%d/%Y')
	orders$weighted_ret <- orders$return * orders$weights
	#Get the final performance of your strategy and explore
	pnl <- aggregate(orders$weighted_ret, by=list(orders$Date), sum, na.rm=TRUE)

	colnames(pnl) <- c("Date", "Return")
	pnl$year <- year(pnl$Date)


	plot(pnl$Date[pnl$year==2015], cumsum(pnl$Return[pnl$year==2015]), type = 'l', ylab="Cumulative Returns", xlab="Date")
	title("2015 Season")
	plot(pnl$Date[pnl$year==2016], cumsum(pnl$Return[pnl$year==2016]), type = 'l', ylab="Cumulative Returns", xlab="Date")
	title("2016 Season")
	plot(pnl$Date[pnl$year==2017], cumsum(pnl$Return[pnl$year==2017]), type = 'l', ylab="Cumulative Returns", xlab="Date")
	title("2017 Season")
	plot(pnl$Date[pnl$year==2018], cumsum(pnl$Return[pnl$year==2018]), type = 'l', ylab="Cumulative Returns", xlab="Date")
	title("2018 Season")