# DavisVaughan/dynamic-rolling-lm.R

## dynamic-rolling-lm.R
# Rolling Parallelization Example
library(tidyquant)
library(tibbletime)
library(multidplyr)

# Stock side of the regression: download prices from 2010-01-01 through
# yesterday, then turn the adjusted close into daily returns per symbol.
price_df <- tq_get(
  c("FB", "AAPL", "GOOG", "NFLX"),
  get  = "stock.prices",
  from = "2010-01-01",
  to   = Sys.Date() - 1
) %>%
  group_by(symbol) %>%
  tq_transmute(
    select     = "adjusted",
    mutate_fun = periodReturn,
    period     = "daily",
    col_rename = "daily_return"
  )

# Index tickers live in a vector so they can be reused in function calls
# later in the script.
indexes <- c("^GSPC", "XLK")

# Index side of the regression: same date range as the stocks, daily returns
# of the adjusted close, then one column per index symbol.
indx_df <- tq_get(
  indexes,
  get  = "stock.prices",
  from = "2010-01-01",
  to   = Sys.Date() - 1
) %>%
  group_by(symbol) %>%
  tq_transmute(
    select     = adjusted,
    mutate_fun = periodReturn,
    period     = "daily"
  ) %>%
  spread(key = symbol, value = daily.returns)

#' Fit a linear model from positionally supplied vectors
#'
#' The FIRST argument is treated as the response and is renamed `y`; every
#' further argument is a predictor and is renamed `x_1`, `x_2`, ... in the
#' order given. Coefficient names therefore never depend on the caller's
#' column names. With a response only, an intercept-only model is fitted.
#'
#' @param ... Vectors of a common length (length-1 values are recycled):
#'   the response first, then zero or more predictors.
#' @return The fitted `lm` object for `y ~ x_1 + ... + x_k` (or `y ~ 1`).
lm_dynamic <- function(...) {
  .dots <- list(...)
  n <- length(.dots)
  if (n < 1L) {
    stop("lm_dynamic() needs at least a response vector.", call. = FALSE)
  }

  sizes <- lengths(.dots)
  if (!all(sizes %in% c(1L, max(sizes)))) {
    stop(
      "All arguments to lm_dynamic() must share one length (or be length 1).",
      call. = FALSE
    )
  }

  y <- "y"
  # paste0("x_", integer(0)) returns "x_" rather than an empty vector, so the
  # predictor names are only built when predictors were actually supplied.
  x <- if (n > 1L) paste0("x_", seq_len(n - 1L)) else character(0)

  .dots_named <- stats::setNames(.dots, c(y, x))
  # The names `.dots_tbl` and `lm_formula` are kept on purpose: they show up
  # verbatim in the `Call:` line of the printed model.
  .dots_tbl <- as.data.frame(.dots_named, stringsAsFactors = FALSE)

  # No predictors -> intercept-only right-hand side.
  rhs <- if (length(x) > 0L) x else "1"
  lm_formula <- stats::reformulate(rhs, response = y)
  lm(lm_formula, data = .dots_tbl)
}

# Rolling version of lm_dynamic over a window of 252 observations.
# unlist = FALSE is passed so the per-window results are not flattened
# (presumably yielding a list of lm fits -- confirm against rollify's docs).
rolling_regr_dyn <- rollify(lm_dynamic, window = 252, unlist = FALSE)

# create cluster to run rolling regressions in parallel
cl <- create_cluster(cores = 4)
#> Initialising 4 core cluster.
# Send the objects used by the parallel step to the cluster: `one_of`, the
# rolling regression function, and the `indexes` vector under the name
# "indexes".
# NOTE(review): nothing visible in this script calls `one_of` or reads
# `indexes` on the cluster -- confirm these copies are still needed.
cl %>%
  cluster_copy(one_of) %>%
  cluster_copy(rolling_regr_dyn) %>%
  cluster_assign_value("indexes", indexes)

# Join price_df with indx_df by date, partition the rows across the cluster
# by symbol, and run the rolling regression of the stock return on both
# index returns.
rolling_regr_df <- left_join(price_df, indx_df, by = "date") %>%
  partition(symbol, cluster = cl) %>%
  mutate(
    rolling_regr = rolling_regr_dyn(daily_return, `^GSPC`, XLK)
  )

# Look at the fit stored in row 253 -- notice the x_1 and x_2 names.
rolling_regr_df %>%
  as.data.frame() %>%
  slice(253) %>%
  pull(rolling_regr) %>%
  .[[1]]
#>
#> Call:
#> lm(formula = lm_formula, data = .dots_tbl)
#>
#> Coefficients:
#> (Intercept)          x_1          x_2
#>    0.001344    -0.394468     1.489119

# Same pipeline with a single predictor: only ^GSPC, no XLK.
rolling_regr_df2 <- left_join(price_df, indx_df, by = "date") %>%
  partition(symbol, cluster = cl) %>%
  mutate(
    rolling_regr = rolling_regr_dyn(daily_return, `^GSPC`)
  )

# Row 253 again -- now the model has just x_1.
rolling_regr_df2 %>%
  as.data.frame() %>%
  slice(253) %>%
  pull(rolling_regr) %>%
  .[[1]]
#>
#> Call:
#> lm(formula = lm_formula, data = .dots_tbl)
#>
#> Coefficients:
#> (Intercept)          x_1
#>    0.001303     1.056704


# If you REALLY want to drive the regression from the `indexes` vector:
# turn each ticker string into a symbol and wrap the symbols as quosures.
# library(rlang)
index_quos <- quos(!!!map(indexes, as.name))

# Splice the quosures in as the predictor arguments (unquote them).
rolling_regr_df3 <- left_join(price_df, indx_df, by = "date") %>%
  partition(symbol, cluster = cl) %>%
  mutate(
    rolling_regr = rolling_regr_dyn(daily_return, !!!index_quos)
  )

rolling_regr_df3 %>%
  as.data.frame() %>%
  slice(253) %>%
  pull(rolling_regr) %>%
  .[[1]]
#>
#> Call:
#> lm(formula = lm_formula, data = .dots_tbl)
#>
#> Coefficients:
#> (Intercept)          x_1          x_2
#>    0.001344    -0.394468     1.489119

#' Created on 2018-04-13 by the [reprex package](http://reprex.tidyverse.org) (v0.2.0).
	# Rolling Parallelization Example
	library(tidyquant)
	library(tibbletime)
	library(multidplyr)

	# Stock side of the regression: download prices from 2010-01-01 through
	# yesterday, then turn the adjusted close into daily returns per symbol.
	price_df <- tq_get(
	  c("FB", "AAPL", "GOOG", "NFLX"),
	  get  = "stock.prices",
	  from = "2010-01-01",
	  to   = Sys.Date() - 1
	) %>%
	  group_by(symbol) %>%
	  tq_transmute(
	    select     = "adjusted",
	    mutate_fun = periodReturn,
	    period     = "daily",
	    col_rename = "daily_return"
	  )

	# Index tickers live in a vector so they can be reused in function calls
	# later in the script.
	indexes <- c("^GSPC", "XLK")

	# Index side of the regression: same date range as the stocks, daily returns
	# of the adjusted close, then one column per index symbol.
	indx_df <- tq_get(
	  indexes,
	  get  = "stock.prices",
	  from = "2010-01-01",
	  to   = Sys.Date() - 1
	) %>%
	  group_by(symbol) %>%
	  tq_transmute(
	    select     = adjusted,
	    mutate_fun = periodReturn,
	    period     = "daily"
	  ) %>%
	  spread(key = symbol, value = daily.returns)

	#' Fit a linear model from positionally supplied vectors
	#'
	#' The FIRST argument is treated as the response and is renamed `y`; every
	#' further argument is a predictor and is renamed `x_1`, `x_2`, ... in the
	#' order given. Coefficient names therefore never depend on the caller's
	#' column names. With a response only, an intercept-only model is fitted.
	#'
	#' @param ... Vectors of a common length (length-1 values are recycled):
	#'   the response first, then zero or more predictors.
	#' @return The fitted `lm` object for `y ~ x_1 + ... + x_k` (or `y ~ 1`).
	lm_dynamic <- function(...) {
	  .dots <- list(...)
	  n <- length(.dots)
	  if (n < 1L) {
	    stop("lm_dynamic() needs at least a response vector.", call. = FALSE)
	  }

	  sizes <- lengths(.dots)
	  if (!all(sizes %in% c(1L, max(sizes)))) {
	    stop(
	      "All arguments to lm_dynamic() must share one length (or be length 1).",
	      call. = FALSE
	    )
	  }

	  y <- "y"
	  # paste0("x_", integer(0)) returns "x_" rather than an empty vector, so the
	  # predictor names are only built when predictors were actually supplied.
	  x <- if (n > 1L) paste0("x_", seq_len(n - 1L)) else character(0)

	  .dots_named <- stats::setNames(.dots, c(y, x))
	  # The names `.dots_tbl` and `lm_formula` are kept on purpose: they show up
	  # verbatim in the `Call:` line of the printed model.
	  .dots_tbl <- as.data.frame(.dots_named, stringsAsFactors = FALSE)

	  # No predictors -> intercept-only right-hand side.
	  rhs <- if (length(x) > 0L) x else "1"
	  lm_formula <- stats::reformulate(rhs, response = y)
	  lm(lm_formula, data = .dots_tbl)
	}

	# Rolling version of lm_dynamic over a window of 252 observations.
	# unlist = FALSE is passed so the per-window results are not flattened
	# (presumably yielding a list of lm fits -- confirm against rollify's docs).
	rolling_regr_dyn <- rollify(lm_dynamic, window = 252, unlist = FALSE)

	# create cluster to run rolling regressions in parallel
	cl <- create_cluster(cores = 4)
	#> Initialising 4 core cluster.
	# Send the objects used by the parallel step to the cluster: `one_of`, the
	# rolling regression function, and the `indexes` vector under the name
	# "indexes".
	# NOTE(review): nothing visible in this script calls `one_of` or reads
	# `indexes` on the cluster -- confirm these copies are still needed.
	cl %>%
	cluster_copy(one_of) %>%
	cluster_copy(rolling_regr_dyn) %>%
	cluster_assign_value("indexes", indexes)

	# Join price_df with indx_df by date, partition the rows across the cluster
	# by symbol, and run the rolling regression of the stock return on both
	# index returns.
	rolling_regr_df <- left_join(price_df, indx_df, by = "date") %>%
	  partition(symbol, cluster = cl) %>%
	  mutate(
	    rolling_regr = rolling_regr_dyn(daily_return, `^GSPC`, XLK)
	  )

	# Look at the fit stored in row 253 -- notice the x_1 and x_2 names.
	rolling_regr_df %>%
	  as.data.frame() %>%
	  slice(253) %>%
	  pull(rolling_regr) %>%
	  .[[1]]
	#>
	#> Call:
	#> lm(formula = lm_formula, data = .dots_tbl)
	#>
	#> Coefficients:
	#> (Intercept) x_1 x_2
	#> 0.001344 -0.394468 1.489119

	# Same pipeline with a single predictor: only ^GSPC, no XLK.
	rolling_regr_df2 <- left_join(price_df, indx_df, by = "date") %>%
	  partition(symbol, cluster = cl) %>%
	  mutate(
	    rolling_regr = rolling_regr_dyn(daily_return, `^GSPC`)
	  )

	# Row 253 again -- now the model has just x_1.
	rolling_regr_df2 %>%
	  as.data.frame() %>%
	  slice(253) %>%
	  pull(rolling_regr) %>%
	  .[[1]]
	#>
	#> Call:
	#> lm(formula = lm_formula, data = .dots_tbl)
	#>
	#> Coefficients:
	#> (Intercept) x_1
	#> 0.001303 1.056704


	# If you REALLY want to drive the regression from the `indexes` vector:
	# turn each ticker string into a symbol and wrap the symbols as quosures.
	# library(rlang)
	index_quos <- quos(!!!map(indexes, as.name))

	# Splice the quosures in as the predictor arguments (unquote them).
	rolling_regr_df3 <- left_join(price_df, indx_df, by = "date") %>%
	  partition(symbol, cluster = cl) %>%
	  mutate(
	    rolling_regr = rolling_regr_dyn(daily_return, !!!index_quos)
	  )

	rolling_regr_df3 %>%
	  as.data.frame() %>%
	  slice(253) %>%
	  pull(rolling_regr) %>%
	  .[[1]]
	#>
	#> Call:
	#> lm(formula = lm_formula, data = .dots_tbl)
	#>
	#> Coefficients:
	#> (Intercept) x_1 x_2
	#> 0.001344 -0.394468 1.489119

	#' Created on 2018-04-13 by the [reprex package](http://reprex.tidyverse.org) (v0.2.0).