Skip to content

Instantly share code, notes, and snippets.

@nanxstats
Last active May 31, 2023 05:30
Show Gist options
  • Save nanxstats/43ef32acbff6e946637fdda24a052710 to your computer and use it in GitHub Desktop.
Save nanxstats/43ef32acbff6e946637fdda24a052710 to your computer and use it in GitHub Desktop.
Sparse index tracking with a two-stage procedure using {msaenet} and {CVXR}
# Load data and split training/test set ----------------------------------------
library("xts")
index2010 <- sparseIndexTracking::INDEX_2010
x_tr <- index2010$X[1:126]
x_te <- index2010$X[127:252]
r_tr <- index2010$SP500[1:126]
r_te <- index2010$SP500[127:252]
# Wrapper functions for P&L plotting -------------------------------------------
df_pnl <- function(x_te, beta, y_test, title) {
df <- cbind(
cumprod(1 + x_te %*% beta),
cumprod(1 + y_test)
)
names(df)[1] <- title
df
}
plot_pnl <- function(object, title = "Cumulative P&L") {
ggplot2::autoplot(object, facets = NULL) +
ggplot2::ggtitle(title) +
cowplot::theme_minimal_hgrid() +
ggsci::scale_color_d3() +
ggplot2::scale_y_continuous(breaks = seq(1, 1.5, .05), limits = c(1, 1.4)) +
ggplot2::scale_x_date(date_breaks = "1 months", date_labels = "%b") +
ggplot2::theme(
axis.title.x = ggplot2::element_blank(),
legend.position = "bottom"
)
}
# L0 + empirical tracking error ------------------------------------------------
w_ete <- sparseIndexTracking::spIndexTrack(
x_tr,
r = r_tr, lambda = 1e-7, u = 0.5, measure = "ete"
)
names(w_ete[w_ete > 1e-6])
data.frame(sort(w_ete, decreasing = TRUE), fix.empty.names = FALSE)
df_pnl(x_te, w_ete, r_te, "PortfolioETE") |> plot_pnl()
# Wrapper functions for extracting {msaenet} coefficients ----------------------
get_beta <- function(object) {
beta <- coef(object)
names(beta) <- if (inherits(object, "msaenet.msaenet")) {
rownames(object$beta)
} else {
colnames(object$model$X)
}
beta
}
get_nzv <- function(object) {
beta <- get_beta(object)
names(beta[which(beta != 0)])
}
# Constrained OLS with coefficients sum to one using {CVXR} --------------------
library("CVXR")
storeg <- function(x, y) {
x <- as.matrix(x)
y <- as.vector(y)
p <- ncol(x)
beta <- Variable(p)
obj <- sum((y - x %*% beta)^2)
constr <- list(beta >= 0, sum(beta) == 1)
prob <- Problem(Minimize(obj), constr)
result <- solve(prob)
structure(
list(
result = result,
beta = result$getValue(beta)
),
class = "storeg"
)
}
# Get asset names and coefficients in the portfolio ----------------------------
get_portfolio <- function(beta, nzv) {
as.vector(beta) |>
setNames(nzv) |>
sort(decreasing = TRUE) |>
data.frame(fix.empty.names = FALSE)
}
# Fit msaenet-storeg hybrid model ----------------------------------------------
fit_enet <- msaenet::msaenet(
x = as.matrix(x_tr),
y = as.vector(r_tr),
family = "gaussian",
init = "ridge",
tune = "cv",
nsteps = 10,
lower.limits = 0,
verbose = FALSE,
seed = 42
)
fit_enet |> get_nzv()
fit_enet_sto <- storeg(x_tr[, get_nzv(fit_enet)], r_tr)
get_portfolio(fit_enet_sto$beta, get_nzv(fit_enet))
df_pnl(x_te[, get_nzv(fit_enet)], fit_enet_sto$beta, r_te, "Portfolio.msaenet") |>
plot_pnl()
# Stability of solution --------------------------------------------------------
library("doParallel")
registerDoParallel(detectCores())
fit_rep <- foreach::foreach(seed = 1:100) %dopar% {
msaenet::msaenet(
x = as.matrix(x_tr),
y = as.vector(r_tr),
family = "gaussian",
init = "ridge",
tune = "cv",
nsteps = 10,
lower.limits = 0,
verbose = FALSE,
seed = seed
)
}
as.data.frame(table(unlist(sapply(fit_rep, get_nzv)))) |>
ggplot2::ggplot(ggplot2::aes(x = Freq, y = Var1)) +
ggplot2::geom_point(size = 3, color = ggsci::pal_d3()(1)) +
ggplot2::scale_x_continuous(
name = "Selection frequency out of 100 experiments",
limits = c(0, 100),
expand = c(0, 5)
) +
ggplot2::scale_y_discrete(name = NULL, expand = c(0, 0.5)) +
cowplot::theme_minimal_grid()
# Fit msasnet-storeg hybrid model ----------------------------------------------
fit_snet <- suppressWarnings(msaenet::msasnet(
x = as.matrix(x_tr),
y = as.vector(r_tr),
family = "gaussian",
init = "ridge",
tune = "cv",
nsteps = 10,
verbose = FALSE,
seed = 42
))
fit_snet |> get_nzv()
fit_snet_sto <- storeg(x_tr[, get_nzv(fit_snet)], r_tr)
get_portfolio(fit_snet_sto$beta, get_nzv(fit_snet))
df_pnl(x_te[, get_nzv(fit_snet)], fit_snet_sto$beta, r_te, "Portfolio.msasnet") |>
plot_pnl()
# Fit msamnet-storeg hybrid model ----------------------------------------------
fit_mnet <- suppressWarnings(msaenet::msamnet(
x = as.matrix(x_tr),
y = as.vector(r_tr),
family = "gaussian",
init = "ridge",
tune = "cv",
nsteps = 10,
verbose = FALSE,
seed = 42
))
fit_mnet |> get_nzv()
fit_mnet_sto <- storeg(x_tr[, get_nzv(fit_mnet)], r_tr)
get_portfolio(fit_mnet_sto$beta, get_nzv(fit_mnet))
df_pnl(x_te[, get_nzv(fit_mnet)], fit_mnet_sto$beta, r_te, "Portfolio.msamnet") |>
plot_pnl()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment