Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 4 You must be signed in to fork a gist
  • Save alexchinco/467325abbf11d5c8f565 to your computer and use it in GitHub Desktop.
Save alexchinco/467325abbf11d5c8f565 to your computer and use it in GitHub Desktop.
Simulation analysis of using the LASSO to forecast stock returns.
###########################################################################
###########################################################################
## @purpose: Simulate return data with dense shocks and show that the
## LASSO doesn't add any forecasting power.
## ------------------------------------------------------------------------
## @author: Alex Chinco
## @date: 04-DEC-2015
###########################################################################
###########################################################################
###########################################################################
###########################################################################
## @section: Prep workspace
###########################################################################
###########################################################################
options(width=120, digits=6, digits.secs=6)
rm(list=ls())
library(foreign)
library(grid)
library(plyr)
library(ggplot2)
library(reshape)
library(vars)
library(lars)
library(tikzDevice)
print(options('tikzLatexPackages'))
options(tikzLatexPackages =
c("\\usepackage{tikz}\n",
"\\usepackage[active,tightpage,psfixbb]{preview}\n",
"\\PreviewEnvironment{pgfpicture}\n",
"\\setlength\\PreviewBorder{0pt}\n",
"\\usepackage{amsmath}\n",
"\\usepackage{pxfonts}\n",
"\\usepackage{bm}\n",
"\\usepackage{xfrac}\n"
)
)
setTikzDefaults(overwrite = FALSE)
print(options('tikzLatexPackages'))
library(foreach)
library(doMC)
registerDoMC(7)
scl.str.DAT_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/data/"
scl.str.FIG_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/"
set.seed(1234)
###########################################################################
###########################################################################
## @section: Define global parameters
###########################################################################
###########################################################################
scl.int.ITER <- 10^3 # Number of iterations to run
scl.int.T_FIT <- 50 # Number of minutes used to fit models
scl.int.T_LAM <- 100 # Number of minutes used to pick lambda
scl.int.T <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes
scl.int.N <- 100 # Number of stocks
scl.int.K <- 75 # Number of predictors
scl.flt.SIG <- 0.001 # Noise volatility
scl.flt.TET <- 0.150 * 5 / 75 # Size of dense shocks (same total magnitude as sparse shocks)
scl.flt.PROB_SWITCH <- 0.01 # Expected time before switch is (1 - probSwitch)/probSwitch.
###########################################################################
###########################################################################
## @section: Simulate data and forecast returns
###########################################################################
###########################################################################
if (TRUE == FALSE) {
mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% {
print(paste(" Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = ""))
## @subsection: Initialize data objects
mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N)
mat.flt.R <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N)
mat.flt.R[1, ] <- 0
mat.int.PRED <- matrix(rep(NA, scl.int.T * scl.int.K), ncol = scl.int.K)
mat.int.PRED[1, ] <- sample(scl.int.N, scl.int.K)
vec.flt.AR1 <- rep(NA, scl.int.T) # AR(1) forecast results
vec.flt.LASSO <- rep(NA, scl.int.T) # LASSO forecast results
## @subsection: Simulate data
for (t in 2:scl.int.T) {
mat.flt.R[t, ] <- scl.flt.TET * sum(mat.flt.R[t-1, mat.int.PRED[t-1, ]]) + mat.flt.E[t, ]
mat.int.PRED[t, ] <- mat.int.PRED[t-1, ]
vec.flt.RAND <- runif(scl.int.K, 0, 1)
for (p in 1:scl.int.K) {
if (vec.flt.RAND[p] <= scl.flt.PROB_SWITCH) {
vec.int.CHOICES <- which(!(seq(1, scl.int.N) %in% mat.int.PRED[t, ]) == TRUE)
mat.int.PRED[t, p] <- sample(vec.int.CHOICES, 1)
}
}
}
## @subsection: Forecast returns using AR(1)
for (t in 1:(scl.int.T-scl.int.T_FIT)) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
vec.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1]
vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef
vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1)
}
## @subsection: Forecast returns using LASSO
vec.flt.LAM_CHOICES <- seq(0.0005, 0.15, by = 0.0005)
scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES)
mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN)
for (t in 1:scl.int.T_LAM) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)
if (t <= scl.int.T_LAM) {
mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit
}
}
vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN)
for (p in 1:scl.int.LAM_CHOICES_LEN) {
vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared
}
scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1)
for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)
vec.flt.LASSO[t+scl.int.T_FIT] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit
}
## @subsection: Estimate model fit
vec.flt.R <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1]
vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1)
vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO)
scl.flt.R2_AR1 <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared
scl.flt.R2_LASSO <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared
scl.flt.R2_BOTH <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared
## @subsection: Return results
return(c(i, scl.flt.R2_AR1, scl.flt.R2_LASSO, scl.flt.R2_BOTH))
}
save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--dense-shocks--04dec2015.Rdata", sep = ""))
}
load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--dense-shocks--04dec2015.Rdata", sep = ""))
###########################################################################
###########################################################################
## @section: Plot R^2 distributions
###########################################################################
###########################################################################
## @subsection: Create plot data
vec.flt.X <- seq(-0.005, 0.02, length.out = 101)
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT <- data.frame(x = vec.flt.X,
ar1 = NA,
lasso = NA,
both = NA
)
for (i in 1:100) {
mat.df.PLOT$ar1[i] <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
mat.df.PLOT$lasso[i] <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
mat.df.PLOT$both[i] <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
}
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2
names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO")
mat.df.PLOT <- melt(mat.df.PLOT, c("x"))
mat.df.PLOT$xmin <- mat.df.PLOT$x - scl.flt.dX/2
mat.df.PLOT$xmax <- mat.df.PLOT$x + scl.flt.dX/2
mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ]
mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO"),
avg = apply(mat.flt.SIM[, c(2,3,4)], 2, mean),
label =
c(paste("$",round(mean(mat.flt.SIM[,2]*100),3),"{\\scriptstyle \\%}$", sep=""),
paste("$",round(mean(mat.flt.SIM[,3]*100),3),"{\\scriptstyle \\%}$", sep=""),
paste("$",round(mean(mat.flt.SIM[,4]*100),3),"{\\scriptstyle \\%}$", sep="")
)
)
## @subsection: Plot results
theme_set(theme_bw())
scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--dense-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)
obj.gg2.PLOT <- ggplot()
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT,
aes(xmin = xmin,
xmax = xmax,
ymin = 0,
ymax = value,
group = variable
)
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
size = 0.25,
colour = "black"
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2,
aes(xintercept = avg,
group = variable
),
colour = "red",
size = 1.25
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2,
aes(x = avg + 0.001,
y = 0.40 * 4/5,
label = label
),
colour = "red",
angle = 270,
size = 4
)
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$")
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(-0.005, 0.020),
breaks = c(0.00, 0.005, 0.010, 0.015),
labels = c("$0.0{\\scriptstyle \\%}$", "$0.5{\\scriptstyle \\%}$", "$1.0{\\scriptstyle \\%}$", "$1.5{\\scriptstyle \\%}$")
)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.40),
breaks = c(0, 0.10, 0.20, 0.30, 0.40),
labels = c("$0.00$","$0.10$", "$0.20$", "$0.30$", "$0.40$")
)
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10),
plot.title = element_text(vjust = 1.75),
panel.grid.minor = element_blank(),
legend.position = "none"
)
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: Dense Shocks")
print(obj.gg2.PLOT)
dev.off()
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
###########################################################################
###########################################################################
## @purpose: Simulate return data with no shocks and show that the LASSO
## does add any predictive power.
## ------------------------------------------------------------------------
## @author: Alex Chinco
## @date: 04-DEC-2015
###########################################################################
###########################################################################
###########################################################################
###########################################################################
## @section: Prep workspace
###########################################################################
###########################################################################
options(width=120, digits=6, digits.secs=6)
rm(list=ls())
library(foreign)
library(grid)
library(plyr)
library(ggplot2)
library(reshape)
library(vars)
library(lars)
library(tikzDevice)
print(options('tikzLatexPackages'))
options(tikzLatexPackages =
c("\\usepackage{tikz}\n",
"\\usepackage[active,tightpage,psfixbb]{preview}\n",
"\\PreviewEnvironment{pgfpicture}\n",
"\\setlength\\PreviewBorder{0pt}\n",
"\\usepackage{amsmath}\n",
"\\usepackage{pxfonts}\n",
"\\usepackage{bm}\n",
"\\usepackage{xfrac}\n"
)
)
setTikzDefaults(overwrite = FALSE)
print(options('tikzLatexPackages'))
library(foreach)
library(doMC)
registerDoMC(7)
scl.str.DAT_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/data/"
scl.str.FIG_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/"
set.seed(1234)
###########################################################################
###########################################################################
## @section: Define global parameters
###########################################################################
###########################################################################
scl.int.ITER <- 10^3 # Number of iterations to run
scl.int.T_FIT <- 50 # Number of minutes used to fit models
scl.int.T_LAM <- 100 # Number of minutes used to pick lambda
scl.int.T <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes
scl.int.N <- 100 # Number of stocks
scl.int.K <- 5 # Number of predictors
scl.flt.SIG <- 0.001 # Noise volatility
###########################################################################
###########################################################################
## @section: Simulate data and forecast returns
###########################################################################
###########################################################################
if (TRUE == FALSE) {
mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% {
print(paste(" Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = ""))
## @subsection: Initialize data objects
mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N)
mat.flt.R <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N)
mat.flt.R[1, ] <- 0
vec.flt.AR1 <- rep(NA, scl.int.T) # AR(1) forecast results
vec.flt.LASSO <- rep(NA, scl.int.T) # LASSO forecast results
## @subsection: Simulate data
for (t in 2:scl.int.T) {
mat.flt.R[t, ] <- mat.flt.E[t, ]
}
## @subsection: Forecast returns using AR(1)
for (t in 1:(scl.int.T-scl.int.T_FIT)) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
vec.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1]
vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef
vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1)
}
## @subsection: Forecast returns using LASSO
vec.flt.LAM_CHOICES <- seq(0.0005, 0.15, by = 0.0005)
scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES)
mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN)
for (t in 1:scl.int.T_LAM) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)
if (t <= scl.int.T_LAM) {
mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit
}
}
vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN)
for (p in 1:scl.int.LAM_CHOICES_LEN) {
vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared
}
scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1)
for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)
vec.flt.LASSO[t+scl.int.T_FIT] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit
}
## @subsection: Estimate model fit
vec.flt.R <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1]
vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1)
vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO)
scl.flt.R2_AR1 <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared
scl.flt.R2_LASSO <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared
scl.flt.R2_BOTH <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared
## @subsection: Return results
return(c(i, scl.flt.R2_AR1, scl.flt.R2_LASSO, scl.flt.R2_BOTH))
}
save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--no-shocks--04dec2015.Rdata", sep = ""))
}
load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--no-shocks--04dec2015.Rdata", sep = ""))
###########################################################################
###########################################################################
## @section: Plot R^2 distributions
###########################################################################
###########################################################################
## @subsection: Create plot data
vec.flt.X <- seq(-0.005, 0.0125, length.out = 101)
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT <- data.frame(x = vec.flt.X,
ar1 = NA,
lasso = NA,
both = NA
)
for (i in 1:100) {
mat.df.PLOT$ar1[i] <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
mat.df.PLOT$lasso[i] <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
mat.df.PLOT$both[i] <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
}
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2
names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO")
mat.df.PLOT <- melt(mat.df.PLOT, c("x"))
mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ]
mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO"),
avg = apply(mat.flt.SIM[, c(2,3,4)], 2, mean),
label =
c(paste("$",round(mean(mat.flt.SIM[,2]*100),3),"{\\scriptstyle \\%}$", sep=""),
paste("$",round(mean(mat.flt.SIM[,3]*100),3),"{\\scriptstyle \\%}$", sep=""),
paste("$",round(mean(mat.flt.SIM[,4]*100),3),"{\\scriptstyle \\%}$", sep="")
)
)
## @subsection: Plot results
theme_set(theme_bw())
scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--no-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)
obj.gg2.PLOT <- ggplot()
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT,
aes(xmin = x - scl.flt.dX/2,
xmax = x + scl.flt.dX/2,
ymin = 0,
ymax = value,
group = variable
)
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
size = 0.25,
colour = "black"
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2,
aes(xintercept = avg,
group = variable
),
colour = "red",
size = 1.25
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2,
aes(x = avg + 0.00065,
y = 0.21 * 4/5,
label = label
),
colour = "red",
angle = 270,
size = 4
)
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$")
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(-0.0025, 0.0125),
breaks = c(0.00, 0.005, 0.01),
labels = c("$0.0{\\scriptstyle \\%}$","$0.5{\\scriptstyle \\%}$", "$1.0{\\scriptstyle \\%}$")
)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.21),
breaks = c(0, 0.10, 0.20),
labels = c("$0.00$","$0.10$", "$0.20$")
)
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10),
plot.title = element_text(vjust = 1.75),
panel.grid.minor = element_blank(),
legend.position = "none"
)
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: No Shocks")
print(obj.gg2.PLOT)
dev.off()
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
###########################################################################
###########################################################################
## @purpose: Simulate return data with sparse shocks and show that the
## LASSO can identify these shocks while AR(1) can't.
## ------------------------------------------------------------------------
## @author: Alex Chinco
## @date: 04-DEC-2015
###########################################################################
###########################################################################
###########################################################################
###########################################################################
## @section: Prep workspace
###########################################################################
###########################################################################
options(width=120, digits=6, digits.secs=6)
rm(list=ls())
library(foreign)
library(grid)
library(plyr)
library(ggplot2)
library(reshape)
library(vars)
library(lars)
library(tikzDevice)
print(options('tikzLatexPackages'))
options(tikzLatexPackages =
c("\\usepackage{tikz}\n",
"\\usepackage[active,tightpage,psfixbb]{preview}\n",
"\\PreviewEnvironment{pgfpicture}\n",
"\\setlength\\PreviewBorder{0pt}\n",
"\\usepackage{amsmath}\n",
"\\usepackage{pxfonts}\n",
"\\usepackage{bm}\n",
"\\usepackage{xfrac}\n"
)
)
setTikzDefaults(overwrite = FALSE)
print(options('tikzLatexPackages'))
library(foreach)
library(doMC)
registerDoMC(7)
scl.str.DAT_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/data/"
scl.str.FIG_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/"
set.seed(1234)
###########################################################################
###########################################################################
## @section: Define global parameters
###########################################################################
###########################################################################
scl.int.ITER <- 10^3 # Number of iterations to run
scl.int.T_FIT <- 50 # Number of minutes used to fit models
scl.int.T_LAM <- 100 # Number of minutes used to pick lambda
scl.int.T <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes
scl.int.N <- 100 # Number of stocks
scl.int.K <- 5 # Number of predictors
scl.flt.SIG <- 0.001 # Noise volatility
scl.flt.TET <- 0.150 # Size of sparse shocks
scl.flt.PROB_SWITCH <- 0.01 # Expected time before switch is (1 - probSwitch)/probSwitch.
###########################################################################
###########################################################################
## @section: Simulate data and forecast returns
###########################################################################
###########################################################################
if (TRUE == FALSE) {
mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% {
print(paste(" Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = ""))
## @subsection: Initialize data objects
mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N)
mat.flt.R <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N)
mat.flt.R[1, ] <- 0
mat.int.PRED <- matrix(rep(NA, scl.int.T * scl.int.K), ncol = scl.int.K)
mat.int.PRED[1, ] <- sample(scl.int.N, scl.int.K)
vec.flt.AR1 <- rep(NA, scl.int.T) # AR(1) forecast results
vec.flt.LASSO <- rep(NA, scl.int.T) # LASSO forecast results
vec.flt.ORACLE <- rep(NA, scl.int.T) # ORACLE forecast results
vec.int.TOT_PRED <- rep(NA, scl.int.T) # Total number of predictors selected by the LASSO
vec.int.COR_PRED <- rep(NA, scl.int.T) # Number of correct predictors selected by the LASSO
## @subsection: Simulate data
for (t in 2:scl.int.T) {
mat.flt.R[t, ] <- scl.flt.TET * sum(mat.flt.R[t-1, mat.int.PRED[t-1, ]]) + mat.flt.E[t, ]
mat.int.PRED[t, ] <- mat.int.PRED[t-1, ]
vec.flt.RAND <- runif(scl.int.K, 0, 1)
for (p in 1:scl.int.K) {
if (vec.flt.RAND[p] <= scl.flt.PROB_SWITCH) {
vec.int.CHOICES <- which(!(seq(1, scl.int.N) %in% mat.int.PRED[t, ]) == TRUE)
mat.int.PRED[t, p] <- sample(vec.int.CHOICES, 1)
}
}
}
## @subsection: Forecast returns using AR(1)
for (t in 1:(scl.int.T-scl.int.T_FIT)) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
vec.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1]
vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef
vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1)
}
## @subsection: Forecast returns using oracle
for (t in 1:(scl.int.T-scl.int.T_FIT)) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
mat.flt.X <- foreach(r=t:(t+(scl.int.T_FIT-2)), .combine = "rbind") %do% {
vec.flt.X <- mat.flt.R[r, mat.int.PRED[r, ]]
return(vec.flt.X)
}
vec.flt.COEF <- matrix(lm(vec.flt.Y ~ mat.flt.X)$coef, ncol = 1)
vec.flt.NEWX <- matrix(c(1,mat.flt.R[t+(scl.int.T_FIT-1), mat.int.PRED[t+(scl.int.T_FIT-1), ]]), ncol = 1)
vec.flt.ORACLE[t+scl.int.T_FIT] <- t(vec.flt.COEF) %*% vec.flt.NEWX
}
## @subsection: Forecast returns using LASSO
vec.flt.LAM_CHOICES <- seq(0.0005, 0.15, by = 0.0005)
scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES)
mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN)
for (t in 1:scl.int.T_LAM) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)
if (t <= scl.int.T_LAM) {
mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit
}
}
vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN)
for (p in 1:scl.int.LAM_CHOICES_LEN) {
vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared
}
scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1)
for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) {
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)
vec.flt.LASSO[t+scl.int.T_FIT] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit
vec.flt.COEF <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "coef", mode = "lambda")$coef
vec.int.TOT_PRED[t+scl.int.T_FIT] <- sum(vec.flt.COEF != 0)
vec.int.COR_PRED[t+scl.int.T_FIT] <- sum(mat.int.PRED[(t+(scl.int.T_FIT-1)),] %in% which(vec.flt.COEF != 0))
}
## @subsection: Estimate model fit
vec.flt.R <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1]
vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1)
vec.flt.ORACLE <- vec.flt.ORACLE[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
vec.flt.ORACLE <- (vec.flt.ORACLE - mean(vec.flt.ORACLE))/sd(vec.flt.ORACLE)
vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO)
scl.flt.R2_AR1 <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared
scl.flt.R2_ORACLE <- summary(lm(vec.flt.R ~ vec.flt.ORACLE))$adj.r.squared
scl.flt.R2_LASSO <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared
scl.flt.R2_BOTH <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared
scl.flt.TOT_PRED <- mean(vec.int.TOT_PRED, na.rm = TRUE)
scl.flt.COR_PRED <- mean(vec.int.COR_PRED, na.rm = TRUE)
## @subsection: Return results
return(c(i, scl.flt.R2_AR1, scl.flt.R2_ORACLE, scl.flt.R2_LASSO, scl.flt.R2_BOTH, scl.flt.LAM, scl.flt.TOT_PRED, scl.flt.COR_PRED))
}
save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--sparse-shocks--04dec2015.Rdata", sep = ""))
}
load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--sparse-shocks--04dec2015.Rdata", sep = ""))
mat.flt.SIM <- mat.flt.SIM[mat.flt.SIM[,6] < 0.15, ]
###########################################################################
###########################################################################
## @section: Plot R^2 distributions
###########################################################################
###########################################################################
## @subsection: Create plot data
vec.flt.X <- seq(0, 0.20, length.out = 101)
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT <- data.frame(x = vec.flt.X,
ar1 = NA,
lasso = NA,
both = NA,
oracle = NA
)
for (i in 1:100) {
mat.df.PLOT$ar1[i] <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
mat.df.PLOT$lasso[i] <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
mat.df.PLOT$both[i] <- length(which((mat.flt.SIM[,5] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,5] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
mat.df.PLOT$oracle[i] <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
}
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2
names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO", "Oracle")
mat.df.PLOT <- melt(mat.df.PLOT, c("x"))
mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ]
mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO", "Oracle"),
avg = apply(mat.flt.SIM[, c(2,4,5,3)], 2, mean),
label =
c(paste("$\\phantom{1}",round(mean(mat.flt.SIM[,2]*100),2),"{\\scriptstyle \\%}$", sep=""),
paste("$\\phantom{1}",round(mean(mat.flt.SIM[,4]*100),2),"0{\\scriptstyle \\%}$", sep=""),
paste("$\\phantom{1}",round(mean(mat.flt.SIM[,5]*100),2),"{\\scriptstyle \\%}$", sep=""),
paste("$",round(mean(mat.flt.SIM[,3]*100),2),"{\\scriptstyle \\%}$", sep="")
)
)
## @subsection: Plot results
theme_set(theme_bw())
scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--sparse-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)
obj.gg2.PLOT <- ggplot()
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT,
aes(xmin = x - scl.flt.dX/2,
xmax = x + scl.flt.dX/2,
ymin = 0,
ymax = value,
group = variable
)
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
size = 0.25,
colour = "black"
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2,
aes(xintercept = avg,
group = variable
),
colour = "red",
size = 1.25
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2,
aes(x = avg + 0.011,
y = 0.105 * 4/5,
label = label
),
colour = "red",
angle = 270,
size = 4
)
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$")
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(0, 0.20),
breaks = c(0, 0.05, 0.10, 0.15, 0.20),
labels = c("$\\phantom{1}0{\\scriptstyle \\%}$","$\\phantom{1}5{\\scriptstyle \\%}$", "$10{\\scriptstyle \\%}$", "$15{\\scriptstyle \\%}$", "$20{\\scriptstyle \\%}$")
)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.105),
breaks = c(0, 0.02, 0.04, 0.06, 0.08, 0.10),
labels = c("$0.00$","$0.02$", "$0.04$", "$0.06$", "$0.08$", "$0.10$")
)
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10),
plot.title = element_text(vjust = 1.75),
panel.grid.minor = element_blank(),
legend.position = "none"
)
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: Sparse Shocks")
print(obj.gg2.PLOT)
dev.off()
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
###########################################################################
###########################################################################
## @section: Plot lambda distribution
###########################################################################
###########################################################################
## @subsection: Create plot data
vec.flt.X <- seq(0, max(mat.flt.SIM[,6]), length.out = 101)
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT <- data.frame(x = vec.flt.X, value = NA)
for (i in 1:100) {
mat.df.PLOT$value[i] <- length(which((mat.flt.SIM[,6] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,6] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
}
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2
mat.df.PLOT$xmax <- mat.df.PLOT$x + scl.flt.dX/2
mat.df.PLOT$xmin <- mat.df.PLOT$x - scl.flt.dX/2
mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ]
## @subsection: Plot results
theme_set(theme_bw())
scl.str.RAW_FILE <- 'plot--lambda-distribution--simulated-data--sparse-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)
obj.gg2.PLOT <- ggplot(data = mat.df.PLOT)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(aes(xmin = xmin,
xmax = xmax,
ymin = 0,
ymax = value
)
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
size = 0.25,
colour = "black"
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(aes(xintercept = mean(mat.flt.SIM[,6])),
colour = "red",
size = 1.25
)
obj.gg2.PLOT <- obj.gg2.PLOT + annotate("text",
x = mean(mat.flt.SIM[,6]) + 0.00008,
y = 0.30 * 4/5,
label = paste("$", round(mean(mat.flt.SIM[,6]),4), "$", sep=""),
colour = "red",
angle = 270,
size = 4
)
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, \\lambda = x \\, ]$")
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(0, 0.006),
breaks = c(0, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006),
labels = c("$0.000$","$0.001$","$0.002$","$0.003$","$0.004$","$0.005$","$0.006$")
)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.30),
breaks = c(0, 0.10, 0.20, 0.30),
labels = c("$0.00$","$0.10$", "$0.20$", "$0.30$")
)
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10),
plot.title = element_text(vjust = 1.75),
panel.grid.minor = element_blank(),
legend.position = "none"
)
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("$\\lambda$ Distribution: Sparse Shocks")
print(obj.gg2.PLOT)
dev.off()
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
###########################################################################
###########################################################################
## @section: Plot number of predictors distribution
###########################################################################
###########################################################################
## @subsection: Create plot data, total number of predictors
scl.int.NUM_BUCKETS <- 100
vec.flt.X <- seq(0, max(mat.flt.SIM[,7]), length.out = (scl.int.NUM_BUCKETS + 1))
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT1 <- data.frame(x = vec.flt.X, value = NA)
for (i in 1:scl.int.NUM_BUCKETS) {
mat.df.PLOT1$value[i] <- length(which((mat.flt.SIM[,7] >= mat.df.PLOT1$x[i]) & (mat.flt.SIM[,7] < mat.df.PLOT1$x[i+1])))/scl.int.ITER
}
mat.df.PLOT1$x <- mat.df.PLOT1$x + scl.flt.dX/2
mat.df.PLOT1$xmax <- mat.df.PLOT1$x + scl.flt.dX/2
mat.df.PLOT1$xmin <- mat.df.PLOT1$x - scl.flt.dX/2
mat.df.PLOT1 <- mat.df.PLOT1[is.na(mat.df.PLOT1$value) == FALSE, ]
mat.df.PLOT1 <- mat.df.PLOT1[mat.df.PLOT1$value > 0, ]
## @subsection: Create plot data, number of correct predictors
scl.int.NUM_BUCKETS <- 100
vec.flt.X <- seq(0, max(mat.flt.SIM[,8]), length.out = (scl.int.NUM_BUCKETS + 1))
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT2 <- data.frame(x = vec.flt.X, value = NA)
for (i in 1:scl.int.NUM_BUCKETS) {
mat.df.PLOT2$value[i] <- length(which((mat.flt.SIM[,8] >= mat.df.PLOT2$x[i]) & (mat.flt.SIM[,8] < mat.df.PLOT2$x[i+1])))/scl.int.ITER
}
mat.df.PLOT2$x <- mat.df.PLOT2$x + scl.flt.dX/2
mat.df.PLOT2$xmax <- mat.df.PLOT2$x + scl.flt.dX/2
mat.df.PLOT2$xmin <- mat.df.PLOT2$x - scl.flt.dX/2
mat.df.PLOT2 <- mat.df.PLOT2[is.na(mat.df.PLOT2$value) == FALSE, ]
mat.df.PLOT2 <- mat.df.PLOT2[mat.df.PLOT2$value > 0, ]
## @subsection: Plot results
obj.vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)
theme_set(theme_bw())
scl.str.RAW_FILE <- 'plot--predictor-distribution--simulated-data--sparse-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)
## Left panel
obj.gg2.PLOT1 <- ggplot(data = mat.df.PLOT1)
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + geom_rect(aes(xmin = xmin,
xmax = xmax,
ymin = 0,
ymax = value
)
)
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + geom_hline(yintercept = 0,
size = 0.25,
colour = "black"
)
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + geom_vline(aes(xintercept = mean(mat.flt.SIM[,7])),
colour = "red",
size = 1.25
)
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + annotate("text",
x = mean(mat.flt.SIM[,7]) + 30/35,
y = 0.10 * 4/5,
label = paste("$", round(mean(mat.flt.SIM[,7]),2), "$", sep=""),
colour = "red",
angle = 270,
size = 4
)
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + xlab("")
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + ylab("$\\mathrm{Pr}[ \\langle\\text{\\# Predictors}\\rangle = x ]$")
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + scale_y_continuous(limits = c(0, 0.10),
breaks = c(0, 0.02, 0.04, 0.06, 0.08, 0.10),
labels = c("$0.00$", "$0.02$", "$0.04$", "$0.06$", "$0.08$", "$0.10$")
)
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + scale_x_continuous(limits = c(0, 30),
breaks = c(0, 10, 20, 30),
labels = c("$\\phantom{1}0$", "$10$", "$20$", "$30$")
)
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + theme(plot.margin = unit(c(0,0.15,-0.85,0.15), "lines"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10),
plot.title = element_blank(),
panel.grid.minor = element_blank()
)
## Right panel
obj.gg2.PLOT2 <- ggplot(data = mat.df.PLOT2)
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + geom_rect(aes(xmin = xmin,
xmax = xmax,
ymin = 0,
ymax = value
)
)
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + geom_hline(yintercept = 0,
size = 0.25,
colour = "black"
)
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + geom_vline(aes(xintercept = mean(mat.flt.SIM[,8])),
colour = "red",
size = 1.25
)
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + annotate("text",
x = mean(mat.flt.SIM[,8]) + 2.5/35,
y = 0.08 * 4/5,
label = paste("$\\phantom{1}", round(mean(mat.flt.SIM[,8]),2), "$", sep=""),
colour = "red",
angle = 270,
size = 4
)
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + xlab("")
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + ylab("$\\mathrm{Pr}[ \\langle\\text{\\# Correct}\\rangle = x ]$")
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + scale_y_continuous(limits = c(0, 0.08),
breaks = c(0, 0.02, 0.04, 0.06, 0.08),
labels = c("$0.00$", "$0.02$", "$0.04$", "$0.06$", "$0.08$")
)
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + scale_x_continuous(limits = c(0, 2.5),
breaks = c(0, 0.5, 1.0, 1.5, 2.0, 2.5),
labels = c("$0.0$", "$0.5$", "$1.0$", "$1.5$", "$2.0$", "$2.5$")
)
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + theme(plot.margin = unit(c(0,0.15,-0.85,0.15), "lines"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10),
plot.title = element_blank(),
panel.grid.minor = element_blank()
)
## Generate output
pushViewport(viewport(layout = grid.layout(5, 2)))
grid.text("Predictor Distribution: Sparse Shocks", vp = viewport(layout.pos.row = 1, layout.pos.col = 1:2), vjust = 0.20, gp = gpar(fontsize=15))
print(obj.gg2.PLOT1, vp = obj.vplayout(2:5,1))
print(obj.gg2.PLOT2, vp = obj.vplayout(2:5,2))
dev.off()
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment