Created
December 5, 2015 17:07
-
-
Save alexchinco/467325abbf11d5c8f565 to your computer and use it in GitHub Desktop.
Simulation analysis of using the LASSO to forecast stock returns.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################################################### | |
########################################################################### | |
## @purpose: Simulate return data with dense shocks and show that the | |
## LASSO doesn't add any forecasting power. | |
## ------------------------------------------------------------------------ | |
## @author: Alex Chinco | |
## @date: 04-DEC-2015 | |
########################################################################### | |
########################################################################### | |
########################################################################### | |
########################################################################### | |
## @section: Prep workspace | |
########################################################################### | |
########################################################################### | |
options(width=120, digits=6, digits.secs=6) | |
rm(list=ls()) | |
library(foreign) | |
library(grid) | |
library(plyr) | |
library(ggplot2) | |
library(reshape) | |
library(vars) | |
library(lars) | |
library(tikzDevice) | |
print(options('tikzLatexPackages')) | |
options(tikzLatexPackages = | |
c("\\usepackage{tikz}\n", | |
"\\usepackage[active,tightpage,psfixbb]{preview}\n", | |
"\\PreviewEnvironment{pgfpicture}\n", | |
"\\setlength\\PreviewBorder{0pt}\n", | |
"\\usepackage{amsmath}\n", | |
"\\usepackage{pxfonts}\n", | |
"\\usepackage{bm}\n", | |
"\\usepackage{xfrac}\n" | |
) | |
) | |
setTikzDefaults(overwrite = FALSE) | |
print(options('tikzLatexPackages')) | |
library(foreach) | |
library(doMC) | |
registerDoMC(7) | |
scl.str.DAT_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/data/" | |
scl.str.FIG_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/" | |
set.seed(1234) | |
########################################################################### | |
########################################################################### | |
## @section: Define global parameters | |
########################################################################### | |
########################################################################### | |
scl.int.ITER <- 10^3 # Number of iterations to run | |
scl.int.T_FIT <- 50 # Number of minutes used to fit models | |
scl.int.T_LAM <- 100 # Number of minutes used to pick lambda | |
scl.int.T <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes | |
scl.int.N <- 100 # Number of stocks | |
scl.int.K <- 75 # Number of predictors | |
scl.flt.SIG <- 0.001 # Noise volatility | |
scl.flt.TET <- 0.150 * 5 / 75 # Size of dense shocks (same total magnitude as sparse shocks) | |
scl.flt.PROB_SWITCH <- 0.01 # Expected time before switch is (1 - probSwitch)/probSwitch. | |
########################################################################### | |
########################################################################### | |
## @section: Simulate data and forecast returns | |
########################################################################### | |
########################################################################### | |
if (TRUE == FALSE) { | |
mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% { | |
print(paste(" Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = "")) | |
## @subsection: Initialize data objects | |
mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N) | |
mat.flt.R <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N) | |
mat.flt.R[1, ] <- 0 | |
mat.int.PRED <- matrix(rep(NA, scl.int.T * scl.int.K), ncol = scl.int.K) | |
mat.int.PRED[1, ] <- sample(scl.int.N, scl.int.K) | |
vec.flt.AR1 <- rep(NA, scl.int.T) # AR(1) forecast results | |
vec.flt.LASSO <- rep(NA, scl.int.T) # LASSO forecast results | |
## @subsection: Simulate data | |
for (t in 2:scl.int.T) { | |
mat.flt.R[t, ] <- scl.flt.TET * sum(mat.flt.R[t-1, mat.int.PRED[t-1, ]]) + mat.flt.E[t, ] | |
mat.int.PRED[t, ] <- mat.int.PRED[t-1, ] | |
vec.flt.RAND <- runif(scl.int.K, 0, 1) | |
for (p in 1:scl.int.K) { | |
if (vec.flt.RAND[p] <= scl.flt.PROB_SWITCH) { | |
vec.int.CHOICES <- which(!(seq(1, scl.int.N) %in% mat.int.PRED[t, ]) == TRUE) | |
mat.int.PRED[t, p] <- sample(vec.int.CHOICES, 1) | |
} | |
} | |
} | |
## @subsection: Forecast returns using AR(1) | |
for (t in 1:(scl.int.T-scl.int.T_FIT)) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
vec.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1] | |
vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef | |
vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1) | |
} | |
## @subsection: Forecast returns using LASSO | |
vec.flt.LAM_CHOICES <- seq(0.0005, 0.15, by = 0.0005) | |
scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES) | |
mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN) | |
for (t in 1:scl.int.T_LAM) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ] | |
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE) | |
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1) | |
if (t <= scl.int.T_LAM) { | |
mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit | |
} | |
} | |
vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN) | |
for (p in 1:scl.int.LAM_CHOICES_LEN) { | |
vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared | |
} | |
scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1) | |
for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ] | |
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE) | |
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1) | |
vec.flt.LASSO[t+scl.int.T_FIT] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit | |
} | |
## @subsection: Estimate model fit | |
vec.flt.R <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1] | |
vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T] | |
vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1) | |
vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T] | |
vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO) | |
scl.flt.R2_AR1 <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared | |
scl.flt.R2_LASSO <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared | |
scl.flt.R2_BOTH <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared | |
## @subsection: Return results | |
return(c(i, scl.flt.R2_AR1, scl.flt.R2_LASSO, scl.flt.R2_BOTH)) | |
} | |
save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--dense-shocks--04dec2015.Rdata", sep = "")) | |
} | |
load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--dense-shocks--04dec2015.Rdata", sep = "")) | |
########################################################################### | |
########################################################################### | |
## @section: Plot R^2 distributions | |
########################################################################### | |
########################################################################### | |
## @subsection: Create plot data | |
vec.flt.X <- seq(-0.005, 0.02, length.out = 101) | |
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1] | |
mat.df.PLOT <- data.frame(x = vec.flt.X, | |
ar1 = NA, | |
lasso = NA, | |
both = NA | |
) | |
for (i in 1:100) { | |
mat.df.PLOT$ar1[i] <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
mat.df.PLOT$lasso[i] <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
mat.df.PLOT$both[i] <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
} | |
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2 | |
names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO") | |
mat.df.PLOT <- melt(mat.df.PLOT, c("x")) | |
mat.df.PLOT$xmin <- mat.df.PLOT$x - scl.flt.dX/2 | |
mat.df.PLOT$xmax <- mat.df.PLOT$x + scl.flt.dX/2 | |
mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ] | |
mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ] | |
mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO"), | |
avg = apply(mat.flt.SIM[, c(2,3,4)], 2, mean), | |
label = | |
c(paste("$",round(mean(mat.flt.SIM[,2]*100),3),"{\\scriptstyle \\%}$", sep=""), | |
paste("$",round(mean(mat.flt.SIM[,3]*100),3),"{\\scriptstyle \\%}$", sep=""), | |
paste("$",round(mean(mat.flt.SIM[,4]*100),3),"{\\scriptstyle \\%}$", sep="") | |
) | |
) | |
## @subsection: Plot results | |
theme_set(theme_bw()) | |
scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--dense-shocks' | |
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='') | |
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='') | |
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='') | |
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='') | |
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='') | |
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE) | |
obj.gg2.PLOT <- ggplot() | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1") | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT, | |
aes(xmin = xmin, | |
xmax = xmax, | |
ymin = 0, | |
ymax = value, | |
group = variable | |
) | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0, | |
size = 0.25, | |
colour = "black" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2, | |
aes(xintercept = avg, | |
group = variable | |
), | |
colour = "red", | |
size = 1.25 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2, | |
aes(x = avg + 0.001, | |
y = 0.40 * 4/5, | |
label = label | |
), | |
colour = "red", | |
angle = 270, | |
size = 4 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("") | |
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$") | |
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1) | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(-0.005, 0.020), | |
breaks = c(0.00, 0.005, 0.010, 0.015), | |
labels = c("$0.0{\\scriptstyle \\%}$", "$0.5{\\scriptstyle \\%}$", "$1.0{\\scriptstyle \\%}$", "$1.5{\\scriptstyle \\%}$") | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.40), | |
breaks = c(0, 0.10, 0.20, 0.30, 0.40), | |
labels = c("$0.00$","$0.10$", "$0.20$", "$0.30$", "$0.40$") | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"), | |
axis.text = element_text(size = 10), | |
axis.title = element_text(size = 10), | |
plot.title = element_text(vjust = 1.75), | |
panel.grid.minor = element_blank(), | |
legend.position = "none" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: Dense Shocks") | |
print(obj.gg2.PLOT) | |
dev.off() | |
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE) | |
system(paste('rm ', scl.str.TEX_FILE, sep = '')) | |
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = '')) | |
system(paste('rm ', scl.str.AUX_FILE, sep = '')) | |
system(paste('rm ', scl.str.LOG_FILE, sep = '')) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################################################### | |
########################################################################### | |
## @purpose: Simulate return data with no shocks and show that the LASSO | |
## does add any predictive power. | |
## ------------------------------------------------------------------------ | |
## @author: Alex Chinco | |
## @date: 04-DEC-2015 | |
########################################################################### | |
########################################################################### | |
########################################################################### | |
########################################################################### | |
## @section: Prep workspace | |
########################################################################### | |
########################################################################### | |
options(width=120, digits=6, digits.secs=6) | |
rm(list=ls()) | |
library(foreign) | |
library(grid) | |
library(plyr) | |
library(ggplot2) | |
library(reshape) | |
library(vars) | |
library(lars) | |
library(tikzDevice) | |
print(options('tikzLatexPackages')) | |
options(tikzLatexPackages = | |
c("\\usepackage{tikz}\n", | |
"\\usepackage[active,tightpage,psfixbb]{preview}\n", | |
"\\PreviewEnvironment{pgfpicture}\n", | |
"\\setlength\\PreviewBorder{0pt}\n", | |
"\\usepackage{amsmath}\n", | |
"\\usepackage{pxfonts}\n", | |
"\\usepackage{bm}\n", | |
"\\usepackage{xfrac}\n" | |
) | |
) | |
setTikzDefaults(overwrite = FALSE) | |
print(options('tikzLatexPackages')) | |
library(foreach) | |
library(doMC) | |
registerDoMC(7) | |
scl.str.DAT_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/data/" | |
scl.str.FIG_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/" | |
set.seed(1234) | |
########################################################################### | |
########################################################################### | |
## @section: Define global parameters | |
########################################################################### | |
########################################################################### | |
scl.int.ITER <- 10^3 # Number of iterations to run | |
scl.int.T_FIT <- 50 # Number of minutes used to fit models | |
scl.int.T_LAM <- 100 # Number of minutes used to pick lambda | |
scl.int.T <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes | |
scl.int.N <- 100 # Number of stocks | |
scl.int.K <- 5 # Number of predictors | |
scl.flt.SIG <- 0.001 # Noise volatility | |
########################################################################### | |
########################################################################### | |
## @section: Simulate data and forecast returns | |
########################################################################### | |
########################################################################### | |
if (TRUE == FALSE) { | |
mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% { | |
print(paste(" Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = "")) | |
## @subsection: Initialize data objects | |
mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N) | |
mat.flt.R <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N) | |
mat.flt.R[1, ] <- 0 | |
vec.flt.AR1 <- rep(NA, scl.int.T) # AR(1) forecast results | |
vec.flt.LASSO <- rep(NA, scl.int.T) # LASSO forecast results | |
## @subsection: Simulate data | |
for (t in 2:scl.int.T) { | |
mat.flt.R[t, ] <- mat.flt.E[t, ] | |
} | |
## @subsection: Forecast returns using AR(1) | |
for (t in 1:(scl.int.T-scl.int.T_FIT)) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
vec.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1] | |
vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef | |
vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1) | |
} | |
## @subsection: Forecast returns using LASSO | |
vec.flt.LAM_CHOICES <- seq(0.0005, 0.15, by = 0.0005) | |
scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES) | |
mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN) | |
for (t in 1:scl.int.T_LAM) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ] | |
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE) | |
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1) | |
if (t <= scl.int.T_LAM) { | |
mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit | |
} | |
} | |
vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN) | |
for (p in 1:scl.int.LAM_CHOICES_LEN) { | |
vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared | |
} | |
scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1) | |
for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ] | |
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE) | |
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1) | |
vec.flt.LASSO[t+scl.int.T_FIT] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit | |
} | |
## @subsection: Estimate model fit | |
vec.flt.R <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1] | |
vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T] | |
vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1) | |
vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T] | |
vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO) | |
scl.flt.R2_AR1 <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared | |
scl.flt.R2_LASSO <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared | |
scl.flt.R2_BOTH <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared | |
## @subsection: Return results | |
return(c(i, scl.flt.R2_AR1, scl.flt.R2_LASSO, scl.flt.R2_BOTH)) | |
} | |
save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--no-shocks--04dec2015.Rdata", sep = "")) | |
} | |
load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--no-shocks--04dec2015.Rdata", sep = "")) | |
########################################################################### | |
########################################################################### | |
## @section: Plot R^2 distributions | |
########################################################################### | |
########################################################################### | |
## @subsection: Create plot data | |
vec.flt.X <- seq(-0.005, 0.0125, length.out = 101) | |
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1] | |
mat.df.PLOT <- data.frame(x = vec.flt.X, | |
ar1 = NA, | |
lasso = NA, | |
both = NA | |
) | |
for (i in 1:100) { | |
mat.df.PLOT$ar1[i] <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
mat.df.PLOT$lasso[i] <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
mat.df.PLOT$both[i] <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
} | |
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2 | |
names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO") | |
mat.df.PLOT <- melt(mat.df.PLOT, c("x")) | |
mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ] | |
mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ] | |
mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO"), | |
avg = apply(mat.flt.SIM[, c(2,3,4)], 2, mean), | |
label = | |
c(paste("$",round(mean(mat.flt.SIM[,2]*100),3),"{\\scriptstyle \\%}$", sep=""), | |
paste("$",round(mean(mat.flt.SIM[,3]*100),3),"{\\scriptstyle \\%}$", sep=""), | |
paste("$",round(mean(mat.flt.SIM[,4]*100),3),"{\\scriptstyle \\%}$", sep="") | |
) | |
) | |
## @subsection: Plot results | |
theme_set(theme_bw()) | |
scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--no-shocks' | |
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='') | |
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='') | |
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='') | |
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='') | |
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='') | |
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE) | |
obj.gg2.PLOT <- ggplot() | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1") | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT, | |
aes(xmin = x - scl.flt.dX/2, | |
xmax = x + scl.flt.dX/2, | |
ymin = 0, | |
ymax = value, | |
group = variable | |
) | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0, | |
size = 0.25, | |
colour = "black" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2, | |
aes(xintercept = avg, | |
group = variable | |
), | |
colour = "red", | |
size = 1.25 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2, | |
aes(x = avg + 0.00065, | |
y = 0.21 * 4/5, | |
label = label | |
), | |
colour = "red", | |
angle = 270, | |
size = 4 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("") | |
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$") | |
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1) | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(-0.0025, 0.0125), | |
breaks = c(0.00, 0.005, 0.01), | |
labels = c("$0.0{\\scriptstyle \\%}$","$0.5{\\scriptstyle \\%}$", "$1.0{\\scriptstyle \\%}$") | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.21), | |
breaks = c(0, 0.10, 0.20), | |
labels = c("$0.00$","$0.10$", "$0.20$") | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"), | |
axis.text = element_text(size = 10), | |
axis.title = element_text(size = 10), | |
plot.title = element_text(vjust = 1.75), | |
panel.grid.minor = element_blank(), | |
legend.position = "none" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: No Shocks") | |
print(obj.gg2.PLOT) | |
dev.off() | |
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE) | |
system(paste('rm ', scl.str.TEX_FILE, sep = '')) | |
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = '')) | |
system(paste('rm ', scl.str.AUX_FILE, sep = '')) | |
system(paste('rm ', scl.str.LOG_FILE, sep = '')) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################################################### | |
########################################################################### | |
## @purpose: Simulate return data with sparse shocks and show that the | |
## LASSO can identify these shocks while AR(1) can't. | |
## ------------------------------------------------------------------------ | |
## @author: Alex Chinco | |
## @date: 04-DEC-2015 | |
########################################################################### | |
########################################################################### | |
########################################################################### | |
########################################################################### | |
## @section: Prep workspace | |
########################################################################### | |
########################################################################### | |
options(width=120, digits=6, digits.secs=6) | |
rm(list=ls()) | |
library(foreign) | |
library(grid) | |
library(plyr) | |
library(ggplot2) | |
library(reshape) | |
library(vars) | |
library(lars) | |
library(tikzDevice) | |
print(options('tikzLatexPackages')) | |
options(tikzLatexPackages = | |
c("\\usepackage{tikz}\n", | |
"\\usepackage[active,tightpage,psfixbb]{preview}\n", | |
"\\PreviewEnvironment{pgfpicture}\n", | |
"\\setlength\\PreviewBorder{0pt}\n", | |
"\\usepackage{amsmath}\n", | |
"\\usepackage{pxfonts}\n", | |
"\\usepackage{bm}\n", | |
"\\usepackage{xfrac}\n" | |
) | |
) | |
setTikzDefaults(overwrite = FALSE) | |
print(options('tikzLatexPackages')) | |
library(foreach) | |
library(doMC) | |
registerDoMC(7) | |
scl.str.DAT_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/data/" | |
scl.str.FIG_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/" | |
set.seed(1234) | |
########################################################################### | |
########################################################################### | |
## @section: Define global parameters | |
########################################################################### | |
########################################################################### | |
scl.int.ITER <- 10^3 # Number of iterations to run | |
scl.int.T_FIT <- 50 # Number of minutes used to fit models | |
scl.int.T_LAM <- 100 # Number of minutes used to pick lambda | |
scl.int.T <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes | |
scl.int.N <- 100 # Number of stocks | |
scl.int.K <- 5 # Number of predictors | |
scl.flt.SIG <- 0.001 # Noise volatility | |
scl.flt.TET <- 0.150 # Size of sparse shocks | |
scl.flt.PROB_SWITCH <- 0.01 # Expected time before switch is (1 - probSwitch)/probSwitch. | |
########################################################################### | |
########################################################################### | |
## @section: Simulate data and forecast returns | |
########################################################################### | |
########################################################################### | |
if (TRUE == FALSE) { | |
mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% { | |
print(paste(" Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = "")) | |
## @subsection: Initialize data objects | |
mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N) | |
mat.flt.R <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N) | |
mat.flt.R[1, ] <- 0 | |
mat.int.PRED <- matrix(rep(NA, scl.int.T * scl.int.K), ncol = scl.int.K) | |
mat.int.PRED[1, ] <- sample(scl.int.N, scl.int.K) | |
vec.flt.AR1 <- rep(NA, scl.int.T) # AR(1) forecast results | |
vec.flt.LASSO <- rep(NA, scl.int.T) # LASSO forecast results | |
vec.flt.ORACLE <- rep(NA, scl.int.T) # ORACLE forecast results | |
vec.int.TOT_PRED <- rep(NA, scl.int.T) # Total number of predictors selected by the LASSO | |
vec.int.COR_PRED <- rep(NA, scl.int.T) # Number of correct predictors selected by the LASSO | |
## @subsection: Simulate data | |
for (t in 2:scl.int.T) { | |
mat.flt.R[t, ] <- scl.flt.TET * sum(mat.flt.R[t-1, mat.int.PRED[t-1, ]]) + mat.flt.E[t, ] | |
mat.int.PRED[t, ] <- mat.int.PRED[t-1, ] | |
vec.flt.RAND <- runif(scl.int.K, 0, 1) | |
for (p in 1:scl.int.K) { | |
if (vec.flt.RAND[p] <= scl.flt.PROB_SWITCH) { | |
vec.int.CHOICES <- which(!(seq(1, scl.int.N) %in% mat.int.PRED[t, ]) == TRUE) | |
mat.int.PRED[t, p] <- sample(vec.int.CHOICES, 1) | |
} | |
} | |
} | |
## @subsection: Forecast returns using AR(1) | |
for (t in 1:(scl.int.T-scl.int.T_FIT)) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
vec.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1] | |
vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef | |
vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1) | |
} | |
## @subsection: Forecast returns using oracle | |
for (t in 1:(scl.int.T-scl.int.T_FIT)) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
mat.flt.X <- foreach(r=t:(t+(scl.int.T_FIT-2)), .combine = "rbind") %do% { | |
vec.flt.X <- mat.flt.R[r, mat.int.PRED[r, ]] | |
return(vec.flt.X) | |
} | |
vec.flt.COEF <- matrix(lm(vec.flt.Y ~ mat.flt.X)$coef, ncol = 1) | |
vec.flt.NEWX <- matrix(c(1,mat.flt.R[t+(scl.int.T_FIT-1), mat.int.PRED[t+(scl.int.T_FIT-1), ]]), ncol = 1) | |
vec.flt.ORACLE[t+scl.int.T_FIT] <- t(vec.flt.COEF) %*% vec.flt.NEWX | |
} | |
## @subsection: Forecast returns using LASSO | |
vec.flt.LAM_CHOICES <- seq(0.0005, 0.15, by = 0.0005) | |
scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES) | |
mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN) | |
for (t in 1:scl.int.T_LAM) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ] | |
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE) | |
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1) | |
if (t <= scl.int.T_LAM) { | |
mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit | |
} | |
} | |
vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN) | |
for (p in 1:scl.int.LAM_CHOICES_LEN) { | |
vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared | |
} | |
scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1) | |
for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) { | |
vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1] | |
mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ] | |
obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE) | |
vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1) | |
vec.flt.LASSO[t+scl.int.T_FIT] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit | |
vec.flt.COEF <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "coef", mode = "lambda")$coef | |
vec.int.TOT_PRED[t+scl.int.T_FIT] <- sum(vec.flt.COEF != 0) | |
vec.int.COR_PRED[t+scl.int.T_FIT] <- sum(mat.int.PRED[(t+(scl.int.T_FIT-1)),] %in% which(vec.flt.COEF != 0)) | |
} | |
## @subsection: Estimate model fit | |
vec.flt.R <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1] | |
vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T] | |
vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1) | |
vec.flt.ORACLE <- vec.flt.ORACLE[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T] | |
vec.flt.ORACLE <- (vec.flt.ORACLE - mean(vec.flt.ORACLE))/sd(vec.flt.ORACLE) | |
vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T] | |
vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO) | |
scl.flt.R2_AR1 <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared | |
scl.flt.R2_ORACLE <- summary(lm(vec.flt.R ~ vec.flt.ORACLE))$adj.r.squared | |
scl.flt.R2_LASSO <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared | |
scl.flt.R2_BOTH <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared | |
scl.flt.TOT_PRED <- mean(vec.int.TOT_PRED, na.rm = TRUE) | |
scl.flt.COR_PRED <- mean(vec.int.COR_PRED, na.rm = TRUE) | |
## @subsection: Return results | |
return(c(i, scl.flt.R2_AR1, scl.flt.R2_ORACLE, scl.flt.R2_LASSO, scl.flt.R2_BOTH, scl.flt.LAM, scl.flt.TOT_PRED, scl.flt.COR_PRED)) | |
} | |
save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--sparse-shocks--04dec2015.Rdata", sep = "")) | |
} | |
load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--sparse-shocks--04dec2015.Rdata", sep = "")) | |
mat.flt.SIM <- mat.flt.SIM[mat.flt.SIM[,6] < 0.15, ] | |
########################################################################### | |
########################################################################### | |
## @section: Plot R^2 distributions | |
########################################################################### | |
########################################################################### | |
## @subsection: Create plot data | |
vec.flt.X <- seq(0, 0.20, length.out = 101) | |
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1] | |
mat.df.PLOT <- data.frame(x = vec.flt.X, | |
ar1 = NA, | |
lasso = NA, | |
both = NA, | |
oracle = NA | |
) | |
for (i in 1:100) { | |
mat.df.PLOT$ar1[i] <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
mat.df.PLOT$lasso[i] <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
mat.df.PLOT$both[i] <- length(which((mat.flt.SIM[,5] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,5] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
mat.df.PLOT$oracle[i] <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
} | |
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2 | |
names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO", "Oracle") | |
mat.df.PLOT <- melt(mat.df.PLOT, c("x")) | |
mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ] | |
mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ] | |
mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO", "Oracle"), | |
avg = apply(mat.flt.SIM[, c(2,4,5,3)], 2, mean), | |
label = | |
c(paste("$\\phantom{1}",round(mean(mat.flt.SIM[,2]*100),2),"{\\scriptstyle \\%}$", sep=""), | |
paste("$\\phantom{1}",round(mean(mat.flt.SIM[,4]*100),2),"0{\\scriptstyle \\%}$", sep=""), | |
paste("$\\phantom{1}",round(mean(mat.flt.SIM[,5]*100),2),"{\\scriptstyle \\%}$", sep=""), | |
paste("$",round(mean(mat.flt.SIM[,3]*100),2),"{\\scriptstyle \\%}$", sep="") | |
) | |
) | |
## @subsection: Plot results | |
theme_set(theme_bw()) | |
scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--sparse-shocks' | |
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='') | |
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='') | |
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='') | |
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='') | |
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='') | |
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE) | |
obj.gg2.PLOT <- ggplot() | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1") | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT, | |
aes(xmin = x - scl.flt.dX/2, | |
xmax = x + scl.flt.dX/2, | |
ymin = 0, | |
ymax = value, | |
group = variable | |
) | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0, | |
size = 0.25, | |
colour = "black" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2, | |
aes(xintercept = avg, | |
group = variable | |
), | |
colour = "red", | |
size = 1.25 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2, | |
aes(x = avg + 0.011, | |
y = 0.105 * 4/5, | |
label = label | |
), | |
colour = "red", | |
angle = 270, | |
size = 4 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("") | |
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$") | |
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1) | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(0, 0.20), | |
breaks = c(0, 0.05, 0.10, 0.15, 0.20), | |
labels = c("$\\phantom{1}0{\\scriptstyle \\%}$","$\\phantom{1}5{\\scriptstyle \\%}$", "$10{\\scriptstyle \\%}$", "$15{\\scriptstyle \\%}$", "$20{\\scriptstyle \\%}$") | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.105), | |
breaks = c(0, 0.02, 0.04, 0.06, 0.08, 0.10), | |
labels = c("$0.00$","$0.02$", "$0.04$", "$0.06$", "$0.08$", "$0.10$") | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"), | |
axis.text = element_text(size = 10), | |
axis.title = element_text(size = 10), | |
plot.title = element_text(vjust = 1.75), | |
panel.grid.minor = element_blank(), | |
legend.position = "none" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: Sparse Shocks") | |
print(obj.gg2.PLOT) | |
dev.off() | |
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE) | |
system(paste('rm ', scl.str.TEX_FILE, sep = '')) | |
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = '')) | |
system(paste('rm ', scl.str.AUX_FILE, sep = '')) | |
system(paste('rm ', scl.str.LOG_FILE, sep = '')) | |
########################################################################### | |
########################################################################### | |
## @section: Plot lambda distribution | |
########################################################################### | |
########################################################################### | |
## @subsection: Create plot data | |
vec.flt.X <- seq(0, max(mat.flt.SIM[,6]), length.out = 101) | |
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1] | |
mat.df.PLOT <- data.frame(x = vec.flt.X, value = NA) | |
for (i in 1:100) { | |
mat.df.PLOT$value[i] <- length(which((mat.flt.SIM[,6] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,6] <= mat.df.PLOT$x[i+1])))/scl.int.ITER | |
} | |
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2 | |
mat.df.PLOT$xmax <- mat.df.PLOT$x + scl.flt.dX/2 | |
mat.df.PLOT$xmin <- mat.df.PLOT$x - scl.flt.dX/2 | |
mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ] | |
mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ] | |
## @subsection: Plot results | |
theme_set(theme_bw()) | |
scl.str.RAW_FILE <- 'plot--lambda-distribution--simulated-data--sparse-shocks' | |
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='') | |
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='') | |
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='') | |
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='') | |
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='') | |
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE) | |
obj.gg2.PLOT <- ggplot(data = mat.df.PLOT) | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1") | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(aes(xmin = xmin, | |
xmax = xmax, | |
ymin = 0, | |
ymax = value | |
) | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0, | |
size = 0.25, | |
colour = "black" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(aes(xintercept = mean(mat.flt.SIM[,6])), | |
colour = "red", | |
size = 1.25 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + annotate("text", | |
x = mean(mat.flt.SIM[,6]) + 0.00008, | |
y = 0.30 * 4/5, | |
label = paste("$", round(mean(mat.flt.SIM[,6]),4), "$", sep=""), | |
colour = "red", | |
angle = 270, | |
size = 4 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("") | |
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, \\lambda = x \\, ]$") | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(0, 0.006), | |
breaks = c(0, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006), | |
labels = c("$0.000$","$0.001$","$0.002$","$0.003$","$0.004$","$0.005$","$0.006$") | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.30), | |
breaks = c(0, 0.10, 0.20, 0.30), | |
labels = c("$0.00$","$0.10$", "$0.20$", "$0.30$") | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"), | |
axis.text = element_text(size = 10), | |
axis.title = element_text(size = 10), | |
plot.title = element_text(vjust = 1.75), | |
panel.grid.minor = element_blank(), | |
legend.position = "none" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("$\\lambda$ Distribution: Sparse Shocks") | |
print(obj.gg2.PLOT) | |
dev.off() | |
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE) | |
system(paste('rm ', scl.str.TEX_FILE, sep = '')) | |
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = '')) | |
system(paste('rm ', scl.str.AUX_FILE, sep = '')) | |
system(paste('rm ', scl.str.LOG_FILE, sep = '')) | |
########################################################################### | |
########################################################################### | |
## @section: Plot number of predictors distribution | |
########################################################################### | |
########################################################################### | |
## @subsection: Create plot data, total number of predictors | |
scl.int.NUM_BUCKETS <- 100 | |
vec.flt.X <- seq(0, max(mat.flt.SIM[,7]), length.out = (scl.int.NUM_BUCKETS + 1)) | |
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1] | |
mat.df.PLOT1 <- data.frame(x = vec.flt.X, value = NA) | |
for (i in 1:scl.int.NUM_BUCKETS) { | |
mat.df.PLOT1$value[i] <- length(which((mat.flt.SIM[,7] >= mat.df.PLOT1$x[i]) & (mat.flt.SIM[,7] < mat.df.PLOT1$x[i+1])))/scl.int.ITER | |
} | |
mat.df.PLOT1$x <- mat.df.PLOT1$x + scl.flt.dX/2 | |
mat.df.PLOT1$xmax <- mat.df.PLOT1$x + scl.flt.dX/2 | |
mat.df.PLOT1$xmin <- mat.df.PLOT1$x - scl.flt.dX/2 | |
mat.df.PLOT1 <- mat.df.PLOT1[is.na(mat.df.PLOT1$value) == FALSE, ] | |
mat.df.PLOT1 <- mat.df.PLOT1[mat.df.PLOT1$value > 0, ] | |
## @subsection: Create plot data, number of correct predictors | |
scl.int.NUM_BUCKETS <- 100 | |
vec.flt.X <- seq(0, max(mat.flt.SIM[,8]), length.out = (scl.int.NUM_BUCKETS + 1)) | |
scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1] | |
mat.df.PLOT2 <- data.frame(x = vec.flt.X, value = NA) | |
for (i in 1:scl.int.NUM_BUCKETS) { | |
mat.df.PLOT2$value[i] <- length(which((mat.flt.SIM[,8] >= mat.df.PLOT2$x[i]) & (mat.flt.SIM[,8] < mat.df.PLOT2$x[i+1])))/scl.int.ITER | |
} | |
mat.df.PLOT2$x <- mat.df.PLOT2$x + scl.flt.dX/2 | |
mat.df.PLOT2$xmax <- mat.df.PLOT2$x + scl.flt.dX/2 | |
mat.df.PLOT2$xmin <- mat.df.PLOT2$x - scl.flt.dX/2 | |
mat.df.PLOT2 <- mat.df.PLOT2[is.na(mat.df.PLOT2$value) == FALSE, ] | |
mat.df.PLOT2 <- mat.df.PLOT2[mat.df.PLOT2$value > 0, ] | |
## @subsection: Plot results | |
obj.vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y) | |
theme_set(theme_bw()) | |
scl.str.RAW_FILE <- 'plot--predictor-distribution--simulated-data--sparse-shocks' | |
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='') | |
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='') | |
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='') | |
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='') | |
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='') | |
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE) | |
## Left panel | |
obj.gg2.PLOT1 <- ggplot(data = mat.df.PLOT1) | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + scale_colour_brewer(palette="Set1") | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + geom_rect(aes(xmin = xmin, | |
xmax = xmax, | |
ymin = 0, | |
ymax = value | |
) | |
) | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + geom_hline(yintercept = 0, | |
size = 0.25, | |
colour = "black" | |
) | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + geom_vline(aes(xintercept = mean(mat.flt.SIM[,7])), | |
colour = "red", | |
size = 1.25 | |
) | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + annotate("text", | |
x = mean(mat.flt.SIM[,7]) + 30/35, | |
y = 0.10 * 4/5, | |
label = paste("$", round(mean(mat.flt.SIM[,7]),2), "$", sep=""), | |
colour = "red", | |
angle = 270, | |
size = 4 | |
) | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + xlab("") | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + ylab("$\\mathrm{Pr}[ \\langle\\text{\\# Predictors}\\rangle = x ]$") | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + scale_y_continuous(limits = c(0, 0.10), | |
breaks = c(0, 0.02, 0.04, 0.06, 0.08, 0.10), | |
labels = c("$0.00$", "$0.02$", "$0.04$", "$0.06$", "$0.08$", "$0.10$") | |
) | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + scale_x_continuous(limits = c(0, 30), | |
breaks = c(0, 10, 20, 30), | |
labels = c("$\\phantom{1}0$", "$10$", "$20$", "$30$") | |
) | |
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + theme(plot.margin = unit(c(0,0.15,-0.85,0.15), "lines"), | |
axis.text = element_text(size = 10), | |
axis.title = element_text(size = 10), | |
plot.title = element_blank(), | |
panel.grid.minor = element_blank() | |
) | |
## Right panel | |
obj.gg2.PLOT2 <- ggplot(data = mat.df.PLOT2) | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + scale_colour_brewer(palette="Set1") | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + geom_rect(aes(xmin = xmin, | |
xmax = xmax, | |
ymin = 0, | |
ymax = value | |
) | |
) | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + geom_hline(yintercept = 0, | |
size = 0.25, | |
colour = "black" | |
) | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + geom_vline(aes(xintercept = mean(mat.flt.SIM[,8])), | |
colour = "red", | |
size = 1.25 | |
) | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + annotate("text", | |
x = mean(mat.flt.SIM[,8]) + 2.5/35, | |
y = 0.08 * 4/5, | |
label = paste("$\\phantom{1}", round(mean(mat.flt.SIM[,8]),2), "$", sep=""), | |
colour = "red", | |
angle = 270, | |
size = 4 | |
) | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + xlab("") | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + ylab("$\\mathrm{Pr}[ \\langle\\text{\\# Correct}\\rangle = x ]$") | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + scale_y_continuous(limits = c(0, 0.08), | |
breaks = c(0, 0.02, 0.04, 0.06, 0.08), | |
labels = c("$0.00$", "$0.02$", "$0.04$", "$0.06$", "$0.08$") | |
) | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + scale_x_continuous(limits = c(0, 2.5), | |
breaks = c(0, 0.5, 1.0, 1.5, 2.0, 2.5), | |
labels = c("$0.0$", "$0.5$", "$1.0$", "$1.5$", "$2.0$", "$2.5$") | |
) | |
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + theme(plot.margin = unit(c(0,0.15,-0.85,0.15), "lines"), | |
axis.text = element_text(size = 10), | |
axis.title = element_text(size = 10), | |
plot.title = element_blank(), | |
panel.grid.minor = element_blank() | |
) | |
## Generate output | |
pushViewport(viewport(layout = grid.layout(5, 2))) | |
grid.text("Predictor Distribution: Sparse Shocks", vp = viewport(layout.pos.row = 1, layout.pos.col = 1:2), vjust = 0.20, gp = gpar(fontsize=15)) | |
print(obj.gg2.PLOT1, vp = obj.vplayout(2:5,1)) | |
print(obj.gg2.PLOT2, vp = obj.vplayout(2:5,2)) | |
dev.off() | |
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE) | |
system(paste('rm ', scl.str.TEX_FILE, sep = '')) | |
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = '')) | |
system(paste('rm ', scl.str.AUX_FILE, sep = '')) | |
system(paste('rm ', scl.str.LOG_FILE, sep = '')) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment