alexchinco/prog--lasso-forecast--simulated-data--dense-shocks--04dec2015.R

## prog--lasso-forecast--simulated-data--dense-shocks--04dec2015.R
###########################################################################
###########################################################################
## @purpose: Simulate return data with dense shocks and show that the
##           LASSO doesn't add any forecasting power.
## ------------------------------------------------------------------------
## @author:  Alex Chinco
## @date:    04-DEC-2015
###########################################################################
###########################################################################


###########################################################################
###########################################################################
## @section: Prep workspace
###########################################################################
###########################################################################

options(width=120, digits=6, digits.secs=6)
rm(list=ls())

library(foreign)
library(grid)
library(plyr)
library(ggplot2)
library(reshape)
library(vars)
library(lars)

library(tikzDevice)
print(options('tikzLatexPackages'))
options(tikzLatexPackages =
        c("\\usepackage{tikz}\n",
          "\\usepackage[active,tightpage,psfixbb]{preview}\n",
          "\\PreviewEnvironment{pgfpicture}\n",
          "\\setlength\\PreviewBorder{0pt}\n",
          "\\usepackage{amsmath}\n",
          "\\usepackage{pxfonts}\n",
          "\\usepackage{bm}\n",
          "\\usepackage{xfrac}\n"
          )
        )
setTikzDefaults(overwrite = FALSE)
print(options('tikzLatexPackages'))

library(foreach)
library(doMC)
registerDoMC(7)

scl.str.DAT_DIR  <- "~/Dropbox/research/sparse_signals_in_cross_section/data/"
scl.str.FIG_DIR  <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/"

set.seed(1234)


###########################################################################
###########################################################################
## @section: Define global parameters
###########################################################################
###########################################################################

scl.int.ITER <- 10^3 # Number of iterations to run

scl.int.T_FIT <- 50                                   # Number of minutes used to fit models
scl.int.T_LAM <- 100                                  # Number of minutes used to pick lambda
scl.int.T     <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes

scl.int.N <- 100 # Number of stocks
scl.int.K <- 75   # Number of predictors

scl.flt.SIG <- 0.001          # Noise volatility
scl.flt.TET <- 0.150 * 5 / 75 # Size of dense shocks (same total magnitude as sparse shocks)

scl.flt.PROB_SWITCH <- 0.01 # Expected time before switch is (1 - probSwitch)/probSwitch.


###########################################################################
###########################################################################
## @section: Simulate data and forecast returns
###########################################################################
###########################################################################

if (TRUE == FALSE) {
    mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% {
        print(paste("    Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = ""))

        ## @subsection: Initialize data objects
        mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N)

        mat.flt.R      <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N)
        mat.flt.R[1, ] <- 0

        mat.int.PRED      <- matrix(rep(NA, scl.int.T * scl.int.K), ncol = scl.int.K)
        mat.int.PRED[1, ] <- sample(scl.int.N, scl.int.K)

        vec.flt.AR1      <- rep(NA, scl.int.T) # AR(1) forecast results
        vec.flt.LASSO    <- rep(NA, scl.int.T) # LASSO forecast results


        ## @subsection: Simulate data
        for (t in 2:scl.int.T) {
            mat.flt.R[t, ] <- scl.flt.TET * sum(mat.flt.R[t-1, mat.int.PRED[t-1, ]]) + mat.flt.E[t, ]

            mat.int.PRED[t, ] <- mat.int.PRED[t-1, ]
            vec.flt.RAND      <- runif(scl.int.K, 0, 1)
            for (p in 1:scl.int.K) {
                if (vec.flt.RAND[p] <= scl.flt.PROB_SWITCH) {
                    vec.int.CHOICES    <- which(!(seq(1, scl.int.N) %in% mat.int.PRED[t, ]) == TRUE)
                    mat.int.PRED[t, p] <- sample(vec.int.CHOICES, 1)
                }
            }
        }


        ## @subsection: Forecast returns using AR(1)
        for (t in 1:(scl.int.T-scl.int.T_FIT)) {
            vec.flt.Y    <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            vec.flt.X    <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1]
            vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef

            vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1)
        }


        ## @subsection: Forecast returns using LASSO
        vec.flt.LAM_CHOICES     <- seq(0.0005, 0.15, by = 0.0005)
        scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES)
        mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN)
        for (t in 1:scl.int.T_LAM) {
            vec.flt.Y  <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            mat.flt.X  <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

            obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
            vec.flt.NEWX    <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

            if (t <= scl.int.T_LAM) {
                mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit
            }
        }
        vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN)
        for (p in 1:scl.int.LAM_CHOICES_LEN) {
            vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared
        }
        scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1)
        for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) {
            vec.flt.Y  <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            mat.flt.X  <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

            obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
            vec.flt.NEWX    <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

            vec.flt.LASSO[t+scl.int.T_FIT] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit
        }

        ## @subsection: Estimate model fit
        vec.flt.R   <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1]

        vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
        vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1)

        vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
        vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO)

        scl.flt.R2_AR1    <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared
        scl.flt.R2_LASSO  <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared
        scl.flt.R2_BOTH   <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared


        ## @subsection: Return results
        return(c(i, scl.flt.R2_AR1, scl.flt.R2_LASSO, scl.flt.R2_BOTH))

    }

    save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--dense-shocks--04dec2015.Rdata", sep = ""))

}
load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--dense-shocks--04dec2015.Rdata", sep = ""))


###########################################################################
###########################################################################
## @section: Plot R^2 distributions
###########################################################################
###########################################################################

## @subsection: Create plot data
vec.flt.X   <- seq(-0.005, 0.02, length.out = 101)
scl.flt.dX  <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT <- data.frame(x      = vec.flt.X,
                          ar1    = NA,
                          lasso  = NA,
                          both   = NA
                          )
for (i in 1:100) {
    mat.df.PLOT$ar1[i]    <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
    mat.df.PLOT$lasso[i]  <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
    mat.df.PLOT$both[i]   <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
}
mat.df.PLOT$x    <- mat.df.PLOT$x + scl.flt.dX/2

names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO")
mat.df.PLOT        <- melt(mat.df.PLOT, c("x"))
mat.df.PLOT$xmin   <- mat.df.PLOT$x - scl.flt.dX/2
mat.df.PLOT$xmax   <- mat.df.PLOT$x + scl.flt.dX/2
mat.df.PLOT        <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
mat.df.PLOT        <- mat.df.PLOT[mat.df.PLOT$value > 0, ]

mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO"),
                           avg      = apply(mat.flt.SIM[, c(2,3,4)], 2, mean),
                           label    =
                           c(paste("$",round(mean(mat.flt.SIM[,2]*100),3),"{\\scriptstyle \\%}$", sep=""),
                             paste("$",round(mean(mat.flt.SIM[,3]*100),3),"{\\scriptstyle \\%}$", sep=""),
                             paste("$",round(mean(mat.flt.SIM[,4]*100),3),"{\\scriptstyle \\%}$", sep="")
                             )
                           )


## @subsection: Plot results
theme_set(theme_bw())

scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--dense-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')

tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)

obj.gg2.PLOT <- ggplot()
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT,
                                         aes(xmin   = xmin,
                                             xmax   = xmax,
                                             ymin   = 0,
                                             ymax   = value,
                                             group  = variable
                                             )
                                         )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
                                          size       = 0.25,
                                          colour     = "black"
                                          )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2,
                                          aes(xintercept = avg,
                                              group      = variable
                                              ),
                                          colour = "red",
                                          size   = 1.25
                                          )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2,
                                         aes(x     = avg + 0.001,
                                             y     = 0.40 * 4/5,
                                             label = label
                                             ),
                                         colour = "red",
                                         angle  = 270,
                                         size   = 4
                                         )
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$")
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(-0.005, 0.020),
                                                  breaks = c(0.00, 0.005, 0.010, 0.015),
                                                  labels = c("$0.0{\\scriptstyle \\%}$", "$0.5{\\scriptstyle \\%}$", "$1.0{\\scriptstyle \\%}$", "$1.5{\\scriptstyle \\%}$")
                                                  )
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.40),
                                                  breaks = c(0, 0.10, 0.20, 0.30, 0.40),
                                                  labels = c("$0.00$","$0.10$", "$0.20$", "$0.30$", "$0.40$")
                                                  )
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin      = unit(c(1,0.15,-0.85,0.15), "lines"),
                                     axis.text        = element_text(size = 10),
                                     axis.title       = element_text(size = 10),
                                     plot.title       = element_text(vjust = 1.75),
                                     panel.grid.minor = element_blank(),
                                     legend.position  = "none"
                                     )

obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: Dense Shocks")

print(obj.gg2.PLOT)
dev.off()

system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))


## prog--lasso-forecast--simulated-data--no-shocks--04dec2015.R
###########################################################################
###########################################################################
## @purpose: Simulate return data with no shocks and show that the LASSO
##           does add any predictive power.
## ------------------------------------------------------------------------
## @author:  Alex Chinco
## @date:    04-DEC-2015
###########################################################################
###########################################################################


###########################################################################
###########################################################################
## @section: Prep workspace
###########################################################################
###########################################################################

options(width=120, digits=6, digits.secs=6)
rm(list=ls())

library(foreign)
library(grid)
library(plyr)
library(ggplot2)
library(reshape)
library(vars)
library(lars)

library(tikzDevice)
print(options('tikzLatexPackages'))
options(tikzLatexPackages =
        c("\\usepackage{tikz}\n",
          "\\usepackage[active,tightpage,psfixbb]{preview}\n",
          "\\PreviewEnvironment{pgfpicture}\n",
          "\\setlength\\PreviewBorder{0pt}\n",
          "\\usepackage{amsmath}\n",
          "\\usepackage{pxfonts}\n",
          "\\usepackage{bm}\n",
          "\\usepackage{xfrac}\n"
          )
        )
setTikzDefaults(overwrite = FALSE)
print(options('tikzLatexPackages'))

library(foreach)
library(doMC)
registerDoMC(7)

scl.str.DAT_DIR  <- "~/Dropbox/research/sparse_signals_in_cross_section/data/"
scl.str.FIG_DIR  <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/"

set.seed(1234)


###########################################################################
###########################################################################
## @section: Define global parameters
###########################################################################
###########################################################################

scl.int.ITER <- 10^3 # Number of iterations to run

scl.int.T_FIT <- 50                                   # Number of minutes used to fit models
scl.int.T_LAM <- 100                                  # Number of minutes used to pick lambda
scl.int.T     <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes

scl.int.N <- 100 # Number of stocks
scl.int.K <- 5   # Number of predictors

scl.flt.SIG <- 0.001 # Noise volatility


###########################################################################
###########################################################################
## @section: Simulate data and forecast returns
###########################################################################
###########################################################################

if (TRUE == FALSE) {
    mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% {
        print(paste("    Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = ""))

        ## @subsection: Initialize data objects
        mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N)

        mat.flt.R      <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N)
        mat.flt.R[1, ] <- 0

        vec.flt.AR1      <- rep(NA, scl.int.T) # AR(1) forecast results
        vec.flt.LASSO    <- rep(NA, scl.int.T) # LASSO forecast results


        ## @subsection: Simulate data
        for (t in 2:scl.int.T) {
            mat.flt.R[t, ] <- mat.flt.E[t, ]
        }


        ## @subsection: Forecast returns using AR(1)
        for (t in 1:(scl.int.T-scl.int.T_FIT)) {
            vec.flt.Y    <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            vec.flt.X    <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1]
            vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef

            vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1)
        }


        ## @subsection: Forecast returns using LASSO
        vec.flt.LAM_CHOICES     <- seq(0.0005, 0.15, by = 0.0005)
        scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES)
        mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN)
        for (t in 1:scl.int.T_LAM) {
            vec.flt.Y  <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            mat.flt.X  <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

            obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
            vec.flt.NEWX    <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

            if (t <= scl.int.T_LAM) {
                mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit
            }
        }
        vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN)
        for (p in 1:scl.int.LAM_CHOICES_LEN) {
            vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared
        }
        scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1)
        for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) {
            vec.flt.Y  <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            mat.flt.X  <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

            obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
            vec.flt.NEWX    <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

            vec.flt.LASSO[t+scl.int.T_FIT]    <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit
        }

        ## @subsection: Estimate model fit
        vec.flt.R   <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1]

        vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
        vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1)

        vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
        vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO)

        scl.flt.R2_AR1    <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared
        scl.flt.R2_LASSO  <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared
        scl.flt.R2_BOTH   <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared

        ## @subsection: Return results
        return(c(i, scl.flt.R2_AR1, scl.flt.R2_LASSO, scl.flt.R2_BOTH))

    }

    save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--no-shocks--04dec2015.Rdata", sep = ""))

}
load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--no-shocks--04dec2015.Rdata", sep = ""))


###########################################################################
###########################################################################
## @section: Plot R^2 distributions
###########################################################################
###########################################################################

## @subsection: Create plot data
vec.flt.X   <- seq(-0.005, 0.0125, length.out = 101)
scl.flt.dX  <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT <- data.frame(x      = vec.flt.X,
                          ar1    = NA,
                          lasso  = NA,
                          both   = NA
                          )
for (i in 1:100) {
    mat.df.PLOT$ar1[i]    <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
    mat.df.PLOT$lasso[i]  <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
    mat.df.PLOT$both[i]   <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
}
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2

names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO")
mat.df.PLOT        <- melt(mat.df.PLOT, c("x"))
mat.df.PLOT        <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
mat.df.PLOT        <- mat.df.PLOT[mat.df.PLOT$value > 0, ]

mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO"),
                           avg      = apply(mat.flt.SIM[, c(2,3,4)], 2, mean),
                           label    =
                           c(paste("$",round(mean(mat.flt.SIM[,2]*100),3),"{\\scriptstyle \\%}$", sep=""),
                             paste("$",round(mean(mat.flt.SIM[,3]*100),3),"{\\scriptstyle \\%}$", sep=""),
                             paste("$",round(mean(mat.flt.SIM[,4]*100),3),"{\\scriptstyle \\%}$", sep="")
                             )
                           )


## @subsection: Plot results
theme_set(theme_bw())

scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--no-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')

tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)

obj.gg2.PLOT <- ggplot()
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT,
                                         aes(xmin   = x - scl.flt.dX/2,
                                             xmax   = x + scl.flt.dX/2,
                                             ymin   = 0,
                                             ymax   = value,
                                             group  = variable
                                             )
                                         )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
                                          size       = 0.25,
                                          colour     = "black"
                                          )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2,
                                          aes(xintercept = avg,
                                              group      = variable
                                              ),
                                          colour = "red",
                                          size   = 1.25
                                          )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2,
                                         aes(x     = avg + 0.00065,
                                             y     = 0.21 * 4/5,
                                             label = label
                                             ),
                                         colour = "red",
                                         angle  = 270,
                                         size   = 4
                                         )
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$")
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(-0.0025, 0.0125),
                                                  breaks = c(0.00, 0.005, 0.01),
                                                  labels = c("$0.0{\\scriptstyle \\%}$","$0.5{\\scriptstyle \\%}$", "$1.0{\\scriptstyle \\%}$")
                                                  )
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.21),
                                                  breaks = c(0, 0.10, 0.20),
                                                  labels = c("$0.00$","$0.10$", "$0.20$")
                                                  )
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin      = unit(c(1,0.15,-0.85,0.15), "lines"),
                                     axis.text        = element_text(size = 10),
                                     axis.title       = element_text(size = 10),
                                     plot.title       = element_text(vjust = 1.75),
                                     panel.grid.minor = element_blank(),
                                     legend.position  = "none"
                                     )

obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: No Shocks")

print(obj.gg2.PLOT)
dev.off()

system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))

## prog--lasso-forecast--simulated-data--sparse-shocks--04dec2015.R
###########################################################################
###########################################################################
## @purpose: Simulate return data with sparse shocks and show that the
##           LASSO can identify these shocks while AR(1) can't.
## ------------------------------------------------------------------------
## @author:  Alex Chinco
## @date:    04-DEC-2015
###########################################################################
###########################################################################


###########################################################################
###########################################################################
## @section: Prep workspace
###########################################################################
###########################################################################

options(width=120, digits=6, digits.secs=6)
rm(list=ls())

library(foreign)
library(grid)
library(plyr)
library(ggplot2)
library(reshape)
library(vars)
library(lars)

library(tikzDevice)
print(options('tikzLatexPackages'))
options(tikzLatexPackages =
        c("\\usepackage{tikz}\n",
          "\\usepackage[active,tightpage,psfixbb]{preview}\n",
          "\\PreviewEnvironment{pgfpicture}\n",
          "\\setlength\\PreviewBorder{0pt}\n",
          "\\usepackage{amsmath}\n",
          "\\usepackage{pxfonts}\n",
          "\\usepackage{bm}\n",
          "\\usepackage{xfrac}\n"
          )
        )
setTikzDefaults(overwrite = FALSE)
print(options('tikzLatexPackages'))

library(foreach)
library(doMC)
registerDoMC(7)

scl.str.DAT_DIR  <- "~/Dropbox/research/sparse_signals_in_cross_section/data/"
scl.str.FIG_DIR  <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/"

set.seed(1234)


###########################################################################
###########################################################################
## @section: Define global parameters
###########################################################################
###########################################################################

scl.int.ITER <- 10^3 # Number of iterations to run

scl.int.T_FIT <- 50                                   # Number of minutes used to fit models
scl.int.T_LAM <- 100                                  # Number of minutes used to pick lambda
scl.int.T     <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes

scl.int.N <- 100 # Number of stocks
scl.int.K <- 5   # Number of predictors

scl.flt.SIG <- 0.001 # Noise volatility
scl.flt.TET <- 0.150 # Size of sparse shocks

scl.flt.PROB_SWITCH <- 0.01 # Expected time before switch is (1 - probSwitch)/probSwitch.


###########################################################################
###########################################################################
## @section: Simulate data and forecast returns
###########################################################################
###########################################################################

if (TRUE == FALSE) {
    mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% {
        print(paste("    Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = ""))

        ## @subsection: Initialize data objects
        mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N)

        mat.flt.R      <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N)
        mat.flt.R[1, ] <- 0

        mat.int.PRED      <- matrix(rep(NA, scl.int.T * scl.int.K), ncol = scl.int.K)
        mat.int.PRED[1, ] <- sample(scl.int.N, scl.int.K)

        vec.flt.AR1      <- rep(NA, scl.int.T) # AR(1) forecast results
        vec.flt.LASSO    <- rep(NA, scl.int.T) # LASSO forecast results
        vec.flt.ORACLE   <- rep(NA, scl.int.T) # ORACLE forecast results
        vec.int.TOT_PRED <- rep(NA, scl.int.T) # Total number of predictors selected by the LASSO
        vec.int.COR_PRED <- rep(NA, scl.int.T) # Number of correct predictors selected by the LASSO


        ## @subsection: Simulate data
        for (t in 2:scl.int.T) {
            mat.flt.R[t, ] <- scl.flt.TET * sum(mat.flt.R[t-1, mat.int.PRED[t-1, ]]) + mat.flt.E[t, ]

            mat.int.PRED[t, ] <- mat.int.PRED[t-1, ]
            vec.flt.RAND      <- runif(scl.int.K, 0, 1)
            for (p in 1:scl.int.K) {
                if (vec.flt.RAND[p] <= scl.flt.PROB_SWITCH) {
                    vec.int.CHOICES    <- which(!(seq(1, scl.int.N) %in% mat.int.PRED[t, ]) == TRUE)
                    mat.int.PRED[t, p] <- sample(vec.int.CHOICES, 1)
                }
            }
        }


        ## @subsection: Forecast returns using AR(1)
        for (t in 1:(scl.int.T-scl.int.T_FIT)) {
            vec.flt.Y    <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            vec.flt.X    <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1]
            vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef

            vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1)
        }


        ## @subsection: Forecast returns using oracle
        for (t in 1:(scl.int.T-scl.int.T_FIT)) {
            vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            mat.flt.X <- foreach(r=t:(t+(scl.int.T_FIT-2)), .combine = "rbind") %do% {
                vec.flt.X <- mat.flt.R[r, mat.int.PRED[r, ]]
                return(vec.flt.X)
            }

            vec.flt.COEF <- matrix(lm(vec.flt.Y ~ mat.flt.X)$coef, ncol = 1)
            vec.flt.NEWX <- matrix(c(1,mat.flt.R[t+(scl.int.T_FIT-1), mat.int.PRED[t+(scl.int.T_FIT-1), ]]), ncol = 1)

            vec.flt.ORACLE[t+scl.int.T_FIT] <- t(vec.flt.COEF) %*% vec.flt.NEWX
        }


        ## @subsection: Forecast returns using LASSO
        vec.flt.LAM_CHOICES     <- seq(0.0005, 0.15, by = 0.0005)
        scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES)
        mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN)
        for (t in 1:scl.int.T_LAM) {
            vec.flt.Y  <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            mat.flt.X  <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

            obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
            vec.flt.NEWX    <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

            if (t <= scl.int.T_LAM) {
                mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit
            }
        }
        vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN)
        for (p in 1:scl.int.LAM_CHOICES_LEN) {
            vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared
        }
        scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1)
        for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) {
            vec.flt.Y  <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
            mat.flt.X  <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

            obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
            vec.flt.NEWX    <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

            vec.flt.LASSO[t+scl.int.T_FIT]    <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit

            vec.flt.COEF                        <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "coef", mode = "lambda")$coef
            vec.int.TOT_PRED[t+scl.int.T_FIT] <- sum(vec.flt.COEF != 0)
            vec.int.COR_PRED[t+scl.int.T_FIT] <- sum(mat.int.PRED[(t+(scl.int.T_FIT-1)),] %in% which(vec.flt.COEF != 0))
        }

        ## @subsection: Estimate model fit
        vec.flt.R   <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1]

        vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
        vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1)

        vec.flt.ORACLE <- vec.flt.ORACLE[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
        vec.flt.ORACLE <- (vec.flt.ORACLE - mean(vec.flt.ORACLE))/sd(vec.flt.ORACLE)

        vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
        vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO)

        scl.flt.R2_AR1    <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared
        scl.flt.R2_ORACLE <- summary(lm(vec.flt.R ~ vec.flt.ORACLE))$adj.r.squared
        scl.flt.R2_LASSO  <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared
        scl.flt.R2_BOTH   <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared

        scl.flt.TOT_PRED <- mean(vec.int.TOT_PRED, na.rm = TRUE)
        scl.flt.COR_PRED <- mean(vec.int.COR_PRED, na.rm = TRUE)

        ## @subsection: Return results
        return(c(i, scl.flt.R2_AR1, scl.flt.R2_ORACLE, scl.flt.R2_LASSO, scl.flt.R2_BOTH, scl.flt.LAM, scl.flt.TOT_PRED, scl.flt.COR_PRED))

    }

    save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--sparse-shocks--04dec2015.Rdata", sep = ""))

}
load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--sparse-shocks--04dec2015.Rdata", sep = ""))

mat.flt.SIM <- mat.flt.SIM[mat.flt.SIM[,6] < 0.15, ]


###########################################################################
###########################################################################
## @section: Plot R^2 distributions
###########################################################################
###########################################################################

## @subsection: Create plot data
vec.flt.X   <- seq(0, 0.20, length.out = 101)
scl.flt.dX  <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT <- data.frame(x      = vec.flt.X,
                          ar1    = NA,
                          lasso  = NA,
                          both   = NA,
                          oracle = NA
                          )
for (i in 1:100) {
    mat.df.PLOT$ar1[i]    <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
    mat.df.PLOT$lasso[i]  <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
    mat.df.PLOT$both[i]   <- length(which((mat.flt.SIM[,5] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,5] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
    mat.df.PLOT$oracle[i] <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
}
mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2

names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO", "Oracle")
mat.df.PLOT        <- melt(mat.df.PLOT, c("x"))
mat.df.PLOT        <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
mat.df.PLOT        <- mat.df.PLOT[mat.df.PLOT$value > 0, ]

mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO", "Oracle"),
                           avg      = apply(mat.flt.SIM[, c(2,4,5,3)], 2, mean),
                           label    =
                           c(paste("$\\phantom{1}",round(mean(mat.flt.SIM[,2]*100),2),"{\\scriptstyle \\%}$", sep=""),
                             paste("$\\phantom{1}",round(mean(mat.flt.SIM[,4]*100),2),"0{\\scriptstyle \\%}$", sep=""),
                             paste("$\\phantom{1}",round(mean(mat.flt.SIM[,5]*100),2),"{\\scriptstyle \\%}$", sep=""),
                             paste("$",round(mean(mat.flt.SIM[,3]*100),2),"{\\scriptstyle \\%}$", sep="")
                             )
                           )


## @subsection: Plot results
theme_set(theme_bw())

scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--sparse-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')

tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)

obj.gg2.PLOT <- ggplot()
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT,
                                         aes(xmin   = x - scl.flt.dX/2,
                                             xmax   = x + scl.flt.dX/2,
                                             ymin   = 0,
                                             ymax   = value,
                                             group  = variable
                                             )
                                         )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
                                          size       = 0.25,
                                          colour     = "black"
                                          )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2,
                                          aes(xintercept = avg,
                                              group      = variable
                                              ),
                                          colour = "red",
                                          size   = 1.25
                                          )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2,
                                         aes(x     = avg + 0.011,
                                             y     = 0.105 * 4/5,
                                             label = label
                                             ),
                                         colour = "red",
                                         angle  = 270,
                                         size   = 4
                                         )
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$")
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(0, 0.20),
                                                  breaks = c(0, 0.05, 0.10, 0.15, 0.20),
                                                  labels = c("$\\phantom{1}0{\\scriptstyle \\%}$","$\\phantom{1}5{\\scriptstyle \\%}$", "$10{\\scriptstyle \\%}$", "$15{\\scriptstyle \\%}$", "$20{\\scriptstyle \\%}$")
                                                  )
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.105),
                                                  breaks = c(0, 0.02, 0.04, 0.06, 0.08, 0.10),
                                                  labels = c("$0.00$","$0.02$", "$0.04$", "$0.06$", "$0.08$", "$0.10$")
                                                  )
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin      = unit(c(1,0.15,-0.85,0.15), "lines"),
                                     axis.text        = element_text(size = 10),
                                     axis.title       = element_text(size = 10),
                                     plot.title       = element_text(vjust = 1.75),
                                     panel.grid.minor = element_blank(),
                                     legend.position  = "none"
                                     )

obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: Sparse Shocks")

print(obj.gg2.PLOT)
dev.off()

system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))


###########################################################################
###########################################################################
## @section: Plot lambda distribution
###########################################################################
###########################################################################

## @subsection: Create plot data
vec.flt.X        <- seq(0, max(mat.flt.SIM[,6]), length.out = 101)
scl.flt.dX       <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT      <- data.frame(x = vec.flt.X, value = NA)
for (i in 1:100) {
    mat.df.PLOT$value[i] <- length(which((mat.flt.SIM[,6] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,6] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
}
mat.df.PLOT$x    <- mat.df.PLOT$x + scl.flt.dX/2
mat.df.PLOT$xmax <- mat.df.PLOT$x + scl.flt.dX/2
mat.df.PLOT$xmin <- mat.df.PLOT$x - scl.flt.dX/2
mat.df.PLOT      <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
mat.df.PLOT      <- mat.df.PLOT[mat.df.PLOT$value > 0, ]


## @subsection: Plot results
theme_set(theme_bw())

scl.str.RAW_FILE <- 'plot--lambda-distribution--simulated-data--sparse-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')

tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)

obj.gg2.PLOT <- ggplot(data = mat.df.PLOT)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(aes(xmin   = xmin,
                                             xmax   = xmax,
                                             ymin   = 0,
                                             ymax   = value
                                             )
                                         )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
                                          size       = 0.25,
                                          colour     = "black"
                                          )
obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(aes(xintercept = mean(mat.flt.SIM[,6])),
                                          colour = "red",
                                          size   = 1.25
                                          )
obj.gg2.PLOT <- obj.gg2.PLOT + annotate("text",
                                        x     = mean(mat.flt.SIM[,6]) + 0.00008,
                                        y     = 0.30 * 4/5,
                                        label = paste("$", round(mean(mat.flt.SIM[,6]),4), "$", sep=""),
                                        colour = "red",
                                        angle  = 270,
                                        size   = 4
                                        )
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, \\lambda = x \\, ]$")
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(0, 0.006),
                                                  breaks = c(0, 0.001, 0.002, 0.003, 0.004, 0.005, 0.006),
                                                  labels = c("$0.000$","$0.001$","$0.002$","$0.003$","$0.004$","$0.005$","$0.006$")
                                                  )
obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.30),
                                                  breaks = c(0, 0.10, 0.20, 0.30),
                                                  labels = c("$0.00$","$0.10$", "$0.20$", "$0.30$")
                                                  )
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin      = unit(c(1,0.15,-0.85,0.15), "lines"),
                                     axis.text        = element_text(size = 10),
                                     axis.title       = element_text(size = 10),
                                     plot.title       = element_text(vjust = 1.75),
                                     panel.grid.minor = element_blank(),
                                     legend.position  = "none"
                                     )

obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("$\\lambda$ Distribution: Sparse Shocks")

print(obj.gg2.PLOT)
dev.off()

system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))


###########################################################################
###########################################################################
## @section: Plot number of predictors distribution
###########################################################################
###########################################################################

## @subsection: Create plot data, total number of predictors
scl.int.NUM_BUCKETS <- 100
vec.flt.X           <- seq(0, max(mat.flt.SIM[,7]), length.out = (scl.int.NUM_BUCKETS + 1))
scl.flt.dX          <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT1        <- data.frame(x = vec.flt.X, value = NA)
for (i in 1:scl.int.NUM_BUCKETS) {
    mat.df.PLOT1$value[i] <- length(which((mat.flt.SIM[,7] >= mat.df.PLOT1$x[i]) & (mat.flt.SIM[,7] < mat.df.PLOT1$x[i+1])))/scl.int.ITER
}
mat.df.PLOT1$x      <- mat.df.PLOT1$x + scl.flt.dX/2
mat.df.PLOT1$xmax   <- mat.df.PLOT1$x + scl.flt.dX/2
mat.df.PLOT1$xmin   <- mat.df.PLOT1$x - scl.flt.dX/2
mat.df.PLOT1        <- mat.df.PLOT1[is.na(mat.df.PLOT1$value) == FALSE, ]
mat.df.PLOT1        <- mat.df.PLOT1[mat.df.PLOT1$value > 0, ]


## @subsection: Create plot data, number of correct predictors
scl.int.NUM_BUCKETS <- 100
vec.flt.X           <- seq(0, max(mat.flt.SIM[,8]), length.out = (scl.int.NUM_BUCKETS + 1))
scl.flt.dX          <- vec.flt.X[2]-vec.flt.X[1]
mat.df.PLOT2        <- data.frame(x = vec.flt.X, value = NA)
for (i in 1:scl.int.NUM_BUCKETS) {
    mat.df.PLOT2$value[i] <- length(which((mat.flt.SIM[,8] >= mat.df.PLOT2$x[i]) & (mat.flt.SIM[,8] < mat.df.PLOT2$x[i+1])))/scl.int.ITER
}
mat.df.PLOT2$x      <- mat.df.PLOT2$x + scl.flt.dX/2
mat.df.PLOT2$xmax   <- mat.df.PLOT2$x + scl.flt.dX/2
mat.df.PLOT2$xmin   <- mat.df.PLOT2$x - scl.flt.dX/2
mat.df.PLOT2        <- mat.df.PLOT2[is.na(mat.df.PLOT2$value) == FALSE, ]
mat.df.PLOT2        <- mat.df.PLOT2[mat.df.PLOT2$value > 0, ]


## @subsection: Plot results
obj.vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)

theme_set(theme_bw())

scl.str.RAW_FILE <- 'plot--predictor-distribution--simulated-data--sparse-shocks'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')

tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)

## Left panel
obj.gg2.PLOT1 <- ggplot(data = mat.df.PLOT1)
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + geom_rect(aes(xmin = xmin,
                                               xmax = xmax,
                                               ymin = 0,
                                               ymax = value
                                               )
                                           )
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + geom_hline(yintercept = 0,
                                            size       = 0.25,
                                            colour     = "black"
                                            )
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + geom_vline(aes(xintercept = mean(mat.flt.SIM[,7])),
                                            colour = "red",
                                            size   = 1.25
                                            )
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + annotate("text",
                                          x     = mean(mat.flt.SIM[,7]) + 30/35,
                                          y     = 0.10 * 4/5,
                                          label = paste("$", round(mean(mat.flt.SIM[,7]),2), "$", sep=""),
                                          colour = "red",
                                          angle  = 270,
                                          size   = 4
                                          )
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + xlab("")
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + ylab("$\\mathrm{Pr}[ \\langle\\text{\\# Predictors}\\rangle = x ]$")
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + scale_y_continuous(limits = c(0, 0.10),
                                                    breaks = c(0, 0.02, 0.04, 0.06, 0.08, 0.10),
                                                    labels = c("$0.00$", "$0.02$", "$0.04$", "$0.06$", "$0.08$", "$0.10$")
                                                    )
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + scale_x_continuous(limits = c(0, 30),
                                                    breaks = c(0, 10, 20, 30),
                                                    labels = c("$\\phantom{1}0$", "$10$", "$20$", "$30$")
                                                    )
obj.gg2.PLOT1 <- obj.gg2.PLOT1 + theme(plot.margin      = unit(c(0,0.15,-0.85,0.15), "lines"),
                                       axis.text        = element_text(size = 10),
                                       axis.title       = element_text(size = 10),
                                       plot.title       = element_blank(),
                                       panel.grid.minor = element_blank()
                                       )


## Right panel
obj.gg2.PLOT2 <- ggplot(data = mat.df.PLOT2)
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + geom_rect(aes(xmin = xmin,
                                               xmax = xmax,
                                               ymin = 0,
                                               ymax = value
                                               )
                                           )
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + geom_hline(yintercept = 0,
                                            size       = 0.25,
                                            colour     = "black"
                                            )
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + geom_vline(aes(xintercept = mean(mat.flt.SIM[,8])),
                                            colour = "red",
                                            size   = 1.25
                                            )
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + annotate("text",
                                          x     = mean(mat.flt.SIM[,8]) + 2.5/35,
                                          y     = 0.08 * 4/5,
                                          label = paste("$\\phantom{1}", round(mean(mat.flt.SIM[,8]),2), "$", sep=""),
                                          colour = "red",
                                          angle  = 270,
                                          size   = 4
                                          )
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + xlab("")
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + ylab("$\\mathrm{Pr}[ \\langle\\text{\\# Correct}\\rangle = x ]$")
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + scale_y_continuous(limits = c(0, 0.08),
                                                    breaks = c(0, 0.02, 0.04, 0.06, 0.08),
                                                    labels = c("$0.00$", "$0.02$", "$0.04$", "$0.06$", "$0.08$")
                                                    )
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + scale_x_continuous(limits = c(0, 2.5),
                                                    breaks = c(0, 0.5, 1.0, 1.5, 2.0, 2.5),
                                                    labels = c("$0.0$", "$0.5$", "$1.0$", "$1.5$", "$2.0$", "$2.5$")
                                                    )
obj.gg2.PLOT2 <- obj.gg2.PLOT2 + theme(plot.margin      = unit(c(0,0.15,-0.85,0.15), "lines"),
                                       axis.text        = element_text(size = 10),
                                       axis.title       = element_text(size = 10),
                                       plot.title       = element_blank(),
                                       panel.grid.minor = element_blank()
                                       )

## Generate output
pushViewport(viewport(layout = grid.layout(5, 2)))
grid.text("Predictor Distribution: Sparse Shocks", vp = viewport(layout.pos.row = 1, layout.pos.col = 1:2), vjust = 0.20, gp = gpar(fontsize=15))
print(obj.gg2.PLOT1, vp = obj.vplayout(2:5,1))
print(obj.gg2.PLOT2, vp = obj.vplayout(2:5,2))
dev.off()

system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
	###########################################################################
	###########################################################################
	## @purpose: Simulate return data with dense shocks and show that the
	## LASSO doesn't add any forecasting power.
	## ------------------------------------------------------------------------
	## @author: Alex Chinco
	## @date: 04-DEC-2015
	###########################################################################
	###########################################################################





	###########################################################################
	###########################################################################
	## @section: Prep workspace
	###########################################################################
	###########################################################################

	options(width=120, digits=6, digits.secs=6)
	rm(list=ls())

	library(foreign)
	library(grid)
	library(plyr)
	library(ggplot2)
	library(reshape)
	library(vars)
	library(lars)

	library(tikzDevice)
	print(options('tikzLatexPackages'))
	options(tikzLatexPackages =
	c("\\usepackage{tikz}\n",
	"\\usepackage[active,tightpage,psfixbb]{preview}\n",
	"\\PreviewEnvironment{pgfpicture}\n",
	"\\setlength\\PreviewBorder{0pt}\n",
	"\\usepackage{amsmath}\n",
	"\\usepackage{pxfonts}\n",
	"\\usepackage{bm}\n",
	"\\usepackage{xfrac}\n"
	)
	)
	setTikzDefaults(overwrite = FALSE)
	print(options('tikzLatexPackages'))

	library(foreach)
	library(doMC)
	registerDoMC(7)

	scl.str.DAT_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/data/"
	scl.str.FIG_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/"

	set.seed(1234)





	###########################################################################
	###########################################################################
	## @section: Define global parameters
	###########################################################################
	###########################################################################

	scl.int.ITER <- 10^3 # Number of iterations to run

	scl.int.T_FIT <- 50 # Number of minutes used to fit models
	scl.int.T_LAM <- 100 # Number of minutes used to pick lambda
	scl.int.T <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes

	scl.int.N <- 100 # Number of stocks
	scl.int.K <- 75 # Number of predictors

	scl.flt.SIG <- 0.001 # Noise volatility
	scl.flt.TET <- 0.150 * 5 / 75 # Size of dense shocks (same total magnitude as sparse shocks)

	scl.flt.PROB_SWITCH <- 0.01 # Expected time before switch is (1 - probSwitch)/probSwitch.





	###########################################################################
	###########################################################################
	## @section: Simulate data and forecast returns
	###########################################################################
	###########################################################################

	if (TRUE == FALSE) {
	mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% {
	print(paste(" Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = ""))

	## @subsection: Initialize data objects
	mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N)

	mat.flt.R <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N)
	mat.flt.R[1, ] <- 0

	mat.int.PRED <- matrix(rep(NA, scl.int.T * scl.int.K), ncol = scl.int.K)
	mat.int.PRED[1, ] <- sample(scl.int.N, scl.int.K)

	vec.flt.AR1 <- rep(NA, scl.int.T) # AR(1) forecast results
	vec.flt.LASSO <- rep(NA, scl.int.T) # LASSO forecast results


	## @subsection: Simulate data
	for (t in 2:scl.int.T) {
	mat.flt.R[t, ] <- scl.flt.TET * sum(mat.flt.R[t-1, mat.int.PRED[t-1, ]]) + mat.flt.E[t, ]

	mat.int.PRED[t, ] <- mat.int.PRED[t-1, ]
	vec.flt.RAND <- runif(scl.int.K, 0, 1)
	for (p in 1:scl.int.K) {
	if (vec.flt.RAND[p] <= scl.flt.PROB_SWITCH) {
	vec.int.CHOICES <- which(!(seq(1, scl.int.N) %in% mat.int.PRED[t, ]) == TRUE)
	mat.int.PRED[t, p] <- sample(vec.int.CHOICES, 1)
	}
	}
	}


	## @subsection: Forecast returns using AR(1)
	for (t in 1:(scl.int.T-scl.int.T_FIT)) {
	vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
	vec.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1]
	vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef

	vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1)
	}


	## @subsection: Forecast returns using LASSO
	vec.flt.LAM_CHOICES <- seq(0.0005, 0.15, by = 0.0005)
	scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES)
	mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN)
	for (t in 1:scl.int.T_LAM) {
	vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
	mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

	obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
	vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

	if (t <= scl.int.T_LAM) {
	mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit
	}
	}
	vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN)
	for (p in 1:scl.int.LAM_CHOICES_LEN) {
	vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared
	}
	scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1)
	for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) {
	vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
	mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

	obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
	vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

	vec.flt.LASSO[t+scl.int.T_FIT] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit
	}

	## @subsection: Estimate model fit
	vec.flt.R <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1]

	vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
	vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1)

	vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
	vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO)

	scl.flt.R2_AR1 <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared
	scl.flt.R2_LASSO <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared
	scl.flt.R2_BOTH <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared


	## @subsection: Return results
	return(c(i, scl.flt.R2_AR1, scl.flt.R2_LASSO, scl.flt.R2_BOTH))

	}

	save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--dense-shocks--04dec2015.Rdata", sep = ""))

	}
	load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--dense-shocks--04dec2015.Rdata", sep = ""))




	###########################################################################
	###########################################################################
	## @section: Plot R^2 distributions
	###########################################################################
	###########################################################################

	## @subsection: Create plot data
	vec.flt.X <- seq(-0.005, 0.02, length.out = 101)
	scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1]
	mat.df.PLOT <- data.frame(x = vec.flt.X,
	ar1 = NA,
	lasso = NA,
	both = NA
	)
	for (i in 1:100) {
	mat.df.PLOT$ar1[i] <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
	mat.df.PLOT$lasso[i] <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
	mat.df.PLOT$both[i] <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
	}
	mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2

	names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO")
	mat.df.PLOT <- melt(mat.df.PLOT, c("x"))
	mat.df.PLOT$xmin <- mat.df.PLOT$x - scl.flt.dX/2
	mat.df.PLOT$xmax <- mat.df.PLOT$x + scl.flt.dX/2
	mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
	mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ]

	mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO"),
	avg = apply(mat.flt.SIM[, c(2,3,4)], 2, mean),
	label =
	c(paste("$",round(mean(mat.flt.SIM[,2]*100),3),"{\\scriptstyle \\%}$", sep=""),
	paste("$",round(mean(mat.flt.SIM[,3]*100),3),"{\\scriptstyle \\%}$", sep=""),
	paste("$",round(mean(mat.flt.SIM[,4]*100),3),"{\\scriptstyle \\%}$", sep="")
	)
	)


	## @subsection: Plot results
	theme_set(theme_bw())

	scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--dense-shocks'
	scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
	scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
	scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
	scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
	scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')

	tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)

	obj.gg2.PLOT <- ggplot()
	obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
	obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT,
	aes(xmin = xmin,
	xmax = xmax,
	ymin = 0,
	ymax = value,
	group = variable
	)
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
	size = 0.25,
	colour = "black"
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2,
	aes(xintercept = avg,
	group = variable
	),
	colour = "red",
	size = 1.25
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2,
	aes(x = avg + 0.001,
	y = 0.40 * 4/5,
	label = label
	),
	colour = "red",
	angle = 270,
	size = 4
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
	obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$")
	obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1)
	obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(-0.005, 0.020),
	breaks = c(0.00, 0.005, 0.010, 0.015),
	labels = c("$0.0{\\scriptstyle \\%}$", "$0.5{\\scriptstyle \\%}$", "$1.0{\\scriptstyle \\%}$", "$1.5{\\scriptstyle \\%}$")
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.40),
	breaks = c(0, 0.10, 0.20, 0.30, 0.40),
	labels = c("$0.00$","$0.10$", "$0.20$", "$0.30$", "$0.40$")
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"),
	axis.text = element_text(size = 10),
	axis.title = element_text(size = 10),
	plot.title = element_text(vjust = 1.75),
	panel.grid.minor = element_blank(),
	legend.position = "none"
	)

	obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: Dense Shocks")

	print(obj.gg2.PLOT)
	dev.off()

	system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
	system(paste('rm ', scl.str.TEX_FILE, sep = ''))
	system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
	system(paste('rm ', scl.str.AUX_FILE, sep = ''))
	system(paste('rm ', scl.str.LOG_FILE, sep = ''))
	###########################################################################
	###########################################################################
	## @purpose: Simulate return data with no shocks and show that the LASSO
	## does add any predictive power.
	## ------------------------------------------------------------------------
	## @author: Alex Chinco
	## @date: 04-DEC-2015
	###########################################################################
	###########################################################################





	###########################################################################
	###########################################################################
	## @section: Prep workspace
	###########################################################################
	###########################################################################

	options(width=120, digits=6, digits.secs=6)
	rm(list=ls())

	library(foreign)
	library(grid)
	library(plyr)
	library(ggplot2)
	library(reshape)
	library(vars)
	library(lars)

	library(tikzDevice)
	print(options('tikzLatexPackages'))
	options(tikzLatexPackages =
	c("\\usepackage{tikz}\n",
	"\\usepackage[active,tightpage,psfixbb]{preview}\n",
	"\\PreviewEnvironment{pgfpicture}\n",
	"\\setlength\\PreviewBorder{0pt}\n",
	"\\usepackage{amsmath}\n",
	"\\usepackage{pxfonts}\n",
	"\\usepackage{bm}\n",
	"\\usepackage{xfrac}\n"
	)
	)
	setTikzDefaults(overwrite = FALSE)
	print(options('tikzLatexPackages'))

	library(foreach)
	library(doMC)
	registerDoMC(7)

	scl.str.DAT_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/data/"
	scl.str.FIG_DIR <- "~/Dropbox/research/sparse_signals_in_cross_section/figures/"

	set.seed(1234)





	###########################################################################
	###########################################################################
	## @section: Define global parameters
	###########################################################################
	###########################################################################

	scl.int.ITER <- 10^3 # Number of iterations to run

	scl.int.T_FIT <- 50 # Number of minutes used to fit models
	scl.int.T_LAM <- 100 # Number of minutes used to pick lambda
	scl.int.T <- 1000 + scl.int.T_FIT + scl.int.T_LAM # Number of minutes

	scl.int.N <- 100 # Number of stocks
	scl.int.K <- 5 # Number of predictors

	scl.flt.SIG <- 0.001 # Noise volatility






	###########################################################################
	###########################################################################
	## @section: Simulate data and forecast returns
	###########################################################################
	###########################################################################

	if (TRUE == FALSE) {
	mat.flt.SIM <- foreach(i=1:scl.int.ITER, .combine = "rbind") %dopar% {
	print(paste(" Iteration ", i, " complete! ", format(Sys.time(), '%X'), sep = ""))

	## @subsection: Initialize data objects
	mat.flt.E <- matrix(rnorm(scl.int.T * scl.int.N, 0, scl.flt.SIG), ncol = scl.int.N)

	mat.flt.R <- matrix(rep(NA, scl.int.T * scl.int.N), ncol = scl.int.N)
	mat.flt.R[1, ] <- 0

	vec.flt.AR1 <- rep(NA, scl.int.T) # AR(1) forecast results
	vec.flt.LASSO <- rep(NA, scl.int.T) # LASSO forecast results



	## @subsection: Simulate data
	for (t in 2:scl.int.T) {
	mat.flt.R[t, ] <- mat.flt.E[t, ]
	}


	## @subsection: Forecast returns using AR(1)
	for (t in 1:(scl.int.T-scl.int.T_FIT)) {
	vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
	vec.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), 1]
	vec.flt.COEF <- lm(vec.flt.Y ~ vec.flt.X)$coef

	vec.flt.AR1[t+scl.int.T_FIT] <- vec.flt.COEF[1] + vec.flt.COEF[2] * tail(vec.flt.Y, 1)
	}


	## @subsection: Forecast returns using LASSO
	vec.flt.LAM_CHOICES <- seq(0.0005, 0.15, by = 0.0005)
	scl.int.LAM_CHOICES_LEN <- length(vec.flt.LAM_CHOICES)
	mat.flt.PRED_BY_LAM <- matrix(rep(NA, scl.int.T_LAM*scl.int.LAM_CHOICES_LEN), ncol = scl.int.LAM_CHOICES_LEN)
	for (t in 1:scl.int.T_LAM) {
	vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
	mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

	obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
	vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

	if (t <= scl.int.T_LAM) {
	mat.flt.PRED_BY_LAM[t, ] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = vec.flt.LAM_CHOICES, type = "fit", mode = "lambda")$fit
	}
	}
	vec.flt.R2_IN_SAMPLE <- rep(NA, scl.int.LAM_CHOICES_LEN)
	for (p in 1:scl.int.LAM_CHOICES_LEN) {
	vec.flt.R2_IN_SAMPLE[p] <- summary(lm(mat.flt.R[(scl.int.T_FIT+1):(scl.int.T_FIT+scl.int.T_LAM), 1] ~ mat.flt.PRED_BY_LAM[, p]))$r.squared
	}
	scl.flt.LAM <- tail(vec.flt.LAM_CHOICES[which(vec.flt.R2_IN_SAMPLE == max(vec.flt.R2_IN_SAMPLE))],1)
	for (t in (scl.int.T_LAM+1):(scl.int.T-scl.int.T_FIT)) {
	vec.flt.Y <- mat.flt.R[(t+1):(t+(scl.int.T_FIT-1)), 1]
	mat.flt.X <- mat.flt.R[t:(t+(scl.int.T_FIT-2)), ]

	obj.lars.RESULT <- lars(mat.flt.X, vec.flt.Y, type = "lasso", intercept = TRUE)
	vec.flt.NEWX <- matrix(mat.flt.R[(t+(scl.int.T_FIT-1)), ], nrow = 1)

	vec.flt.LASSO[t+scl.int.T_FIT] <- predict(obj.lars.RESULT, vec.flt.NEWX, s = scl.flt.LAM, type = "fit", mode = "lambda")$fit
	}

	## @subsection: Estimate model fit
	vec.flt.R <- mat.flt.R[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T, 1]

	vec.flt.AR1 <- vec.flt.AR1[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
	vec.flt.AR1 <- (vec.flt.AR1 - mean(vec.flt.AR1))/sd(vec.flt.AR1)

	vec.flt.LASSO <- vec.flt.LASSO[(scl.int.T_FIT+scl.int.T_LAM+1):scl.int.T]
	vec.flt.LASSO <- (vec.flt.LASSO - mean(vec.flt.LASSO))/sd(vec.flt.LASSO)

	scl.flt.R2_AR1 <- summary(lm(vec.flt.R ~ vec.flt.AR1))$adj.r.squared
	scl.flt.R2_LASSO <- summary(lm(vec.flt.R ~ vec.flt.LASSO))$adj.r.squared
	scl.flt.R2_BOTH <- summary(lm(vec.flt.R ~ vec.flt.AR1 + vec.flt.LASSO))$adj.r.squared

	## @subsection: Return results
	return(c(i, scl.flt.R2_AR1, scl.flt.R2_LASSO, scl.flt.R2_BOTH))

	}

	save(mat.flt.SIM, file = paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--no-shocks--04dec2015.Rdata", sep = ""))

	}
	load(paste(scl.str.DAT_DIR, "data--lasso-forecast--simulated-data--no-shocks--04dec2015.Rdata", sep = ""))




	###########################################################################
	###########################################################################
	## @section: Plot R^2 distributions
	###########################################################################
	###########################################################################

	## @subsection: Create plot data
	vec.flt.X <- seq(-0.005, 0.0125, length.out = 101)
	scl.flt.dX <- vec.flt.X[2]-vec.flt.X[1]
	mat.df.PLOT <- data.frame(x = vec.flt.X,
	ar1 = NA,
	lasso = NA,
	both = NA
	)
	for (i in 1:100) {
	mat.df.PLOT$ar1[i] <- length(which((mat.flt.SIM[,2] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,2] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
	mat.df.PLOT$lasso[i] <- length(which((mat.flt.SIM[,3] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,3] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
	mat.df.PLOT$both[i] <- length(which((mat.flt.SIM[,4] > mat.df.PLOT$x[i]) & (mat.flt.SIM[,4] <= mat.df.PLOT$x[i+1])))/scl.int.ITER
	}
	mat.df.PLOT$x <- mat.df.PLOT$x + scl.flt.dX/2

	names(mat.df.PLOT) <- c("x", "AR(1)", "LASSO", "AR(1) + LASSO")
	mat.df.PLOT <- melt(mat.df.PLOT, c("x"))
	mat.df.PLOT <- mat.df.PLOT[is.na(mat.df.PLOT$value) == FALSE, ]
	mat.df.PLOT <- mat.df.PLOT[mat.df.PLOT$value > 0, ]

	mat.df.PLOT2 <- data.frame(variable = c("AR(1)", "LASSO", "AR(1) + LASSO"),
	avg = apply(mat.flt.SIM[, c(2,3,4)], 2, mean),
	label =
	c(paste("$",round(mean(mat.flt.SIM[,2]*100),3),"{\\scriptstyle \\%}$", sep=""),
	paste("$",round(mean(mat.flt.SIM[,3]*100),3),"{\\scriptstyle \\%}$", sep=""),
	paste("$",round(mean(mat.flt.SIM[,4]*100),3),"{\\scriptstyle \\%}$", sep="")
	)
	)


	## @subsection: Plot results
	theme_set(theme_bw())

	scl.str.RAW_FILE <- 'plot--r2-distribution--simulated-data--no-shocks'
	scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
	scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
	scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
	scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
	scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')

	tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)

	obj.gg2.PLOT <- ggplot()
	obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
	obj.gg2.PLOT <- obj.gg2.PLOT + geom_rect(data = mat.df.PLOT,
	aes(xmin = x - scl.flt.dX/2,
	xmax = x + scl.flt.dX/2,
	ymin = 0,
	ymax = value,
	group = variable
	)
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
	size = 0.25,
	colour = "black"
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + geom_vline(data = mat.df.PLOT2,
	aes(xintercept = avg,
	group = variable
	),
	colour = "red",
	size = 1.25
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2,
	aes(x = avg + 0.00065,
	y = 0.21 * 4/5,
	label = label
	),
	colour = "red",
	angle = 270,
	size = 4
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
	obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\mathrm{Pr}[ \\, R^2 = x \\, ]$")
	obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ variable, nrow = 1)
	obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(limits = c(-0.0025, 0.0125),
	breaks = c(0.00, 0.005, 0.01),
	labels = c("$0.0{\\scriptstyle \\%}$","$0.5{\\scriptstyle \\%}$", "$1.0{\\scriptstyle \\%}$")
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + scale_y_continuous(limits = c(0, 0.21),
	breaks = c(0, 0.10, 0.20),
	labels = c("$0.00$","$0.10$", "$0.20$")
	)
	obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"),
	axis.text = element_text(size = 10),
	axis.title = element_text(size = 10),
	plot.title = element_text(vjust = 1.75),
	panel.grid.minor = element_blank(),
	legend.position = "none"
	)

	obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Adjusted $R^2$ Distribution: No Shocks")

	print(obj.gg2.PLOT)
	dev.off()

	system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
	system(paste('rm ', scl.str.TEX_FILE, sep = ''))
	system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
	system(paste('rm ', scl.str.AUX_FILE, sep = ''))
	system(paste('rm ', scl.str.LOG_FILE, sep = ''))