Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexchinco/1e53b8d2c7e28a6664ec to your computer and use it in GitHub Desktop.
Save alexchinco/1e53b8d2c7e28a6664ec to your computer and use it in GitHub Desktop.
Code for figures in "Comparing explanations for the idiosyncratic-return volatility puzzle".
;/*************************************************************************************
@author: Alex Chinco
@date: May 4th, 2015
**************************************************************************************/
OPTIONS LINESIZE = 208;
OPTIONS PAGESIZE = 208;
LIBNAME crsp '/wrds/crsp/sasdata/a_stock';
LIBNAME cboe '/wrds/cboe/sasdata';
LIBNAME ff '/wrds/ff/sasdata';
%LET START_DATE = '01JAN1963'd;
%LET END_DATE = '31DEC2012'd;
%INCLUDE '/home/uiuc/chinco/comparing-ivol-explanations/ROLLING_REG.sas';
;/*************************************************************************************
@sec: Pull daily return data
**************************************************************************************/
PROC SQL;
CREATE TABLE retData AS
SELECT a.permno,
a.date FORMAT=DATE9. AS date,
a.ret FORMAT=BEST8. AS ret,
a.prc FORMAT=BEST8. AS prc,
a.shrout FORMAT=BEST8. AS shrout,
b.exchcd,
b.shrcd
FROM crsp.dsf(KEEP = date permno ret prc shrout) AS a LEFT JOIN
crsp.dseall(KEEP = date permno exchcd shrcd) AS b
ON (a.permno = b.permno) AND
(a.date = b.date)
WHERE (&START_DATE <= a.date <= &END_DATE)
ORDER BY a.permno,
a.date;
QUIT;
DATA retData;
SET retData;
BY permno date;
RETAIN lexchcd lshrcd;
IF first.permno THEN
DO;
lexchcd = exchcd;
lshrcd = shrcd;
END;
ELSE
DO;
IF MISSING(exchcd) THEN
DO;
exchcd = lexchcd;
END;
ELSE
DO;
lexchcd = exchcd;
END;
IF MISSING(shrcd) THEN
DO;
shrcd = lshrcd;
END;
ELSE
DO;
lshrcd = shrcd;
END;
END;
IF (exchcd IN (1,2,3));
IF (shrcd IN (10,11));
IF (NOT MISSING(ret));
IF (ret NOT IN (-66.0,-77.0,-88.0,-99.0));
IF (NOT MISSING(prc));
IF (NOT MISSING(shrout));
IF (prc < 0) THEN
DO;
prc = ABS(prc);
END;
mcap = prc * shrout * 1000;
DROP lexchcd lshrcd shrcd exchcd prc shrout;
RUN;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM retData AS a;
QUIT;
;/*************************************************************************************
@sec: Merge on last month's market cap
**************************************************************************************/
PROC SQL;
CREATE TABLE mcapData AS
SELECT a.permno,
a.date,
a.mcap
FROM retData AS a
ORDER BY a.permno,
-a.date;
QUIT;
DATA mcapData;
SET mcapData;
BY permno;
RETAIN lDate;
IF first.date THEN
DO;
last = 0;
lDate = date;
END;
ELSE
DO;
IF (MONTH(date) = MONTH(lDate)) THEN
DO;
last = 0;
lDate = date;
END;
ELSE
DO;
last = 1;
lDate = date;
END;
END;
DROP lDate;
IF (last = 1);
RUN;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM mcapData AS a;
QUIT;
PROC SQL;
CREATE TABLE retData AS
SELECT a.permno,
a.date,
a.ret,
b.mcap
FROM retData AS a LEFT JOIN
mcapData AS b
ON (a.permno = b.permno) AND
(MONTH(INTNX('month', b.date, 1)) = MONTH(a.date)) AND
(YEAR(INTNX('month', b.date, 1)) = YEAR(a.date))
ORDER BY a.permno,
a.date;
QUIT;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM retData AS a;
QUIT;
;/*************************************************************************************
@sec: Merge on daily FF93 factors
**************************************************************************************/
PROC SQL;
CREATE TABLE retData AS
SELECT a.*,
b.mktrf FORMAT=BEST8. AS mkt,
b.smb FORMAT=BEST8. AS smb,
b.hml FORMAT=BEST8. AS hml,
(a.ret - b.rf) FORMAT=BEST8. AS retx
FROM retData AS a LEFT JOIN
ff.factors_daily AS b
ON (a.date = b.date)
WHERE (&START_DATE <= a.date <= &END_DATE)
ORDER BY a.permno,
a.date;
QUIT;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM retData AS a;
QUIT;
;/*************************************************************************************
@sec: Keep month x permno pairs with at least 17 observations
**************************************************************************************/
PROC SQL;
CREATE TABLE retData AS
SELECT a.*,
MONTH(a.date) FORMAT=2. AS month,
YEAR(a.date) FORMAT=4. AS year
FROM retData AS a
ORDER BY a.permno,
a.date;
QUIT;
PROC SQL;
CREATE TABLE obsData AS
SELECT a.permno,
a.year,
a.month,
COUNT(a.retx) as obs
FROM retData AS a
GROUP BY a.permno,
a.year,
a.month;
QUIT;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM obsData AS a;
QUIT;
PROC SQL;
CREATE TABLE retData AS
SELECT a.*,
b.obs
FROM retData AS a LEFT JOIN
obsData AS b
ON (a.permno = b.permno) AND
(a.year = b.year) AND
(a.month = b.month)
WHERE (b.obs >= 17)
ORDER BY a.permno,
a.date;
QUIT;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM retData AS a;
QUIT;
;/*************************************************************************************
@sec: Estimate daily factor exposures each month
**************************************************************************************/
%ROLLINGREG(
DATA = retData,
OUT_DS = coefData,
ID = permno,
DATE = date,
MODEL_EQUATION = retx = mkt smb hml,
START_DATE = 1-1-1965,
END_DATE = 12-31-2012,
FREQ = month,
S = 1,
N = 1
);
PROC SQL OUTOBS = 10;
SELECT a.*
FROM coefData AS a;
QUIT;
PROC SQL;
CREATE TABLE coefData AS
SELECT a.*
FROM coefData AS a
ORDER BY a.date2;
QUIT;
;/*************************************************************************************
@sec: Estimate maximum return each month
**************************************************************************************/
PROC SQL;
CREATE TABLE maxretData AS
SELECT a.permno,
a.year,
a.month,
MAX(a.ret) FORMAT=BEST8. AS maxret
FROM retData AS a
GROUP BY a.permno,
a.year,
a.month;
QUIT;
DATA maxretData;
SET maxretData;
month = month + 1;
IF (month = 13) THEN
DO;
month = 1;
year = year + 1;
END;
RUN;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM maxretData AS a;
QUIT;
;/*************************************************************************************
@sec: Merge data together
**************************************************************************************/
PROC SQL;
CREATE TABLE outData AS
SELECT a.permno,
a.date,
a.mcap,
a.retx,
a.month,
a.year,
b.regobs FORMAT=2. AS obs,
b._RMSE_ FORMAT=BEST8. AS ivol
FROM retData AS a,
coefData AS b
WHERE (a.permno = b.permno) AND
(MONTH(INTNX('month', b.date2, 1)) = MONTH(a.date)) AND
(YEAR(INTNX('month', b.date2, 1)) = YEAR(a.date))
ORDER BY a.permno,
a.date;
QUIT;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM outData AS a;
QUIT;
PROC SQL;
CREATE TABLE outData AS
SELECT a.permno,
a.year,
a.month,
SUM(log(1 + a.retx)) FORMAT=BEST8. AS retx,
MEAN(LOG(a.mcap/1000000)) FORMAT=BEST8. AS mcap,
MEAN(a.obs) FORMAT=BEST8. AS obs,
MEAN(a.ivol) FORMAT=BEST8. AS ivol
FROM outData AS a
GROUP BY a.permno,
a.year,
a.month;
QUIT;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM outData AS a;
QUIT;
PROC SQL;
CREATE TABLE outData AS
SELECT a.*,
b.maxret
FROM outData AS a,
maxretData AS b
WHERE (a.permno = b.permno) AND
(a.month = b.month) AND
(a.year = b.year)
ORDER BY a.permno,
a.year,
a.month;
QUIT;
PROC SQL OUTOBS = 10;
SELECT a.*
FROM outData AS a;
QUIT;
;/*************************************************************************************
@sec: Print data to CSV
**************************************************************************************/
PROC EXPORT
DATA = outData
OUTFILE = "monthly-idiosyncratic-volatility.csv"
DBMS = CSV
REPLACE;
RUN;
## Prep workspace
options(width=200, digits=6, digits.secs=6)
rm(list=ls())
library(foreign)
library(grid)
library(plyr)
library(ggplot2)
library(tikzDevice)
print(options('tikzLatexPackages'))
options(tikzLatexPackages =
c("\\usepackage{tikz}\n",
"\\usepackage[active,tightpage,psfixbb]{preview}\n",
"\\PreviewEnvironment{pgfpicture}\n",
"\\setlength\\PreviewBorder{0pt}\n",
"\\usepackage{amsmath}\n",
"\\usepackage{xfrac}\n"
)
)
setTikzDefaults(overwrite = FALSE)
print(options('tikzLatexPackages'))
library(reshape)
library(vars)
library(scales)
library(gmm)
library(foreach)
library(doMC)
registerDoMC(8)
scl.str.DAT_DIR <- "~/Dropbox/research/fast_trading_priced_noise/data/"
scl.str.FIG_DIR <- "~/Dropbox/research/fast_trading_priced_noise/figures/"
set.seed(12356)
## Load data
mat.df.DATA <- read.csv(paste(scl.str.DAT_DIR, "monthly-idiosyncratic-volatility--05may2015.csv", sep = ""),
stringsAsFactors = FALSE
)
names(mat.df.DATA) <- c("permno", "year", "month", "rx", "mcap", "obs", "ivol", "maxret")
mat.df.DATA <- mat.df.DATA[, c("permno", "year", "month", "rx", "ivol", "maxret")]
mat.df.DATA$t <- as.Date(paste(mat.df.DATA$year, mat.df.DATA$month, "01", sep = "-"), format = "%Y-%m-%d")
mat.df.DATA <- mat.df.DATA[, c("permno", "t", "rx", "ivol", "maxret")]
vec.dt.DATE <- sort(unique(mat.df.DATA$t))
scl.int.DATE_LEN <- length(vec.dt.DATE)
## Estimate monthly betas
mat.df.PLOT1 <- foreach(t=1:scl.int.DATE_LEN, .combine = "rbind") %dopar% {
print(vec.dt.DATE[t])
mat.df.TEMP <- mat.df.DATA[mat.df.DATA$t == vec.dt.DATE[t], ]
mat.df.TEMP$ivol <- (mat.df.TEMP$ivol - mean(mat.df.TEMP$ivol))/sd(mat.df.TEMP$ivol)
return(c(summary(lm(mat.df.TEMP$rx ~ mat.df.TEMP$ivol))$coef[2,1:2]))
}
mat.df.PLOT1 <- data.frame(mat.df.PLOT1)
names(mat.df.PLOT1) <- c("bet", "se")
mat.df.PLOT1$t <- vec.dt.DATE
## Plot monthly betas
theme_set(theme_bw())
scl.str.RAW_FILE <- 'plot--ivol-puzzle--monthly-betas'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)
obj.gg2.PLOT <- ggplot(data = mat.df.PLOT1)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
size = 0.50,
linetype = 4
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_path(aes(x = t,
y = bet * 100
),
size = 1.25
)
obj.gg2.PLOT <- obj.gg2.PLOT + annotate("text",
x = as.Date("1985-01-01"),
y = -7.5,
size = 4,
label = paste("$\\langle \\beta \\rangle = ", round(mean(mat.df.PLOT1$bet) * 100, 2), "{\\scriptstyle \\%}$", sep ="")
)
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\%$ per Month")
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10),
plot.title = element_text(vjust = 1.75),
panel.grid.minor = element_blank(),
legend.position = "none"
)
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Returns to iVol-Based Strategy")
print(obj.gg2.PLOT)
dev.off()
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
## Estimate fraction explained by maxret
obj.fun.GMM1 <- function(tet, x) {
vec.flt.M1 <- x[,1] - (tet[1] + tet[2] * x[,2])
vec.flt.M2 <- (x[,1] - (tet[1] + tet[2] * x[,2])) * x[,2]
vec.flt.M3 <- (x[,2] - tet[3] * x[,3]) * x[,3]
vec.flt.M4 <- (tet[2] - tet[4] - (x[,2] - tet[3] * x[,3]) * x[,1])
return(cbind(vec.flt.M1, vec.flt.M2, vec.flt.M3, vec.flt.M4))
}
obj.fun.GMM2 <- function(tet, x) {
vec.flt.M1 <- x[,1] - (tet[1] + tet[2] * x[,2])
vec.flt.M2 <- (x[,1] - (tet[1] + tet[2] * x[,2])) * x[,2]
vec.flt.M3 <- (x[,2] - tet[3] * x[,3]) * x[,3]
vec.flt.M4 <- (tet[2] - tet[4] - tet[3] * x[,3] * x[,1])
return(cbind(vec.flt.M1, vec.flt.M2, vec.flt.M3, vec.flt.M4))
}
mat.df.PLOT2 <- foreach(t=1:scl.int.DATE_LEN, .combine = "rbind") %dopar% {
print(vec.dt.DATE[t])
mat.df.TEMP <- mat.df.DATA[mat.df.DATA$t == vec.dt.DATE[t], ]
mat.df.TEMP$ivol <- (mat.df.TEMP$ivol - mean(mat.df.TEMP$ivol))/sd(mat.df.TEMP$ivol)
mat.df.TEMP$maxret <- (mat.df.TEMP$maxret - mean(mat.df.TEMP$maxret))/sd(mat.df.TEMP$maxret)
obj.gmm.RESULTS1 <- gmm(obj.fun.GMM1,
mat.df.TEMP[, c("rx", "ivol", "maxret")],
c(0.5, 0.5, 0.5, 0.5)
)
obj.gmm.RESULTS2 <- gmm(obj.fun.GMM2,
mat.df.TEMP[, c("rx", "ivol", "maxret")],
c(0.5, 0.5, 0.5, 0.5)
)
return(c(summary(obj.gmm.RESULTS1)$coef[2,1], summary(obj.gmm.RESULTS1)$coef[4,1], summary(obj.gmm.RESULTS2)$coef[2,1] - summary(obj.gmm.RESULTS2)$coef[4,1]))
}
mat.df.PLOT2 <- data.frame(mat.df.PLOT2)
names(mat.df.PLOT2) <- c("bet1", "dlt1", "dlt2")
mat.df.PLOT2$t <- vec.dt.DATE
## Plot fraction explained
theme_set(theme_bw())
scl.str.RAW_FILE <- 'plot--ivol-puzzle--fraction-explained'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)
obj.gg2.PLOT <- ggplot(data = mat.df.PLOT2)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
size = 0.50,
linetype = 4
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_path(aes(x = t,
y = dlt1 * 100
),
size = 1.25
)
obj.gg2.PLOT <- obj.gg2.PLOT + annotate("text",
x = as.Date("1985-01-01"),
y = -7.50,
size = 4,
label = paste("$\\langle \\beta_E \\rangle = ", round(mean(mat.df.PLOT2$dlt1) * 100, 2), "{\\scriptstyle \\%}$", sep ="")
)
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\%$ per Month")
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,-0.85,0.15), "lines"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10),
plot.title = element_text(vjust = 1.75),
panel.grid.minor = element_blank(),
legend.position = "none"
)
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Returns to iVol-Based Strategy Explained by $\\max(r_{n,d_{t-1}})$")
print(obj.gg2.PLOT)
dev.off()
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
## Compare estimation strategies
scl.str.RAW_FILE <- 'plot--ivol-puzzle--compare-estimation-strategies'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')
tikz(file = scl.str.TEX_FILE, height = 2, width = 7, standAlone=TRUE)
obj.gg2.PLOT <- ggplot(data = mat.df.PLOT2)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_hline(yintercept = 0,
size = 0.50,
linetype = 4
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_point(aes(x = dlt2 * 100,
y = dlt1 * 100
)
)
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("$\\beta_E = \\gamma \\cdot \\mathrm{Cov}[r_n,\\widetilde{x}_n]$")
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("$\\beta_E = \\beta - \\mathrm{Cov}[r_n, (\\widetilde{\\mathit{ivol}}_n - \\gamma \\cdot \\widetilde{x}_n)]$")
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.15,0.15,0.15), "lines"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10),
plot.title = element_text(vjust = 1.75),
panel.grid.minor = element_blank(),
legend.position = "none"
)
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("Comparing Estimation Strategies")
print(obj.gg2.PLOT)
dev.off()
system(paste('lualatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment