Last active
August 29, 2015 13:57
-
-
Save alexchinco/9644120 to your computer and use it in GitHub Desktop.
Replicate Hou (2007) Table 1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Prep workspace | |
rm(list=ls()) | |
library(foreign) | |
library(grid) | |
library(plyr) | |
library(ggplot2) | |
library(tikzDevice) | |
print(options('tikzLatexPackages')) | |
options(tikzLatexPackages = | |
c("\\usepackage{tikz}\n", | |
"\\usepackage[active,tightpage,psfixbb]{preview}\n", | |
"\\PreviewEnvironment{pgfpicture}\n", | |
"\\setlength\\PreviewBorder{0pt}\n", | |
"\\usepackage{amsmath}\n", | |
"\\usepackage{xfrac}\n" | |
) | |
) | |
setTikzDefaults(overwrite = FALSE) | |
print(options('tikzLatexPackages')) | |
library(reshape) | |
library(vars) | |
library(scales) | |
scl.str.DAT_DIR <- "~/Dropbox/research/absolutely_small/data/" | |
scl.str.FIG_DIR <- "~/Dropbox/research/absolutely_small/figures/" | |
## Load data | |
mat.df.DATA <- read.csv(paste(scl.str.DAT_DIR, "hou-2007-table-1-data.csv", sep = ""), stringsAsFactors = FALSE) | |
mat.df.DATA$t <- as.Date(as.character(mat.df.DATA$DATE), format = "%Y%m%d") | |
names(mat.df.DATA) <- c("ind", "size", "date", "ret", "t") | |
mat.df.SS <- read.csv(paste(scl.str.DAT_DIR, "june-ff-sum-stats-data.csv", sep = ""), stringsAsFactors = FALSE) | |
mat.df.SS$t <- as.Date(as.character(mat.df.SS$date), format = "%d%b%Y") | |
names(mat.df.SS) <- c("date", "ind", "size", "count", "ret", "t") | |
## Restrict date range | |
mat.df.DATA <- mat.df.DATA[mat.df.DATA$t >= as.Date("1963-07-01"), ] | |
mat.df.DATA <- mat.df.DATA[mat.df.DATA$t < as.Date("2002-01-01"), ] | |
## Create date index | |
mat.df.DATE_INDEX <- data.frame(t = sort(unique(mat.df.DATA$t)), | |
n = seq(1, length(unique(mat.df.DATA$t))) | |
) | |
mat.df.DATA <- merge(mat.df.DATA, mat.df.DATE_INDEX, by = c("t")) | |
mat.df.DATA <- mat.df.DATA[, c("t", "n", "ind", "size", "ret")] | |
## Compute autocorrelations | |
scl.int.NUM_IND <- 12 | |
scl.int.NUM_LAG <- 6 | |
mat.df.PLOT <- data.frame(ind = sort(rep(seq(1, scl.int.NUM_IND), scl.int.NUM_LAG + 1)), | |
lag = rep(seq(0, scl.int.NUM_LAG), scl.int.NUM_IND), | |
bet1 = NA, | |
se1 = NA, | |
bet2 = NA, | |
se2 = NA | |
) | |
for (i in 1:scl.int.NUM_IND) { | |
for (l in 0:scl.int.NUM_LAG) { | |
mat.df.CURRENT <- mat.df.DATA[(mat.df.DATA$ind == i) & (mat.df.DATA$size == 3), c("n", "ret")] | |
mat.df.CURRENT$ret <- (mat.df.CURRENT$ret - mean(mat.df.CURRENT$ret))/sd(mat.df.CURRENT$ret) | |
mat.df.LAGGED <- mat.df.DATA[(mat.df.DATA$ind == i) & (mat.df.DATA$size == 1), c("n", "ret")] | |
mat.df.LAGGED$ret <- (mat.df.LAGGED$ret - mean(mat.df.LAGGED$ret))/sd(mat.df.LAGGED$ret) | |
names(mat.df.LAGGED) <- c("n", "Lret") | |
mat.df.LAGGED$n <- mat.df.LAGGED$n - l | |
mat.df.REG <- merge(mat.df.CURRENT, mat.df.LAGGED, by = c("n")) | |
mat.df.PLOT[(mat.df.PLOT$ind == i) & (mat.df.PLOT$lag == l), ]$bet1 <- summary(lm(ret ~ 0 + Lret, data = mat.df.REG))$coef[1,1] | |
mat.df.PLOT[(mat.df.PLOT$ind == i) & (mat.df.PLOT$lag == l), ]$se1 <- summary(lm(ret ~ 0 + Lret, data = mat.df.REG))$coef[1,2] | |
} | |
for (l in 0:scl.int.NUM_LAG) { | |
mat.df.CURRENT <- mat.df.DATA[(mat.df.DATA$ind == i) & (mat.df.DATA$size == 1), c("n", "ret")] | |
mat.df.CURRENT$ret <- (mat.df.CURRENT$ret - mean(mat.df.CURRENT$ret))/sd(mat.df.CURRENT$ret) | |
mat.df.LAGGED <- mat.df.DATA[(mat.df.DATA$ind == i) & (mat.df.DATA$size == 3), c("n", "ret")] | |
mat.df.LAGGED$ret <- (mat.df.LAGGED$ret - mean(mat.df.LAGGED$ret))/sd(mat.df.LAGGED$ret) | |
names(mat.df.LAGGED) <- c("n", "Lret") | |
mat.df.LAGGED$n <- mat.df.LAGGED$n - l | |
mat.df.REG <- merge(mat.df.CURRENT, mat.df.LAGGED, by = c("n")) | |
mat.df.PLOT[(mat.df.PLOT$ind == i) & (mat.df.PLOT$lag == l), ]$bet2 <- summary(lm(ret ~ 0 + Lret, data = mat.df.REG))$coef[1,1] | |
mat.df.PLOT[(mat.df.PLOT$ind == i) & (mat.df.PLOT$lag == l), ]$se2 <- summary(lm(ret ~ 0 + Lret, data = mat.df.REG))$coef[1,2] | |
} | |
} | |
## Compute average firm counts | |
mat.df.PLOT2 <- ddply(mat.df.SS, c("t", "ind"), function(X)sum(X$count, na.rm = TRUE)) | |
names(mat.df.PLOT2) <- c("t", "ind", "N") | |
mat.df.PLOT2 <- ddply(mat.df.PLOT2, c("ind"), function(X)round(mean(X$N, na.rm = TRUE))) | |
names(mat.df.PLOT2) <- c("ind", "N") | |
mat.df.PLOT2$N <- paste("$\\langle N \\rangle = ", mat.df.PLOT2$N, "$", sep = "") | |
mat.df.PLOT2$x <- 1.25 | |
mat.df.PLOT2$y <- 0.625 | |
## Compute mean and sd returns | |
mat.df.PLOT3 <- ddply(mat.df.SS, c("ind", "size"), function(X)c(round(mean(X$ret, na.rm = TRUE)/52 * 100, 2), round(sd(X$ret, na.rm = TRUE)/sqrt(52) * 100,2))) | |
names(mat.df.PLOT3) <- c("ind", "size", "avg", "sd") | |
mat.df.PLOT3 <- mat.df.PLOT3[mat.df.PLOT3$size != 2, ] | |
mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$avg <- paste("$\\mu_S = ", mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$avg, "{\\scriptstyle \\%}$", sep = "") | |
mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$avg <- paste("$\\mu_B = ", mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$avg, "{\\scriptstyle \\%}$", sep = "") | |
mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$sd <- paste("$\\sigma_S = ", mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$sd, "{\\scriptstyle \\%}$", sep = "") | |
mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$sd <- paste("$\\sigma_B = ", mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$sd, "{\\scriptstyle \\%}$", sep = "") | |
mat.df.PLOT3 <- melt(mat.df.PLOT3, c("ind", "size")) | |
mat.df.PLOT3$x <- NA | |
mat.df.PLOT3[mat.df.PLOT3$variable == "avg", ]$x <- 3.25 | |
mat.df.PLOT3[mat.df.PLOT3$variable == "sd", ]$x <- 5.25 | |
mat.df.PLOT3$y <- NA | |
mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$y <- 0.625 | |
mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$y <- 0.375 | |
## Plot distribution of total number of mentions | |
theme_set(theme_bw()) | |
mat.df.PLOT$ind <- factor(mat.df.PLOT$ind, | |
levels = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'), | |
labels = c('Consumer Non-Durables', 'Consumer Durables', 'Manufacturing', 'Oil, Gas, Coal', 'Chemicals', 'Business Equipment', 'Telecommunications', 'Utilities', 'Wholesale/Retail Shops', 'Healthcare/Medical', 'Finance', 'Other') | |
) | |
mat.df.PLOT2$ind <- factor(mat.df.PLOT2$ind, | |
levels = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'), | |
labels = c('Consumer Non-Durables', 'Consumer Durables', 'Manufacturing', 'Oil, Gas, Coal', 'Chemicals', 'Business Equipment', 'Telecommunications', 'Utilities', 'Wholesale/Retail Shops', 'Healthcare/Medical', 'Finance', 'Other') | |
) | |
mat.df.PLOT3$ind <- factor(mat.df.PLOT3$ind, | |
levels = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'), | |
labels = c('Consumer Non-Durables', 'Consumer Durables', 'Manufacturing', 'Oil, Gas, Coal', 'Chemicals', 'Business Equipment', 'Telecommunications', 'Utilities', 'Wholesale/Retail Shops', 'Healthcare/Medical', 'Finance', 'Other') | |
) | |
mat.df.PLOT$ub1 <- mat.df.PLOT$bet1 + 2 * mat.df.PLOT$se1 | |
mat.df.PLOT$lb1 <- mat.df.PLOT$bet1 - 2 * mat.df.PLOT$se1 | |
mat.df.PLOT$ub2 <- mat.df.PLOT$bet2 + 2 * mat.df.PLOT$se2 | |
mat.df.PLOT$lb2 <- mat.df.PLOT$bet2 - 2 * mat.df.PLOT$se2 | |
scl.str.RAW_FILE <- 'hou-2007-table-1' | |
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='') | |
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='') | |
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='') | |
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='') | |
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='') | |
tikz(file = scl.str.TEX_FILE, height = 3.5, width = 9, standAlone=TRUE) | |
obj.gg2.PLOT <- ggplot(data = mat.df.PLOT) | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1") | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_ribbon(aes(x = lag, | |
ymax = ub1, | |
ymin = lb1, | |
group = ind | |
), | |
alpha = 0.25, | |
fill = "blue" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_ribbon(aes(x = lag, | |
ymax = ub2, | |
ymin = lb2, | |
group = ind | |
), | |
alpha = 0.25, | |
fill = "red" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_line(aes(x = lag, | |
y = bet1, | |
group = ind | |
), | |
size = 0.75, | |
colour = "blue" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_line(aes(x = lag, | |
y = bet2, | |
group = ind | |
), | |
size = 0.75, | |
colour = "red" | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2, | |
aes(x = x, | |
y = y, | |
label = N, | |
group = ind | |
), | |
size = 2.5 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT3, | |
aes(x = x, | |
y = y, | |
label = value, | |
group = ind | |
), | |
size = 2.5 | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + coord_cartesian(xlim = c(-0.25, 6.25)) | |
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ ind, ncol = 4) | |
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("Lag: $l$ (weeks)") | |
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("") | |
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(breaks = c(0, 1, 2, 3, 4, 6)) | |
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.10,0,-0.75), "lines"), | |
legend.position = "none", | |
axis.title = element_text(size = 8), | |
axis.text = element_text(size = 6), | |
plot.title = element_text(vjust = 1.75), | |
strip.text.x = element_text(size = 8), | |
panel.grid.minor = element_blank() | |
) | |
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("30/70 Ken French Industry Cross-Autocorrelation: ${\\color{blue}\\mathrm{Cor}[r_{i,t}^B,r_{i,t+l}^S]}$ and ${\\color{red}\\mathrm{Cor}[r_{i,t}^S,r_{i,t+l}^B]}$") | |
print(obj.gg2.PLOT) | |
dev.off() | |
system(paste('pdflatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE) | |
system(paste('convert -density 600', file.path(scl.str.PDF_FILE), ' ', file.path(scl.str.PNG_FILE))) | |
system(paste('mv ', scl.str.PNG_FILE, ' ', scl.str.FIG_DIR, sep = '')) | |
system(paste('rm ', scl.str.TEX_FILE, sep = '')) | |
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = '')) | |
system(paste('rm ', scl.str.AUX_FILE, sep = '')) | |
system(paste('rm ', scl.str.LOG_FILE, sep = '')) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
OPTIONS LINESIZE = 256; | |
OPTIONS PAGESIZE = 256; | |
LIBNAME comp '/wrds/comp/sasdata/naa'; | |
LIBNAME crsp '/wrds/crsp/sasdata/a_stock'; | |
LIBNAME cc '/wrds/crsp/sasdata/a_ccm'; | |
LIBNAME treas '/wrds/crsp/sasdata/a_treas_bm'; | |
%LET startDate = '01JAN1960'd; | |
%LET endDate = '31DEC2013'd; | |
;/************************************************************************************* | |
@section: PULL DATA IN JUNE | |
**************************************************************************************/ | |
%LET msevars = ticker ncusip shrcd exchcd; | |
%LET msfvars = prc ret shrout cfacpr cfacshr hsiccd; | |
%INCLUDE '/wrds/crsp/samples/crspmerge.sas'; | |
%CRSPMERGE(S = m, | |
START = &startDate, | |
END = &endDate, | |
SFVARS = &msfvars, | |
SEVARS = &msevars, | |
FILTERS = (exchcd in (1,2,3)) AND (shrcd in (10,11)) | |
); | |
PROC SQL; | |
CREATE TABLE crsp_m AS | |
SELECT a.*, | |
b.dlret, | |
(SUM(1, ret) * SUM(1, dlret) - 1) AS retAdj, | |
ABS(a.prc * a.shrout) AS mcap2 | |
FROM crsp_m AS a LEFT JOIN | |
crsp.msedelist (WHERE = (MISSING(dlret) = 0)) as b | |
ON (a.permno = b.permno) AND | |
(INTNX('month', a.date, 0, 'E') = INTNX('month', b.dlstdt, 0, 'E')); | |
QUIT; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM crsp_m AS a | |
ORDER BY permco, | |
date; | |
QUIT; | |
;/************************************************************************************* | |
@section: ORGANIZE DATA BY PERMNO | |
**************************************************************************************/ | |
PROC SQL; | |
CREATE TABLE crsp_m AS | |
SELECT a.* | |
FROM crsp_m AS a | |
ORDER BY a.date, | |
a.permco, | |
a.mcap2; | |
QUIT; | |
DATA crsp_m (DROP = mcap2); | |
SET crsp_m; | |
BY date | |
permco | |
mcap2; | |
RETAIN mcap; | |
IF (first.permco AND last.permco) THEN | |
DO; | |
mcap = mcap2; | |
OUTPUT; | |
END; | |
ELSE | |
DO; | |
IF (first.permco) THEN | |
DO; | |
mcap = mcap2; | |
END; | |
ELSE | |
DO; | |
mcap = SUM(mcap2, mcap); | |
END; | |
IF (last.permco) THEN | |
DO; | |
OUTPUT; | |
END; | |
END; | |
RUN; | |
PROC SQL; | |
CREATE TABLE crsp_m AS | |
SELECT a.* | |
FROM crsp_m AS a | |
ORDER BY permno, | |
date; | |
QUIT; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM crsp_m AS a | |
ORDER BY permco, | |
date; | |
QUIT; | |
;/************************************************************************************* | |
@section: ASSIGN FF INDUSTRIES | |
**************************************************************************************/ | |
DATA crsp_m; | |
SET crsp_m; | |
ind = 12; | |
IF (100 <= hsiccd <= 999) THEN DO; ind = 1; END; | |
IF (2000 <= hsiccd <= 2399) THEN DO; ind = 1; END; | |
IF (2700 <= hsiccd <= 2749) THEN DO; ind = 1; END; | |
IF (2770 <= hsiccd <= 2799) THEN DO; ind = 1; END; | |
IF (3100 <= hsiccd <= 3199) THEN DO; ind = 1; END; | |
IF (3940 <= hsiccd <= 3989) THEN DO; ind = 1; END; | |
IF (2500 <= hsiccd <= 2519) THEN DO; ind = 2; END; | |
IF (2590 <= hsiccd <= 2599) THEN DO; ind = 2; END; | |
IF (3630 <= hsiccd <= 3659) THEN DO; ind = 2; END; | |
IF (3710 <= hsiccd <= 3711) THEN DO; ind = 2; END; | |
IF (3714 <= hsiccd <= 3714) THEN DO; ind = 2; END; | |
IF (3716 <= hsiccd <= 3716) THEN DO; ind = 2; END; | |
IF (3750 <= hsiccd <= 3751) THEN DO; ind = 2; END; | |
IF (3792 <= hsiccd <= 3792) THEN DO; ind = 2; END; | |
IF (3900 <= hsiccd <= 3939) THEN DO; ind = 2; END; | |
IF (3990 <= hsiccd <= 3999) THEN DO; ind = 2; END; | |
IF (2520 <= hsiccd <= 2589) THEN DO; ind = 3; END; | |
IF (2600 <= hsiccd <= 2699) THEN DO; ind = 3; END; | |
IF (2750 <= hsiccd <= 2769) THEN DO; ind = 3; END; | |
IF (3000 <= hsiccd <= 3099) THEN DO; ind = 3; END; | |
IF (3200 <= hsiccd <= 3569) THEN DO; ind = 3; END; | |
IF (3580 <= hsiccd <= 3629) THEN DO; ind = 3; END; | |
IF (3700 <= hsiccd <= 3709) THEN DO; ind = 3; END; | |
IF (3712 <= hsiccd <= 3713) THEN DO; ind = 3; END; | |
IF (3715 <= hsiccd <= 3715) THEN DO; ind = 3; END; | |
IF (3717 <= hsiccd <= 3749) THEN DO; ind = 3; END; | |
IF (3752 <= hsiccd <= 3791) THEN DO; ind = 3; END; | |
IF (3793 <= hsiccd <= 3799) THEN DO; ind = 3; END; | |
IF (3830 <= hsiccd <= 3839) THEN DO; ind = 3; END; | |
IF (3860 <= hsiccd <= 3899) THEN DO; ind = 3; END; | |
IF (1200 <= hsiccd <= 1399) THEN DO; ind = 4; END; | |
IF (2900 <= hsiccd <= 2999) THEN DO; ind = 4; END; | |
IF (2800 <= hsiccd <= 2829) THEN DO; ind = 5; END; | |
IF (2840 <= hsiccd <= 2899) THEN DO; ind = 5; END; | |
IF (3570 <= hsiccd <= 3579) THEN DO; ind = 6; END; | |
IF (3660 <= hsiccd <= 3692) THEN DO; ind = 6; END; | |
IF (3694 <= hsiccd <= 3699) THEN DO; ind = 6; END; | |
IF (3810 <= hsiccd <= 3829) THEN DO; ind = 6; END; | |
IF (7370 <= hsiccd <= 7379) THEN DO; ind = 6; END; | |
IF (4800 <= hsiccd <= 4899) THEN DO; ind = 7; END; | |
IF (4900 <= hsiccd <= 4949) THEN DO; ind = 8; END; | |
IF (5000 <= hsiccd <= 5999) THEN DO; ind = 9; END; | |
IF (7200 <= hsiccd <= 7299) THEN DO; ind = 9; END; | |
IF (7600 <= hsiccd <= 7699) THEN DO; ind = 9; END; | |
IF (2830 <= hsiccd <= 2839) THEN DO; ind = 10; END; | |
IF (3693 <= hsiccd <= 3693) THEN DO; ind = 10; END; | |
IF (3840 <= hsiccd <= 3859) THEN DO; ind = 10; END; | |
IF (8000 <= hsiccd <= 8099) THEN DO; ind = 10; END; | |
IF (6000 <= hsiccd <= 6999) THEN DO; ind = 11; END; | |
RUN; | |
;/************************************************************************************* | |
@section: KEEP JUNE DATA | |
**************************************************************************************/ | |
PROC SQL; | |
CREATE TABLE crsp_june AS | |
SELECT DISTINCT a.permno FORMAT=8. AS permno, | |
a.date FORMAT=DATE9. AS date, | |
a.mcap FORMAT=BEST16. AS mcap, | |
a.ind FORMAT=2. AS ind, | |
(exp(sum(log(1+b.retAdj))) - 1) FORMAT=BEST16. AS cret | |
FROM crsp_m AS a, | |
crsp_m AS b | |
WHERE (a.permno = b.permno) AND | |
(MONTH(a.date) = 6) AND | |
(0 <= INTCK('month', a.date, b.date) < 12) | |
GROUP BY a.permno, | |
a.date; | |
QUIT; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM crsp_june AS a | |
ORDER BY permno, | |
date; | |
QUIT; | |
;/************************************************************************************* | |
@section: COMPUTE INTRA-INDUSTRY SIZE BUCKETS | |
**************************************************************************************/ | |
PROC SQL; | |
CREATE TABLE crsp_june AS | |
SELECT a.* | |
FROM crsp_june AS a | |
ORDER BY a.date, | |
a.ind; | |
QUIT; | |
PROC UNIVARIATE DATA = crsp_june NOPRINT; | |
VAR mcap; | |
BY date ind; | |
OUTPUT OUT = breaks | |
PCTLPTS = 30 70 | |
PCTLPRE = mcap; | |
RUN; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM breaks AS a | |
ORDER BY ind, | |
date; | |
QUIT; | |
PROC SQL; | |
CREATE TABLE crsp_june AS | |
SELECT a.*, | |
b.mcap30, | |
b.mcap70 | |
FROM crsp_june AS a, | |
breaks AS b | |
WHERE (a.date = b.date) AND | |
(a.ind = b.ind); | |
QUIT; | |
DATA crsp_june; | |
SET crsp_june; | |
size = 1; | |
IF (mcap >= mcap30) THEN DO; size = 2; END; | |
IF (mcap >= mcap70) THEN DO; size = 3; END; | |
RUN; | |
PROC SQL; | |
CREATE TABLE crsp_count AS | |
SELECT a.date, | |
a.ind, | |
a.size, | |
COUNT(a.permno) AS count | |
FROM crsp_june AS a | |
GROUP BY a.date, | |
a.ind, | |
a.size; | |
QUIT; | |
PROC SQL; | |
CREATE TABLE crsp_june AS | |
SELECT a.*, | |
b.count | |
FROM crsp_june AS a, | |
crsp_count AS b | |
WHERE (a.date = b.date) AND | |
(a.ind = b.ind) AND | |
(a.size = b.size); | |
QUIT; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM crsp_june AS a | |
ORDER BY permno, | |
date; | |
QUIT; | |
;/************************************************************************************* | |
@section: COMPUTE JUNE SUMMARY STATISTICS | |
**************************************************************************************/ | |
PROC SQL; | |
CREATE TABLE crsp_june_ss AS | |
SELECT DISTINCT a.date, | |
a.ind, | |
a.size, | |
COUNT(a.permno) FORMAT=BEST16. AS count, | |
MEAN(a.cret) FORMAT=BEST16. AS cret | |
FROM crsp_june AS a | |
GROUP BY a.date, | |
a.ind, | |
a.size; | |
QUIT; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM crsp_june_ss AS a | |
ORDER BY date, | |
ind, | |
size; | |
QUIT; | |
PROC EXPORT | |
DATA = crsp_june_ss | |
OUTFILE = "june-ff-sum-stats-data.csv" | |
DBMS = CSV | |
REPLACE; | |
RUN; | |
ENDSAS; | |
;/************************************************************************************* | |
@section: PULL DAILY DATA | |
**************************************************************************************/ | |
%LET dsevars = ticker ncusip shrcd exchcd; | |
%LET dsfvars = ret retx cfacpr cfacshr; | |
%INCLUDE '/wrds/crsp/samples/crspmerge.sas'; | |
%CRSPMERGE(S = d, | |
START = &startDate, | |
END = &endDate, | |
SFVARS = &dsfvars, | |
SEVARS = &dsevars, | |
FILTERS = (exchcd in (1,2,3)) AND (shrcd in (10,11)) | |
); | |
PROC SQL; | |
CREATE TABLE crsp_d AS | |
SELECT a.*, | |
WEEK(a.date) FORMAT=2. AS week, | |
YEAR(a.date) FORMAT=4. AS year, | |
b.dlret FORMAT=BEST16. AS dlret, | |
(SUM(1, ret) * SUM(1, dlret) - 1) FORMAT=BEST16. AS retAdj | |
FROM crsp_d AS a LEFT JOIN | |
crsp.dsedelist (WHERE = (MISSING(dlret) = 0)) AS b | |
ON (a.permno = b.permno) AND | |
(a.date = b.dlstdt) | |
ORDER BY permco, | |
date; | |
QUIT; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM crsp_d AS a | |
ORDER BY permco, | |
date; | |
QUIT; | |
;/************************************************************************************* | |
@section: COMPUTE EW INDUSTRY x SIZE WEEKLY RETURNS | |
**************************************************************************************/ | |
PROC SQL; | |
CREATE TABLE crsp_d AS | |
SELECT DISTINCT a.*, | |
b.ind FORMAT=2. AS ind, | |
b.size FORMAT=1. AS size | |
FROM crsp_d AS a, | |
crsp_june AS b | |
WHERE (a.permno = b.permno) AND | |
(0 < INTCK('year', b.date, a.date) <= 1) | |
ORDER BY a.permno, | |
a.date; | |
QUIT; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM crsp_d AS a | |
ORDER BY permno, | |
date; | |
QUIT; | |
PROC SQL; | |
CREATE TABLE ind_size_d AS | |
SELECT DISTINCT a.date, | |
a.ind, | |
a.size, | |
MEAN(a.retAdj) FORMAT=BEST16. AS ret | |
FROM crsp_d AS a | |
GROUP BY a.date, | |
a.ind, | |
a.size; | |
QUIT; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM ind_size_d AS a | |
ORDER BY ind, | |
size, | |
date; | |
QUIT; | |
PROC SQL; | |
CREATE TABLE ind_size_w AS | |
SELECT DISTINCT a.ind, | |
a.size, | |
a.date, | |
(exp(sum(log(1+b.ret))) - 1) FORMAT=BEST16. AS cret | |
FROM ind_size_d AS a, | |
ind_size_d AS b | |
WHERE (a.ind = b.ind) AND | |
(a.size = b.size) AND | |
(WEEKDAY(a.date) = 3) AND | |
(0 <= INTCK('day', a.date, b.date) < 7) | |
GROUP BY a.ind, | |
a.size, | |
a.date; | |
QUIT; | |
PROC SQL OUTOBS = 25; | |
SELECT a.* | |
FROM ind_size_w AS a | |
ORDER BY a.ind, | |
a.size, | |
a.date; | |
QUIT; | |
;/************************************************************************************* | |
@section: EXPORT RESULTS TO CSV FILES | |
**************************************************************************************/ | |
PROC EXPORT | |
DATA = ind_size_w | |
OUTFILE = "hou-2007-table-1-data.csv" | |
DBMS = CSV | |
REPLACE; | |
RUN; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment