Skip to content

Instantly share code, notes, and snippets.

@alexchinco
Last active August 29, 2015 13:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save alexchinco/9644120 to your computer and use it in GitHub Desktop.
Save alexchinco/9644120 to your computer and use it in GitHub Desktop.
Replicate Hou (2007) Table 1
## Prep workspace
rm(list=ls())
library(foreign)
library(grid)
library(plyr)
library(ggplot2)
library(tikzDevice)
print(options('tikzLatexPackages'))
options(tikzLatexPackages =
c("\\usepackage{tikz}\n",
"\\usepackage[active,tightpage,psfixbb]{preview}\n",
"\\PreviewEnvironment{pgfpicture}\n",
"\\setlength\\PreviewBorder{0pt}\n",
"\\usepackage{amsmath}\n",
"\\usepackage{xfrac}\n"
)
)
setTikzDefaults(overwrite = FALSE)
print(options('tikzLatexPackages'))
library(reshape)
library(vars)
library(scales)
scl.str.DAT_DIR <- "~/Dropbox/research/absolutely_small/data/"
scl.str.FIG_DIR <- "~/Dropbox/research/absolutely_small/figures/"
## Load data
mat.df.DATA <- read.csv(paste(scl.str.DAT_DIR, "hou-2007-table-1-data.csv", sep = ""), stringsAsFactors = FALSE)
mat.df.DATA$t <- as.Date(as.character(mat.df.DATA$DATE), format = "%Y%m%d")
names(mat.df.DATA) <- c("ind", "size", "date", "ret", "t")
mat.df.SS <- read.csv(paste(scl.str.DAT_DIR, "june-ff-sum-stats-data.csv", sep = ""), stringsAsFactors = FALSE)
mat.df.SS$t <- as.Date(as.character(mat.df.SS$date), format = "%d%b%Y")
names(mat.df.SS) <- c("date", "ind", "size", "count", "ret", "t")
## Restrict date range
mat.df.DATA <- mat.df.DATA[mat.df.DATA$t >= as.Date("1963-07-01"), ]
mat.df.DATA <- mat.df.DATA[mat.df.DATA$t < as.Date("2002-01-01"), ]
## Create date index
mat.df.DATE_INDEX <- data.frame(t = sort(unique(mat.df.DATA$t)),
n = seq(1, length(unique(mat.df.DATA$t)))
)
mat.df.DATA <- merge(mat.df.DATA, mat.df.DATE_INDEX, by = c("t"))
mat.df.DATA <- mat.df.DATA[, c("t", "n", "ind", "size", "ret")]
## Compute autocorrelations
scl.int.NUM_IND <- 12
scl.int.NUM_LAG <- 6
mat.df.PLOT <- data.frame(ind = sort(rep(seq(1, scl.int.NUM_IND), scl.int.NUM_LAG + 1)),
lag = rep(seq(0, scl.int.NUM_LAG), scl.int.NUM_IND),
bet1 = NA,
se1 = NA,
bet2 = NA,
se2 = NA
)
for (i in 1:scl.int.NUM_IND) {
for (l in 0:scl.int.NUM_LAG) {
mat.df.CURRENT <- mat.df.DATA[(mat.df.DATA$ind == i) & (mat.df.DATA$size == 3), c("n", "ret")]
mat.df.CURRENT$ret <- (mat.df.CURRENT$ret - mean(mat.df.CURRENT$ret))/sd(mat.df.CURRENT$ret)
mat.df.LAGGED <- mat.df.DATA[(mat.df.DATA$ind == i) & (mat.df.DATA$size == 1), c("n", "ret")]
mat.df.LAGGED$ret <- (mat.df.LAGGED$ret - mean(mat.df.LAGGED$ret))/sd(mat.df.LAGGED$ret)
names(mat.df.LAGGED) <- c("n", "Lret")
mat.df.LAGGED$n <- mat.df.LAGGED$n - l
mat.df.REG <- merge(mat.df.CURRENT, mat.df.LAGGED, by = c("n"))
mat.df.PLOT[(mat.df.PLOT$ind == i) & (mat.df.PLOT$lag == l), ]$bet1 <- summary(lm(ret ~ 0 + Lret, data = mat.df.REG))$coef[1,1]
mat.df.PLOT[(mat.df.PLOT$ind == i) & (mat.df.PLOT$lag == l), ]$se1 <- summary(lm(ret ~ 0 + Lret, data = mat.df.REG))$coef[1,2]
}
for (l in 0:scl.int.NUM_LAG) {
mat.df.CURRENT <- mat.df.DATA[(mat.df.DATA$ind == i) & (mat.df.DATA$size == 1), c("n", "ret")]
mat.df.CURRENT$ret <- (mat.df.CURRENT$ret - mean(mat.df.CURRENT$ret))/sd(mat.df.CURRENT$ret)
mat.df.LAGGED <- mat.df.DATA[(mat.df.DATA$ind == i) & (mat.df.DATA$size == 3), c("n", "ret")]
mat.df.LAGGED$ret <- (mat.df.LAGGED$ret - mean(mat.df.LAGGED$ret))/sd(mat.df.LAGGED$ret)
names(mat.df.LAGGED) <- c("n", "Lret")
mat.df.LAGGED$n <- mat.df.LAGGED$n - l
mat.df.REG <- merge(mat.df.CURRENT, mat.df.LAGGED, by = c("n"))
mat.df.PLOT[(mat.df.PLOT$ind == i) & (mat.df.PLOT$lag == l), ]$bet2 <- summary(lm(ret ~ 0 + Lret, data = mat.df.REG))$coef[1,1]
mat.df.PLOT[(mat.df.PLOT$ind == i) & (mat.df.PLOT$lag == l), ]$se2 <- summary(lm(ret ~ 0 + Lret, data = mat.df.REG))$coef[1,2]
}
}
## Compute average firm counts
mat.df.PLOT2 <- ddply(mat.df.SS, c("t", "ind"), function(X)sum(X$count, na.rm = TRUE))
names(mat.df.PLOT2) <- c("t", "ind", "N")
mat.df.PLOT2 <- ddply(mat.df.PLOT2, c("ind"), function(X)round(mean(X$N, na.rm = TRUE)))
names(mat.df.PLOT2) <- c("ind", "N")
mat.df.PLOT2$N <- paste("$\\langle N \\rangle = ", mat.df.PLOT2$N, "$", sep = "")
mat.df.PLOT2$x <- 1.25
mat.df.PLOT2$y <- 0.625
## Compute mean and sd returns
mat.df.PLOT3 <- ddply(mat.df.SS, c("ind", "size"), function(X)c(round(mean(X$ret, na.rm = TRUE)/52 * 100, 2), round(sd(X$ret, na.rm = TRUE)/sqrt(52) * 100,2)))
names(mat.df.PLOT3) <- c("ind", "size", "avg", "sd")
mat.df.PLOT3 <- mat.df.PLOT3[mat.df.PLOT3$size != 2, ]
mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$avg <- paste("$\\mu_S = ", mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$avg, "{\\scriptstyle \\%}$", sep = "")
mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$avg <- paste("$\\mu_B = ", mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$avg, "{\\scriptstyle \\%}$", sep = "")
mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$sd <- paste("$\\sigma_S = ", mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$sd, "{\\scriptstyle \\%}$", sep = "")
mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$sd <- paste("$\\sigma_B = ", mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$sd, "{\\scriptstyle \\%}$", sep = "")
mat.df.PLOT3 <- melt(mat.df.PLOT3, c("ind", "size"))
mat.df.PLOT3$x <- NA
mat.df.PLOT3[mat.df.PLOT3$variable == "avg", ]$x <- 3.25
mat.df.PLOT3[mat.df.PLOT3$variable == "sd", ]$x <- 5.25
mat.df.PLOT3$y <- NA
mat.df.PLOT3[mat.df.PLOT3$size == 1, ]$y <- 0.625
mat.df.PLOT3[mat.df.PLOT3$size == 3, ]$y <- 0.375
## Plot distribution of total number of mentions
theme_set(theme_bw())
mat.df.PLOT$ind <- factor(mat.df.PLOT$ind,
levels = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'),
labels = c('Consumer Non-Durables', 'Consumer Durables', 'Manufacturing', 'Oil, Gas, Coal', 'Chemicals', 'Business Equipment', 'Telecommunications', 'Utilities', 'Wholesale/Retail Shops', 'Healthcare/Medical', 'Finance', 'Other')
)
mat.df.PLOT2$ind <- factor(mat.df.PLOT2$ind,
levels = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'),
labels = c('Consumer Non-Durables', 'Consumer Durables', 'Manufacturing', 'Oil, Gas, Coal', 'Chemicals', 'Business Equipment', 'Telecommunications', 'Utilities', 'Wholesale/Retail Shops', 'Healthcare/Medical', 'Finance', 'Other')
)
mat.df.PLOT3$ind <- factor(mat.df.PLOT3$ind,
levels = c('1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12'),
labels = c('Consumer Non-Durables', 'Consumer Durables', 'Manufacturing', 'Oil, Gas, Coal', 'Chemicals', 'Business Equipment', 'Telecommunications', 'Utilities', 'Wholesale/Retail Shops', 'Healthcare/Medical', 'Finance', 'Other')
)
mat.df.PLOT$ub1 <- mat.df.PLOT$bet1 + 2 * mat.df.PLOT$se1
mat.df.PLOT$lb1 <- mat.df.PLOT$bet1 - 2 * mat.df.PLOT$se1
mat.df.PLOT$ub2 <- mat.df.PLOT$bet2 + 2 * mat.df.PLOT$se2
mat.df.PLOT$lb2 <- mat.df.PLOT$bet2 - 2 * mat.df.PLOT$se2
scl.str.RAW_FILE <- 'hou-2007-table-1'
scl.str.TEX_FILE <- paste(scl.str.RAW_FILE,'.tex',sep='')
scl.str.PDF_FILE <- paste(scl.str.RAW_FILE,'.pdf',sep='')
scl.str.PNG_FILE <- paste(scl.str.RAW_FILE,'.png',sep='')
scl.str.AUX_FILE <- paste(scl.str.RAW_FILE,'.aux',sep='')
scl.str.LOG_FILE <- paste(scl.str.RAW_FILE,'.log',sep='')
tikz(file = scl.str.TEX_FILE, height = 3.5, width = 9, standAlone=TRUE)
obj.gg2.PLOT <- ggplot(data = mat.df.PLOT)
obj.gg2.PLOT <- obj.gg2.PLOT + scale_colour_brewer(palette="Set1")
obj.gg2.PLOT <- obj.gg2.PLOT + geom_ribbon(aes(x = lag,
ymax = ub1,
ymin = lb1,
group = ind
),
alpha = 0.25,
fill = "blue"
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_ribbon(aes(x = lag,
ymax = ub2,
ymin = lb2,
group = ind
),
alpha = 0.25,
fill = "red"
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_line(aes(x = lag,
y = bet1,
group = ind
),
size = 0.75,
colour = "blue"
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_line(aes(x = lag,
y = bet2,
group = ind
),
size = 0.75,
colour = "red"
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT2,
aes(x = x,
y = y,
label = N,
group = ind
),
size = 2.5
)
obj.gg2.PLOT <- obj.gg2.PLOT + geom_text(data = mat.df.PLOT3,
aes(x = x,
y = y,
label = value,
group = ind
),
size = 2.5
)
obj.gg2.PLOT <- obj.gg2.PLOT + coord_cartesian(xlim = c(-0.25, 6.25))
obj.gg2.PLOT <- obj.gg2.PLOT + facet_wrap(~ ind, ncol = 4)
obj.gg2.PLOT <- obj.gg2.PLOT + xlab("Lag: $l$ (weeks)")
obj.gg2.PLOT <- obj.gg2.PLOT + ylab("")
obj.gg2.PLOT <- obj.gg2.PLOT + scale_x_continuous(breaks = c(0, 1, 2, 3, 4, 6))
obj.gg2.PLOT <- obj.gg2.PLOT + theme(plot.margin = unit(c(1,0.10,0,-0.75), "lines"),
legend.position = "none",
axis.title = element_text(size = 8),
axis.text = element_text(size = 6),
plot.title = element_text(vjust = 1.75),
strip.text.x = element_text(size = 8),
panel.grid.minor = element_blank()
)
obj.gg2.PLOT <- obj.gg2.PLOT + ggtitle("30/70 Ken French Industry Cross-Autocorrelation: ${\\color{blue}\\mathrm{Cor}[r_{i,t}^B,r_{i,t+l}^S]}$ and ${\\color{red}\\mathrm{Cor}[r_{i,t}^S,r_{i,t+l}^B]}$")
print(obj.gg2.PLOT)
dev.off()
system(paste('pdflatex', file.path(scl.str.TEX_FILE)), ignore.stdout = TRUE)
system(paste('convert -density 600', file.path(scl.str.PDF_FILE), ' ', file.path(scl.str.PNG_FILE)))
system(paste('mv ', scl.str.PNG_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.TEX_FILE, sep = ''))
system(paste('mv ', scl.str.PDF_FILE, ' ', scl.str.FIG_DIR, sep = ''))
system(paste('rm ', scl.str.AUX_FILE, sep = ''))
system(paste('rm ', scl.str.LOG_FILE, sep = ''))
OPTIONS LINESIZE = 256;
OPTIONS PAGESIZE = 256;
LIBNAME comp '/wrds/comp/sasdata/naa';
LIBNAME crsp '/wrds/crsp/sasdata/a_stock';
LIBNAME cc '/wrds/crsp/sasdata/a_ccm';
LIBNAME treas '/wrds/crsp/sasdata/a_treas_bm';
%LET startDate = '01JAN1960'd;
%LET endDate = '31DEC2013'd;
;/*************************************************************************************
@section: PULL DATA IN JUNE
**************************************************************************************/
%LET msevars = ticker ncusip shrcd exchcd;
%LET msfvars = prc ret shrout cfacpr cfacshr hsiccd;
%INCLUDE '/wrds/crsp/samples/crspmerge.sas';
%CRSPMERGE(S = m,
START = &startDate,
END = &endDate,
SFVARS = &msfvars,
SEVARS = &msevars,
FILTERS = (exchcd in (1,2,3)) AND (shrcd in (10,11))
);
PROC SQL;
CREATE TABLE crsp_m AS
SELECT a.*,
b.dlret,
(SUM(1, ret) * SUM(1, dlret) - 1) AS retAdj,
ABS(a.prc * a.shrout) AS mcap2
FROM crsp_m AS a LEFT JOIN
crsp.msedelist (WHERE = (MISSING(dlret) = 0)) as b
ON (a.permno = b.permno) AND
(INTNX('month', a.date, 0, 'E') = INTNX('month', b.dlstdt, 0, 'E'));
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM crsp_m AS a
ORDER BY permco,
date;
QUIT;
;/*************************************************************************************
@section: ORGANIZE DATA BY PERMNO
**************************************************************************************/
PROC SQL;
CREATE TABLE crsp_m AS
SELECT a.*
FROM crsp_m AS a
ORDER BY a.date,
a.permco,
a.mcap2;
QUIT;
DATA crsp_m (DROP = mcap2);
SET crsp_m;
BY date
permco
mcap2;
RETAIN mcap;
IF (first.permco AND last.permco) THEN
DO;
mcap = mcap2;
OUTPUT;
END;
ELSE
DO;
IF (first.permco) THEN
DO;
mcap = mcap2;
END;
ELSE
DO;
mcap = SUM(mcap2, mcap);
END;
IF (last.permco) THEN
DO;
OUTPUT;
END;
END;
RUN;
PROC SQL;
CREATE TABLE crsp_m AS
SELECT a.*
FROM crsp_m AS a
ORDER BY permno,
date;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM crsp_m AS a
ORDER BY permco,
date;
QUIT;
;/*************************************************************************************
@section: ASSIGN FF INDUSTRIES
**************************************************************************************/
DATA crsp_m;
SET crsp_m;
ind = 12;
IF (100 <= hsiccd <= 999) THEN DO; ind = 1; END;
IF (2000 <= hsiccd <= 2399) THEN DO; ind = 1; END;
IF (2700 <= hsiccd <= 2749) THEN DO; ind = 1; END;
IF (2770 <= hsiccd <= 2799) THEN DO; ind = 1; END;
IF (3100 <= hsiccd <= 3199) THEN DO; ind = 1; END;
IF (3940 <= hsiccd <= 3989) THEN DO; ind = 1; END;
IF (2500 <= hsiccd <= 2519) THEN DO; ind = 2; END;
IF (2590 <= hsiccd <= 2599) THEN DO; ind = 2; END;
IF (3630 <= hsiccd <= 3659) THEN DO; ind = 2; END;
IF (3710 <= hsiccd <= 3711) THEN DO; ind = 2; END;
IF (3714 <= hsiccd <= 3714) THEN DO; ind = 2; END;
IF (3716 <= hsiccd <= 3716) THEN DO; ind = 2; END;
IF (3750 <= hsiccd <= 3751) THEN DO; ind = 2; END;
IF (3792 <= hsiccd <= 3792) THEN DO; ind = 2; END;
IF (3900 <= hsiccd <= 3939) THEN DO; ind = 2; END;
IF (3990 <= hsiccd <= 3999) THEN DO; ind = 2; END;
IF (2520 <= hsiccd <= 2589) THEN DO; ind = 3; END;
IF (2600 <= hsiccd <= 2699) THEN DO; ind = 3; END;
IF (2750 <= hsiccd <= 2769) THEN DO; ind = 3; END;
IF (3000 <= hsiccd <= 3099) THEN DO; ind = 3; END;
IF (3200 <= hsiccd <= 3569) THEN DO; ind = 3; END;
IF (3580 <= hsiccd <= 3629) THEN DO; ind = 3; END;
IF (3700 <= hsiccd <= 3709) THEN DO; ind = 3; END;
IF (3712 <= hsiccd <= 3713) THEN DO; ind = 3; END;
IF (3715 <= hsiccd <= 3715) THEN DO; ind = 3; END;
IF (3717 <= hsiccd <= 3749) THEN DO; ind = 3; END;
IF (3752 <= hsiccd <= 3791) THEN DO; ind = 3; END;
IF (3793 <= hsiccd <= 3799) THEN DO; ind = 3; END;
IF (3830 <= hsiccd <= 3839) THEN DO; ind = 3; END;
IF (3860 <= hsiccd <= 3899) THEN DO; ind = 3; END;
IF (1200 <= hsiccd <= 1399) THEN DO; ind = 4; END;
IF (2900 <= hsiccd <= 2999) THEN DO; ind = 4; END;
IF (2800 <= hsiccd <= 2829) THEN DO; ind = 5; END;
IF (2840 <= hsiccd <= 2899) THEN DO; ind = 5; END;
IF (3570 <= hsiccd <= 3579) THEN DO; ind = 6; END;
IF (3660 <= hsiccd <= 3692) THEN DO; ind = 6; END;
IF (3694 <= hsiccd <= 3699) THEN DO; ind = 6; END;
IF (3810 <= hsiccd <= 3829) THEN DO; ind = 6; END;
IF (7370 <= hsiccd <= 7379) THEN DO; ind = 6; END;
IF (4800 <= hsiccd <= 4899) THEN DO; ind = 7; END;
IF (4900 <= hsiccd <= 4949) THEN DO; ind = 8; END;
IF (5000 <= hsiccd <= 5999) THEN DO; ind = 9; END;
IF (7200 <= hsiccd <= 7299) THEN DO; ind = 9; END;
IF (7600 <= hsiccd <= 7699) THEN DO; ind = 9; END;
IF (2830 <= hsiccd <= 2839) THEN DO; ind = 10; END;
IF (3693 <= hsiccd <= 3693) THEN DO; ind = 10; END;
IF (3840 <= hsiccd <= 3859) THEN DO; ind = 10; END;
IF (8000 <= hsiccd <= 8099) THEN DO; ind = 10; END;
IF (6000 <= hsiccd <= 6999) THEN DO; ind = 11; END;
RUN;
;/*************************************************************************************
@section: KEEP JUNE DATA
**************************************************************************************/
PROC SQL;
CREATE TABLE crsp_june AS
SELECT DISTINCT a.permno FORMAT=8. AS permno,
a.date FORMAT=DATE9. AS date,
a.mcap FORMAT=BEST16. AS mcap,
a.ind FORMAT=2. AS ind,
(exp(sum(log(1+b.retAdj))) - 1) FORMAT=BEST16. AS cret
FROM crsp_m AS a,
crsp_m AS b
WHERE (a.permno = b.permno) AND
(MONTH(a.date) = 6) AND
(0 <= INTCK('month', a.date, b.date) < 12)
GROUP BY a.permno,
a.date;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM crsp_june AS a
ORDER BY permno,
date;
QUIT;
;/*************************************************************************************
@section: COMPUTE INTRA-INDUSTRY SIZE BUCKETS
**************************************************************************************/
PROC SQL;
CREATE TABLE crsp_june AS
SELECT a.*
FROM crsp_june AS a
ORDER BY a.date,
a.ind;
QUIT;
PROC UNIVARIATE DATA = crsp_june NOPRINT;
VAR mcap;
BY date ind;
OUTPUT OUT = breaks
PCTLPTS = 30 70
PCTLPRE = mcap;
RUN;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM breaks AS a
ORDER BY ind,
date;
QUIT;
PROC SQL;
CREATE TABLE crsp_june AS
SELECT a.*,
b.mcap30,
b.mcap70
FROM crsp_june AS a,
breaks AS b
WHERE (a.date = b.date) AND
(a.ind = b.ind);
QUIT;
DATA crsp_june;
SET crsp_june;
size = 1;
IF (mcap >= mcap30) THEN DO; size = 2; END;
IF (mcap >= mcap70) THEN DO; size = 3; END;
RUN;
PROC SQL;
CREATE TABLE crsp_count AS
SELECT a.date,
a.ind,
a.size,
COUNT(a.permno) AS count
FROM crsp_june AS a
GROUP BY a.date,
a.ind,
a.size;
QUIT;
PROC SQL;
CREATE TABLE crsp_june AS
SELECT a.*,
b.count
FROM crsp_june AS a,
crsp_count AS b
WHERE (a.date = b.date) AND
(a.ind = b.ind) AND
(a.size = b.size);
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM crsp_june AS a
ORDER BY permno,
date;
QUIT;
;/*************************************************************************************
@section: COMPUTE JUNE SUMMARY STATISTICS
**************************************************************************************/
PROC SQL;
CREATE TABLE crsp_june_ss AS
SELECT DISTINCT a.date,
a.ind,
a.size,
COUNT(a.permno) FORMAT=BEST16. AS count,
MEAN(a.cret) FORMAT=BEST16. AS cret
FROM crsp_june AS a
GROUP BY a.date,
a.ind,
a.size;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM crsp_june_ss AS a
ORDER BY date,
ind,
size;
QUIT;
PROC EXPORT
DATA = crsp_june_ss
OUTFILE = "june-ff-sum-stats-data.csv"
DBMS = CSV
REPLACE;
RUN;
ENDSAS;
;/*************************************************************************************
@section: PULL DAILY DATA
**************************************************************************************/
%LET dsevars = ticker ncusip shrcd exchcd;
%LET dsfvars = ret retx cfacpr cfacshr;
%INCLUDE '/wrds/crsp/samples/crspmerge.sas';
%CRSPMERGE(S = d,
START = &startDate,
END = &endDate,
SFVARS = &dsfvars,
SEVARS = &dsevars,
FILTERS = (exchcd in (1,2,3)) AND (shrcd in (10,11))
);
PROC SQL;
CREATE TABLE crsp_d AS
SELECT a.*,
WEEK(a.date) FORMAT=2. AS week,
YEAR(a.date) FORMAT=4. AS year,
b.dlret FORMAT=BEST16. AS dlret,
(SUM(1, ret) * SUM(1, dlret) - 1) FORMAT=BEST16. AS retAdj
FROM crsp_d AS a LEFT JOIN
crsp.dsedelist (WHERE = (MISSING(dlret) = 0)) AS b
ON (a.permno = b.permno) AND
(a.date = b.dlstdt)
ORDER BY permco,
date;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM crsp_d AS a
ORDER BY permco,
date;
QUIT;
;/*************************************************************************************
@section: COMPUTE EW INDUSTRY x SIZE WEEKLY RETURNS
**************************************************************************************/
PROC SQL;
CREATE TABLE crsp_d AS
SELECT DISTINCT a.*,
b.ind FORMAT=2. AS ind,
b.size FORMAT=1. AS size
FROM crsp_d AS a,
crsp_june AS b
WHERE (a.permno = b.permno) AND
(0 < INTCK('year', b.date, a.date) <= 1)
ORDER BY a.permno,
a.date;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM crsp_d AS a
ORDER BY permno,
date;
QUIT;
PROC SQL;
CREATE TABLE ind_size_d AS
SELECT DISTINCT a.date,
a.ind,
a.size,
MEAN(a.retAdj) FORMAT=BEST16. AS ret
FROM crsp_d AS a
GROUP BY a.date,
a.ind,
a.size;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM ind_size_d AS a
ORDER BY ind,
size,
date;
QUIT;
PROC SQL;
CREATE TABLE ind_size_w AS
SELECT DISTINCT a.ind,
a.size,
a.date,
(exp(sum(log(1+b.ret))) - 1) FORMAT=BEST16. AS cret
FROM ind_size_d AS a,
ind_size_d AS b
WHERE (a.ind = b.ind) AND
(a.size = b.size) AND
(WEEKDAY(a.date) = 3) AND
(0 <= INTCK('day', a.date, b.date) < 7)
GROUP BY a.ind,
a.size,
a.date;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM ind_size_w AS a
ORDER BY a.ind,
a.size,
a.date;
QUIT;
;/*************************************************************************************
@section: EXPORT RESULTS TO CSV FILES
**************************************************************************************/
PROC EXPORT
DATA = ind_size_w
OUTFILE = "hou-2007-table-1-data.csv"
DBMS = CSV
REPLACE;
RUN;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment