Created
September 4, 2017 22:58
-
-
Save sdeture/0346e40fae0ca4308a3cf600a893250e to your computer and use it in GitHub Desktop.
Reformatted ALS Replication Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
###Replication of Allen Larson and Sloan 2013### | |
#IMPORTANT | |
#Run all the libraries and functions up to line 204 in "libraries and functions for ALS replication" | |
##load in data (FUNDA universe 1950-present; CRSP universe 1950-present; and merged link table) | |
FUNDA <- read_csv("~/Documents/Research/Specific Accruals/FUNDA_f.csv") | |
CRSP <- read_csv("~/Documents/Research/Specific Accruals/CRSP_f.csv") | |
LINK <- read_csv("~/Documents/Research/Specific Accruals/linktable.csv") | |
#Convert to data.table format for faster computing | |
CRSP<-data.table(CRSP) | |
FUNDA<-data.table(FUNDA) | |
LINK<-data.table(LINK) | |
#make a copy of data | |
FUNDA_original <- FUNDA | |
CRSP_original <- CRSP | |
LINK_original <- LINK | |
#set this variable sets the number of clusters for the main cluster analysis | |
k<-3 | |
#Make a smaller version of data to test code, if desired | |
#FUNDA<-FUNDA_original[is.element(tic, c("MSFT","AAPL"))] | |
#CRSP<-CRSP_original[is.element(TICKER, c("MSFT","AAPL"))] | |
#remove some unneeded variables | |
#do not use sich because the data is not there before the 80s | |
FUNDA<-FUNDA[, .(gvkey, datadate, fyear, fyr, cusip, at, dlc, txp, che,lct,act,oibdp,sic,fic,popsrc, sale, emp, | |
prcc_f, csho, dltt, pstkl, txditc, xad, epspx, adjex_f)] | |
CRSP<-CRSP[, .(RET, PERMNO, date, CUSIP, EXCHCD)] | |
LINK<- LINK[, .(LPERMNO, LINKTYPE, gvkey, LINKDT, LINKENDDT)] | |
##create annual return var | |
##gives return over this month and past 12 months | |
CRSP[, RET:=as.numeric(RET)] | |
CRSP[,num_mo:=.N,PERMNO] | |
CRSP<-CRSP[num_mo>12] | |
CRSP[, ann_return := rollapplyr((1+RET), 12, prod), by = PERMNO] | |
CRSP[, ann_return := lagged(ann_return, 11), by = PERMNO] | |
##create MOM6 var | |
##this gives return over this month and previous 5] | |
CRSP[,num_mo:=.N,PERMNO] | |
CRSP<-CRSP[num_mo>12] | |
CRSP[, MOM6 := rollapplyr((1+RET), 6, prod), by = PERMNO] | |
CRSP[, MOM6 := lagged(MOM6, 17), by = PERMNO] | |
##create MOM36 var | |
##this gives return over this month and previous 35] | |
CRSP[,num_mo:=.N,PERMNO] | |
CRSP<-CRSP[num_mo>36] | |
CRSP[, MOM36 := rollapplyr((1+RET), 36, prod), by = PERMNO] | |
CRSP[, MOM36 := lagged(MOM36, 47), by = PERMNO] | |
# prepare to merge CRSP and FUNDA by creating a match_date variable to match on (note that | |
# the FUNDA matchdate is 16months ahead of the CRSP matchdate...this is because each CRSP | |
# row has return for past 12 months. For my analysis, I want returns for the 12 months | |
# start 4 months after FUNDA fiscal yearend. Fiscal Year end is defined in the third line, | |
# fyear+fyr). Take fiscal year end, add 16 months to it, and take return over previous | |
# 12 months from that date. | |
CRSP[,data_year := sub("^(\\d{4}).*$", "\\1", date)] | |
CRSP[,match_date := as.Date(as.character(date), "%Y%m%d")] | |
CRSP[,match_date := format(match_date, "%Y %m")] | |
FUNDA[,match_date := as.Date(paste(sprintf("%02d%02d", fyear, fyr), rep("01",length(fyear)), sep=""), "%Y%m%d")] | |
month(FUNDA$match_date)=month(FUNDA$match_date)+16 | |
FUNDA[,match_date := format(match_date, "%Y %m")] | |
# Get Permno into FUNDA by merging with LINK | |
LINK<-LINK[is.element(LINKTYPE, c('LU', 'LC', 'LD', 'LF', 'LN', 'LO', 'LS', 'LX'))] | |
FUNDA <- merge(FUNDA, LINK, by="gvkey", all.x = T, allow.cartesian = T) | |
FUNDA <- FUNDA[(LINKDT <= datadate) | (LINKDT=="B")] | |
FUNDA <- FUNDA[(datadate <= LINKENDDT) | (LINKDT=="E")] | |
FUNDA[, PERMNO:=LPERMNO] | |
FUNDA<-unique(FUNDA, by = c("datadate", "PERMNO")) | |
FUNDA<-FUNDA[, c("LINKDT","LINKENDDT", "LINKTYPE", "LPERMNO"):=NULL] | |
#save a copy | |
FUNDA_01<-FUNDA | |
#merge CRSP and COMPUSTAT data | |
FUNDA <- merge(FUNDA, CRSP,by=c("PERMNO", "match_date"),all.x = T) | |
FUNDA<-unique(FUNDA, by = c("match_date", "PERMNO")) | |
#save a copy | |
FUNDA1<-FUNDA | |
##define ACC variable | |
FUNDA[,avg_at:=(at+lagged(at,1))/2,by=gvkey] | |
FUNDA[dlc == NA, dlc := 0] | |
FUNDA[txp == NA, txp := 0] | |
FUNDA[,ACC:= (delta(act,1)-delta(che,1)-delta(lct,1)+delta(dlc,1)+delta(txp,1)),by=gvkey] | |
FUNDA[,ACC:= ACC/avg_at, by=gvkey] | |
FUNDA[,ann_return:=ann_return-1] | |
#save a copy | |
FUNDA1_0<-FUNDA | |
##define INC and CF variables | |
FUNDA[,INC:=oibdp/avg_at] | |
FUNDA[,CF:=INC-ACC] | |
#Winsorize | |
FUNDA[, ACC:=winsorize(ACC)] | |
FUNDA[, INC:=winsorize(INC)] | |
FUNDA[, CF:=winsorize(CF)] | |
#define leads and lags of INC and CF variables | |
FUNDA[,lead_INC:=lead(INC,1), by=gvkey] | |
FUNDA[,lead_CF:=lead(CF,1), by=gvkey] | |
FUNDA[,lead_ACC:=lead(ACC,1), by=gvkey] | |
#save a copy of data before moving forward | |
FUNDA_retro<-FUNDA | |
#restrict sample to just their sample | |
FUNDA<-FUNDA[!(6000<sic & sic<6999)] | |
FUNDA<-FUNDA[fic=="USA"] | |
FUNDA<-FUNDA[popsrc=="D"] | |
FUNDA<-FUNDA[(1961<fyear & fyear<2009)] | |
FUNDA2<-FUNDA ##save data | |
FUNDA<-FUNDA[!(EXCHCD==0)] | |
FUNDA3<-FUNDA ##save data | |
FUNDA[, change:=length(unique(fyr)), gvkey] #number of unique fyear months for each firm | |
FUNDA<-FUNDA[!(is.na(ACC+CF+ann_return+INC+lead_INC+lead_ACC+lead_CF))] | |
##NOTE: Each section table code outputs latex code to generate that table | |
###TABLE 1### | |
table1<-data.table(FUNDA[,.(ACC,CF,INC,ann_return)]) | |
pearson.correlation.matrix <- cor(FUNDA[,.(ACC, lead_ACC, CF, lead_CF, INC, lead_INC, ann_return)],method="pearson") | |
spearman.correlation.matrix <- cor(FUNDA[,.(ACC, lead_ACC, CF, lead_CF, INC, lead_INC, ann_return)],method="spearman") | |
correlation.matrix<-upper.tri(pearson.correlation.matrix)*pearson.correlation.matrix + lower.tri(spearman.correlation.matrix)*spearman.correlation.matrix | |
stargazer(table1, summary.stat = c("mean", "sd", "p25", "median", "p75"), title="Univariate Statistics") | |
stargazer(correlation.matrix, title="Pair-wise correlations--Pearson (above) and Spearman (below) diagonal") | |
###TABLE 2### | |
#make quantiles of ACC CF INC and leads using user defined function (in libraries and functions file) | |
FUNDA[, quint_INC:=rank_data(INC,5), fyear] | |
FUNDA[, quint_leadINC:=rank_data(lead_INC,5), fyear] | |
FUNDA[, quint_CF:=rank_data(CF,5), fyear] | |
FUNDA[, quint_leadCF:=rank_data(lead_CF,5), fyear] | |
FUNDA[, quint_ACC:=rank_data(ACC,5), fyear] | |
FUNDA[, quint_leadACC:=rank_data(lead_ACC,5), fyear] | |
##make a function that creates each panel of table 2 | |
##input variables quintXXX and quintXXXlead for x and y | |
##input variables XXX and XXXlead for xx and yy | |
##input title of table and notes as ttt and nnn | |
tab2<-function(x, y, ttt,nnn) { | |
#make a basic two way table | |
table2a<-table(x, y) | |
table2a<-round(100*table2a/sum(table2a),2) | |
#make a list containing the two tway table above redone for every year | |
A<-list() | |
for (i in min(FUNDA$fyear):max(FUNDA$fyear)) { | |
name <- paste('year:',i,sep='') | |
A[[name]]<-as.matrix(table(x[FUNDA$fyear==i],y[FUNDA$fyear==i])) | |
} | |
A<-lapply(A, function(z) 100*z/sum(z)) | |
#find element wise sd of that list | |
sd_thing<-round(apply(array(unlist(A), c(5, 5, length(A))), c(1,2), sd),2) | |
##make a table of 1 (if significantly different from 4 at 1% level t test) and 0 otherwise | |
stars<-(abs((table2a-4)/(sd_thing/sqrt(length(A))))>2.7)+0 | |
stars<-replace(as.vector(stars), stars==1, "*") | |
stars<-replace(as.vector(stars), stars==0, "") | |
stars<-matrix(stars, nrow=5, ncol=5) | |
bold<-(table2a>4)+0 | |
boldpre<-replace(as.vector(bold), bold==1, "||") | |
boldpre<-replace(as.vector(boldpre), bold==0, "") | |
boldpre<-matrix(boldpre, nrow=5, ncol=5) | |
boldpost<-replace(as.vector(bold), bold==1, "||") | |
boldpost<-replace(as.vector(boldpost), bold==0, "") | |
boldpost<-matrix(boldpost, nrow=5, ncol=5) | |
##add percentage marks and stars to the two way table, then make latex code | |
table2af<-matrix(paste(boldpre, table2a, "%", boldpost, stars,sep=""), nrow=5, ncol=5) | |
stargazer(table2af, title=ttt, colnames=T,rownames=T) | |
} | |
#panel a | |
tab2(FUNDA$quint_INC,FUNDA$quint_leadINC, "Distribution of firm-years across consecutive annual income quintiles","") | |
#panel b | |
tab2(FUNDA$quint_CF,FUNDA$quint_leadCF, "Distribution of firm-years across consecutive annual cash flow quintiles") | |
#panel c | |
tab2(FUNDA$quint_ACC,FUNDA$quint_leadACC, "Distribution of firm-years across consecutive annual accrual quintiles") | |
###TABLE 3### | |
##PANEL A## | |
#define more variables, and save a copy of the data | |
FUNDA[,lagged_CF:=lagged(CF,1), by=gvkey] | |
FUNDA[,sales_growth:=(sale-lagged(sale,1))/lagged(sale,1), by=gvkey] | |
FUNDA[sales_growth==Inf, sales_growth:=NA] | |
FUNDA[,sales_growth:=wins99(sales_growth), by=fyear] | |
FUNDA[,emp_growth:=(emp-lagged(emp,1))/lagged(emp,1), by=gvkey] | |
FUNDA[emp_growth==Inf, emp_growth:=NA] | |
FUNDA[,emp_growth:=wins99(emp_growth), by=fyear] | |
FUNDAt3<-FUNDA | |
#define two digit SIC...note that we have to remove sic groups that have | |
#too few observations to run the model to avoid errors in the lapply stage | |
FUNDA[,sic_group := sub("^(\\d{2}).*$", "\\1", sic)] | |
FUNDA[,sic_size:=.N,sic_group] | |
#make list of large enough sic groups, then run the model on each group. | |
#the output is a LIST of models, stored as tab3_lms | |
vec_of_sic<-unique(FUNDA[sic_size>30]$sic_group) | |
tab3_lms <- lapply(1:length(vec_of_sic), | |
function(x) | |
lm(ACC~ | |
sales_growth+emp_growth+ | |
lagged_CF+CF+lead_CF, | |
FUNDA[sic_group==vec_of_sic[x]], na.action=na.omit)) | |
##produce a LIST of reduced models | |
tab3_lms_mddgrowth <- lapply(1:length(vec_of_sic), | |
function(x) | |
lm(ACC~ | |
sales_growth+emp_growth, | |
FUNDA[sic_group==vec_of_sic[x]], na.action=na.omit)) | |
##produce a LIST of reduced models | |
tab3_lms_mddmatch <- lapply(1:length(vec_of_sic), | |
function(x) | |
lm(ACC~ | |
lagged_CF+CF+lead_CF, | |
FUNDA[sic_group==vec_of_sic[x]], na.action=na.omit)) | |
#create partial R^2 for each model | |
partial_R_growth<-lapply(1:length(tab3_lms), | |
function(x) | |
rsq.partial(tab3_lms[[x]], tab3_lms_mddgrowth[[x]],type = "sse")$partial.rsq) | |
partial_R_match<-lapply(1:length(tab3_lms), | |
function(x) | |
rsq.partial(tab3_lms[[x]], tab3_lms_mddmatch[[x]],type = "sse")$partial.rsq) | |
#pull coefficients, partial Rsquares, Rsquare, and TStats from each model | |
tab3_summaries<-lapply(1:length(tab3_lms), function(x) | |
append( | |
append( | |
append( | |
append(tab3_lms[[x]]$coefficients, | |
partial_R_growth[[x]]), | |
partial_R_match[[x]]), | |
summary(tab3_lms[[x]])$r.square), | |
summary(tab3_lms[[x]])$coefficients[,3] | |
)) | |
##find mean and percentiles,accross industries, of the summary stats produced above | |
mean_of_lm<-round(apply(array(unlist(tab3_summaries), c(15, 1, length(tab3_summaries))), c(1,2), mean),3) | |
q25_of_lm<-round(apply(array(unlist(tab3_summaries), c(15, 1, length(tab3_summaries))), c(1,2), function(x) quantile(x,.25)),3) | |
q50_of_lm<-round(apply(array(unlist(tab3_summaries), c(15, 1, length(tab3_summaries))), c(1,2), function(x) quantile(x,.50)),3) | |
q75_of_lm<-round(apply(array(unlist(tab3_summaries), c(15, 1, length(tab3_summaries))), c(1,2), function(x) quantile(x,.75)),3) | |
##bind the results into a table. Note that the first 9 elements of the above vectors relate to | |
##coefficients and R^2,and the last 6 elements of each vector relate to TStats | |
tab3<-cbind(mean_of_lm[1:9], append(mean_of_lm[10:15], c("", "","")), q25_of_lm[1:9], q50_of_lm[1:9],q75_of_lm[1:9]) | |
rownames(tab3) <- c("Intercept", "Sales Growth","Employee Growth", "Lagged Cash Flow", "Cash Flow", "Lead Cash Flow", "Partial R^2 Growth", "Partial R^2 Match", "R^2") | |
rownames(tab3, do.NULL = F) | |
colnames(tab3)<- c("Mean", "T-Statistic", "25th Percentile", "Median","75th Percentile") | |
##make the latex code for the table3 panel A | |
stargazer(tab3, title="Distribution of coefficients for the modified Dechow and Dichev (MDD) Model") | |
##PANEL B## | |
#Calculate fitted values for MDD model | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDGOOD:=tab3_lms[[x]]$coefficients[1]+ | |
sales_growth*tab3_lms[[x]]$coefficients[2]+ | |
emp_growth*tab3_lms[[x]]$coefficients[3]+ | |
lagged_CF*tab3_lms[[x]]$coefficients[4]+ | |
CF*tab3_lms[[x]]$coefficients[5]+ | |
lead_CF*tab3_lms[[x]]$coefficients[6] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDGROWTH:= | |
sales_growth*tab3_lms[[x]]$coefficients[2]+ | |
emp_growth*tab3_lms[[x]]$coefficients[3] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDMATCH:= | |
lagged_CF*tab3_lms[[x]]$coefficients[4]+ | |
CF*tab3_lms[[x]]$coefficients[5]+ | |
lead_CF*tab3_lms[[x]]$coefficients[6] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDERROR:=ACC-(tab3_lms[[x]]$coefficients[1]+ | |
sales_growth*tab3_lms[[x]]$coefficients[2]+ | |
emp_growth*tab3_lms[[x]]$coefficients[3]+ | |
lagged_CF*tab3_lms[[x]]$coefficients[4]+ | |
CF*tab3_lms[[x]]$coefficients[5]+ | |
lead_CF*tab3_lms[[x]]$coefficients[6]) | |
]) | |
#Create lagged values for MDD fitted values | |
FUNDA[,lagged_MDDGOOD:=lagged(MDDGOOD,1), by=gvkey] | |
FUNDA[,lagged_MDDGROWTH:=lagged(MDDGROWTH,1), by=gvkey] | |
FUNDA[,lagged_MDDMATCH:=lagged(MDDMATCH,1), by=gvkey] | |
FUNDA[,lagged_MDDERROR:=lagged(MDDERROR,1), by=gvkey] | |
FUNDA[,lead_MDDGOOD:=lead(MDDGOOD,1), by=gvkey] | |
FUNDA[,lead_MDDGROWTH:=lead(MDDGROWTH,1), by=gvkey] | |
FUNDA[,lead_MDDMATCH:=lead(MDDMATCH,1), by=gvkey] | |
FUNDA[,lead_MDDERROR:=lead(MDDERROR,1), by=gvkey] | |
#compute correlations and pvalues | |
CPvalues<-corr.test(FUNDA[,.(MDDGOOD,MDDGROWTH,MDDMATCH,MDDERROR,lagged_MDDGOOD,lagged_MDDGROWTH,lagged_MDDMATCH), | |
lagged_MDDERROR]) | |
#organize into table, then round, then add parenthesis for pvalues, then add row labels | |
tab3b<-rbind(CPvalues$r[2,6], CPvalues$p[2,6], CPvalues$r[3,7], CPvalues$p[3,7], | |
CPvalues$r[4,8], CPvalues$p[4,8], CPvalues$r[5,1], CPvalues$p[5,1]) | |
tab3b<-round(tab3b, 3) | |
auto_correlation<-paste(rep(c("","("),4), tab3b, rep(c("",")"),4),sep="") | |
tab3b<-as.data.frame(auto_correlation,nrow=16) | |
rownames(tab3b, do.NULL = F) | |
rownames(tab3b) <- c("MDDGOOD", "", "MDDGROWTH"," " ,"MDDMATCH", " ","MDDERROR"," ") | |
#make latex code for table 3b | |
stargazer(as.matrix(tab3b), title="Autocorrelations of good accruals and accrual estimation error") | |
###TABLE 4### | |
##Panel A## | |
#Define variables | |
FUNDA[,lagged_ACC:=lagged(ACC,1), by=gvkey] | |
##function inputs a number of clusters, and outputs a table 4 panel A | |
make.tab4<-function(x) { | |
m1 <- flexmix(ACC ~ lagged_ACC, data = FUNDA[(!is.na(ACC))&(!is.na(lagged_ACC))], k = x) | |
tab4<-cbind( t(as.matrix(parameters(m1))), | |
table(clusters(m1))/sum(table(clusters(m1))), append(AIC(m1),rep(NA,x-1))) | |
colnames(tab4, do.NULL = F) | |
colnames(tab4) <- c("alpha", "beta","" ,"cluster size", "AIC") | |
return(round(tab4[,c(1,2,4,5)],3)) | |
} | |
#bind together outputs from above to make a pretty table, then add row names, then make latex code for the table | |
tab4<-rbind(rep("",4), make.tab4(1), rep("",4), make.tab4(2), rep("",4), make.tab4(3)) | |
row.names(tab4)<-c("One Cluster","Cluster 1","Two Clusters","Cluster 1","Cluster 2","Three Clusters","Cluster 1", "Cluster 2", "Cluster 3") | |
stargazer(tab4, title="Latent class mixture models foraccrual autoregressions: cluster analysis") | |
##Panel B## | |
#Make a version of FUNDA that includes mixture classes for use in tables 4 panel b | |
FUNDAt4<-FUNDA[(!is.na(ACC))&(!is.na(lagged_ACC))] | |
m2<-flexmix(ACC ~ lagged_ACC, data = FUNDAt4, k = 2) | |
m3<-flexmix(ACC ~ lagged_ACC, data = FUNDAt4, k = 3) | |
m4<-flexmix(ACC ~ lagged_ACC, data = FUNDAt4, k = 4) | |
FUNDAt4[,m2cluster:=clusters(m2)] | |
FUNDAt4[,m3cluster:=clusters(m3)] | |
FUNDAt4[,m4cluster:=clusters(m4)] | |
##this function inputs a number, k, the number of clusters...and outputs a list of k linear models: | |
##the main model evaluated within each cluster (have to hand set k=3, done at beggining of this file) | |
make.tab4b<- function(k) { return( | |
lapply(1:k, | |
function(x) | |
lm1<-lm(ACC~sales_growth+emp_growth+lagged_CF+CF+lead_CF, | |
FUNDAt4[eval(parse(text=paste("m",k,"cluster", sep="")))==x], na.action=na.omit)) | |
) } | |
##this function inputs a number, k, the number of clusters...and outputs a list of k linear models: | |
##the reduced growth model evaluated within each cluster (have to hand set k=3, done at beggining of this file) | |
make.tab4bGROWTH<- function(k) { return( | |
lapply(1:k, | |
function(x) | |
lm1<-lm(ACC~sales_growth+emp_growth, | |
FUNDAt4[eval(parse(text=paste("m",k,"cluster", sep="")))==x], na.action=na.omit)) | |
) } | |
##this function inputs a number, k, the number of clusters...and outputs a list of k linear models: | |
##the reduced match model evaluated within each cluster (have to hand set k=3, done at beggining of this file) | |
make.tab4bMATCH<- function(k) { return( | |
lapply(1:k, | |
function(x) | |
lm1<-lm(ACC~lagged_CF+CF+lead_CF, | |
FUNDAt4[eval(parse(text=paste("m",k,"cluster", sep="")))==x], na.action=na.omit)) | |
) } | |
##THIS MAIN FUNCTION inputs the number of clusters, and calls the previous function to make models, then organizes the results of that | |
##model into table 4b. This if the function you should use to replicate table4b if you come back to this project. | |
make.tab4b.final <- function(num_cluster) { | |
tab4_lms <- | |
make.tab4b(num_cluster) #calls a previous function to creates linear models | |
tab4_lms_mddgrowth<-make.tab4bGROWTH(num_cluster) #calls a previous function to creates reduced version linear models | |
tab4_lms_mddmatch<-make.tab4bMATCH(num_cluster) #calls a previous function to creates reduced version linear models | |
partial_R_growth<-lapply(1:length(tab4_lms), #calculates partial Rsq for each linear model | |
function(x) | |
rsq.partial(tab4_lms[[x]], tab4_lms_mddgrowth[[x]],type = "sse")$partial.rsq) | |
partial_R_match<-lapply(1:length(tab4_lms), #calculates partial Rsq for each linear model | |
function(x) | |
rsq.partial(tab4_lms[[x]], tab4_lms_mddmatch[[x]],type = "sse")$partial.rsq) | |
tab4b_summaries <- | |
lapply(1:length(tab4_lms), function(x) | |
#pull summary stats from each linear model in the above list, so we can put this info into a table | |
append( | |
append( | |
append( | |
append(tab4_lms[[x]]$coefficients, | |
partial_R_growth[[x]]), | |
partial_R_match[[x]] | |
), | |
summary(tab4_lms[[x]])$r.square | |
), | |
coeftest.cluster(data.frame(FUNDAt4[eval(parse(text=paste("m",k,"cluster", sep="")))==x]), tab4_lms[[x]], cluster1="gvkey",cluster2 = "fyear")[,3] | |
)) | |
table4bmain <- t(matrix(unlist(tab4b_summaries), c(15, length(tab4b_summaries)))) ##organize summary into matrix | |
table4bmain<-round(table4bmain,3) | |
table4b<-table4bmain[,1:9] #keep just coefficient and r2 info from matrix | |
colnames(table4b, do.NULL=FALSE) ##add columns and make coefficient/r2 matrix pretty ** | |
colnames(table4b) <- | |
c("intcpt", | |
"SGR", | |
"EMPGR", | |
"LagCF", | |
"CF", | |
"LeadCF", | |
"R^2 Growth", | |
"R^2 Match", | |
"R^2") | |
rownames(table4b) <- | |
as.vector(unlist(lapply(1:num_cluster, function(x) | |
paste("Cluster", x)))) | |
table4bt<-table4bmain[,10:15] #keep just t-stat info from main matrix | |
table4bt<-matrix(paste("(",table4bt,")",sep=""),ncol=6) | |
colnames(table4bt) <- | |
c("intcpt", | |
"SGR", | |
"EMPGR", | |
"LagCF", | |
"CF", | |
"LeadCF") | |
rownames(table4bt) <- | |
as.vector(unlist(lapply(1:num_cluster, function(x) | |
paste("Cluster", x)))) | |
table4b<-cbind(Cluster=row.names(table4b), table4b) #organize into table | |
table4bt<-cbind(Cluster=row.names(table4bt), table4bt) | |
table4b<-rbindlist(list(data.table(table4b),data.table(table4bt)), fill=T)[order(Cluster)] | |
table4b[is.na(table4b)]<-"" | |
table4b[2*(1:num_cluster)]$Cluster<-"" | |
table4b<-as.matrix(table4b) | |
return(table4b) | |
} | |
stargazer(make.tab4b.final(3), summary= FALSE,title="Regressions of accruals on growth and cash flow variables by accrual cluster",column.sep.width="2pt" ) | |
####TABLE 5#### | |
#step one, create a version of MDDMATCH/GOOD/ACCRUALS/ERROR without leadCF in the model | |
tab5_lms <- lapply(1:length(vec_of_sic), | |
function(x) | |
lm(ACC~ | |
sales_growth+emp_growth+ | |
lagged_CF+CF, | |
FUNDA[sic_group==vec_of_sic[x]], na.action=na.omit)) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDGOOD5:=tab5_lms[[x]]$coefficients[1]+ | |
sales_growth*tab5_lms[[x]]$coefficients[2]+ | |
emp_growth*tab5_lms[[x]]$coefficients[3]+ | |
lagged_CF*tab5_lms[[x]]$coefficients[4]+ | |
CF*tab5_lms[[x]]$coefficients[5] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDGROWTH5:= | |
sales_growth*tab5_lms[[x]]$coefficients[2]+ | |
emp_growth*tab5_lms[[x]]$coefficients[3] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDMATCH5:= | |
lagged_CF*tab5_lms[[x]]$coefficients[4]+ | |
CF*tab5_lms[[x]]$coefficients[5] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDERROR5:=ACC-(tab5_lms[[x]]$coefficients[1]+ | |
sales_growth*tab5_lms[[x]]$coefficients[2]+ | |
emp_growth*tab5_lms[[x]]$coefficients[3]+ | |
lagged_CF*tab5_lms[[x]]$coefficients[4]+ | |
CF*tab5_lms[[x]]$coefficients[5]) | |
]) | |
#step two, create winsorized variables if they havent been used already MMDMATCH5/MDDGROWTH5 etc | |
#are the versions of MDD components calculated without leadCF | |
FUNDA[,MDDMATCH:=winsorize(MDDMATCH)] | |
FUNDA[,MDDGROWTH:=winsorize(MDDGROWTH)] | |
FUNDA[,MDDGOOD:=winsorize(MDDGOOD)] | |
FUNDA[,MDDERROR:=winsorize(MDDERROR)] | |
FUNDA[,MDDMATCH5:=winsorize(MDDMATCH5)] | |
FUNDA[,MDDGROWTH5:=winsorize(MDDGROWTH5)] | |
FUNDA[,MDDGOOD5:=winsorize(MDDGOOD5)] | |
FUNDA[,MDDERROR5:=winsorize(MDDERROR5)] | |
#step three, define models (one version to get r square, one version to get GOW t-values) | |
tab5lm1r<-lm(lead_INC~CF+ACC, FUNDA) | |
tab5lm1<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+ACC, FUNDA), cluster1="gvkey",cluster2="fyear") | |
tab5lm2r<-lm(lead_INC~CF+MDDGOOD+MDDERROR, FUNDA) | |
tab5lm2<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+MDDGOOD+MDDERROR, FUNDA), cluster1="gvkey",cluster2="fyear") | |
tab5lm3r<-lm(lead_INC~CF+MDDGROWTH+MDDMATCH+MDDERROR, FUNDA) | |
tab5lm3<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+MDDGROWTH+MDDMATCH+MDDERROR, FUNDA), cluster1="gvkey",cluster2="fyear") | |
tab5lm4r<-lm(lead_INC~CF+MDDGOOD5+MDDERROR5, FUNDA) | |
tab5lm4<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+MDDGOOD5+MDDERROR5, FUNDA), cluster1="gvkey",cluster2="fyear") | |
tab5lm5r<-lm(lead_INC~CF+MDDGROWTH5+MDDMATCH5+MDDERROR5, FUNDA) | |
tab5lm5<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+MDDGROWTH5+MDDMATCH5+MDDERROR5, FUNDA), cluster1="gvkey",cluster2="fyear") | |
#step four, put it all into a table | |
tab5<-smartbind(tab5lm1[,1], round(tab5lm1[,3],2), tab5lm2[,1], round(tab5lm2[,3],2), tab5lm3[,1], round(tab5lm3[,3],2),tab5lm4[,1], round(tab5lm4[,3],2),tab5lm5[,1], round(tab5lm5[,3],2)) | |
tab5[7:10,4:7]<-tab5[7:10,8:11] | |
tab5<-tab5[,1:7] | |
adj_r_sq<-c(summary(tab5lm1r)$adj.r.squared,NA,summary(tab5lm2r)$adj.r.squared, NA, summary(tab5lm3r)$adj.r.squared, NA, summary(tab5lm4r)$adj.r.squared, NA, summary(tab5lm5r)$adj.r.squared, NA) | |
tab5<-cbind(tab5,adj_r_sq) | |
tab5<-round(tab5,3) | |
parenth<-rbind(rep(FALSE,8), !is.na(tab5[2,]),rep(FALSE,8), !is.na(tab5[4,]),rep(FALSE,8), !is.na(tab5[6,]), rep(FALSE,8), !is.na(tab5[8,]), rep(FALSE,8), !is.na(tab5[10,])) | |
parenthpre<-replace(as.vector(parenth), parenth, "(") | |
parenthpre<-matrix(replace(as.vector(parenthpre), !parenth, ""),nrow=10) | |
parenthpost<-replace(as.vector(parenth), parenth, ")") | |
parenthpost<-matrix(replace(as.vector(parenthpost), !parenth, ""),nrow=10) | |
tab5[is.na(tab5)]<-"" | |
tab5<-matrix(paste(parenthpre,as.matrix(tab5),parenthpost,sep=""),nrow=10) | |
tab5<-data.table(tab5) | |
tab5<-tab5[,c(1,2,3,4,6,7,5,8)] | |
tab5<-rbind(c("Panel A"), tab5[1:2], c("Panel B"), tab5[3:6], c("Panel C"), tab5[7:10],fill=T) | |
tab5[is.na(tab5)]<-"" | |
colnames(tab5)<-c("","intcpt","CF","ACC","good","growth","match","error","Adj R Squared") | |
stargazer(as.matrix(tab5), summary=FALSE,title="Pooled cross-sectional regressions of next year's income on cash flows, accruals and accrual components from the MDD Model.",column.sep.width="2pt" ) | |
#####TABLE 6##### | |
#step one, make some new variables | |
FUNDA[,size := log(prcc_f*csho)] | |
FUNDA[,mk2bk := at/(prcc_f*csho+dlc+dltt+pstkl+txditc)] | |
#step two, define models (one version to get r square, one version to get GOW t-values) | |
tab6lm1r<-lm(ann_return~CF+ACC+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm1<-coeftest.cluster(data.frame(FUNDA), tab6lm1r, cluster1="gvkey",cluster2="fyear") | |
tab6lm2r<-lm(ann_return~CF+MDDGOOD+MDDERROR+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm2<-coeftest.cluster(data.frame(FUNDA), tab6lm2r, cluster1="gvkey",cluster2="fyear") | |
tab6lm3r<-lm(ann_return~CF+MDDGROWTH+MDDMATCH+MDDERROR+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm3<-coeftest.cluster(data.frame(FUNDA), tab6lm3r, cluster1="gvkey",cluster2="fyear") | |
tab6lm4r<-lm(ann_return~CF+MDDGOOD5+MDDERROR5+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm4<-coeftest.cluster(data.frame(FUNDA), tab6lm4r, cluster1="gvkey",cluster2="fyear") | |
tab6lm5r<-lm(ann_return~CF+MDDGROWTH5+MDDMATCH5+MDDERROR5+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm5<-coeftest.cluster(data.frame(FUNDA), tab6lm5r, cluster1="gvkey",cluster2="fyear") | |
#step four, put it all into a table | |
tab6<-smartbind(tab6lm1[,1], round(tab6lm1[,3],2), tab6lm2[,1], round(tab6lm2[,3],2), tab6lm3[,1], round(tab6lm3[,3],2),tab6lm4[,1], round(tab6lm4[,3],2),tab6lm5[,1], round(tab6lm5[,3],2)) | |
tab6[7:10,8:11]<-tab6[7:10,12:15] | |
tab6<-tab6[,1:11] | |
adj_r_sq<-c(summary(tab6lm1r)$adj.r.squared,NA,summary(tab6lm2r)$adj.r.squared, NA, summary(tab6lm3r)$adj.r.squared, NA, summary(tab6lm4r)$adj.r.squared, NA, summary(tab6lm5r)$adj.r.squared, NA) | |
tab6<-cbind(tab6,adj_r_sq) | |
tab6<-round(tab6,3) | |
parenth<-rbind(rep(FALSE,8), !is.na(tab6[2,]),rep(FALSE,8), !is.na(tab6[4,]),rep(FALSE,8), !is.na(tab6[6,]), | |
rep(FALSE,8), !is.na(tab6[8,]), rep(FALSE,8), !is.na(tab6[10,])) | |
parenthpre<-replace(as.vector(parenth), parenth, "(") | |
parenthpre<-matrix(replace(as.vector(parenthpre), !parenth, ""),nrow=10) | |
parenthpost<-replace(as.vector(parenth), parenth, ")") | |
parenthpost<-matrix(replace(as.vector(parenthpost), !parenth, ""),nrow=10) | |
tab6[is.na(tab6)]<-"" | |
tab6<-matrix(paste(parenthpre,as.matrix(tab6),parenthpost,sep=""),nrow=10) | |
tab6<-data.table(tab6) | |
tab6<-tab6[,c(1,2,3,8,10,11,9,5,4,6,7,12)] | |
tab6<-rbind(c("Panel A"), tab6[1:2], c("Panel B"), tab6[3:6], c("Panel C"), tab6[7:10],fill=TRUE) | |
tab6[is.na(tab6)]<-"" | |
colnames(tab6)<-c("","intcpt","CF","ACC","good","growth","match","error","BM","size","M6","M36","Adj R Squared") | |
stargazer(as.matrix(tab6), summary=FALSE,title="Pooled cross-sectional regressions of next year's stock returns on cash flows, accruals and accrual components from the MDD Model.",column.sep.width="2pt" ) | |
#merge cluster data in with rest of FUNDA, then save a version--Only necessary for extension purposes | |
FUNDA<-merge(FUNDA, FUNDAt4[,.(m3cluster, match_date, gvkey)], by=c("gvkey","match_date"),all.x=TRUE) | |
write.csv(FUNDA, file = "/Users/skylardeture/documents/Research/Specific Accruals/end_of_replication_data_8_26_2017.csv") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment