Reformatted ALS Replication Code
###Replication of Allen Larson and Sloan 2013### | |
#IMPORTANT | |
#Run all the libraries and functions up to line 204 in "libraries and functions for ALS replication" | |
##load in data (FUNDA universe 1950-present; CRSP universe 1950-present; and merged link table) | |
FUNDA <- read_csv("~/Documents/Research/Specific Accruals/FUNDA_f.csv") | |
CRSP <- read_csv("~/Documents/Research/Specific Accruals/CRSP_f.csv") | |
LINK <- read_csv("~/Documents/Research/Specific Accruals/linktable.csv") | |
#Convert to data.table format for faster computing | |
CRSP<-data.table(CRSP) | |
FUNDA<-data.table(FUNDA) | |
LINK<-data.table(LINK) | |
#make a copy of data | |
FUNDA_original <- FUNDA | |
CRSP_original <- CRSP | |
LINK_original <- LINK | |
#set this variable sets the number of clusters for the main cluster analysis | |
k<-3 | |
#Make a smaller version of data to test code, if desired | |
#FUNDA<-FUNDA_original[is.element(tic, c("MSFT","AAPL"))] | |
#CRSP<-CRSP_original[is.element(TICKER, c("MSFT","AAPL"))] | |
#remove some unneeded variables | |
#do not use sich because the data is not there before the 80s | |
FUNDA<-FUNDA[, .(gvkey, datadate, fyear, fyr, cusip, at, dlc, txp, che,lct,act,oibdp,sic,fic,popsrc, sale, emp, | |
prcc_f, csho, dltt, pstkl, txditc, xad, epspx, adjex_f)] | |
CRSP<-CRSP[, .(RET, PERMNO, date, CUSIP, EXCHCD)] | |
LINK<- LINK[, .(LPERMNO, LINKTYPE, gvkey, LINKDT, LINKENDDT)] | |
##create annual return var | |
##gives return over this month and past 12 months | |
CRSP[, RET:=as.numeric(RET)] | |
CRSP[,num_mo:=.N,PERMNO] | |
CRSP<-CRSP[num_mo>12] | |
CRSP[, ann_return := rollapplyr((1+RET), 12, prod), by = PERMNO] | |
CRSP[, ann_return := lagged(ann_return, 11), by = PERMNO] | |
##create MOM6 var | |
##this gives return over this month and previous 5] | |
CRSP[,num_mo:=.N,PERMNO] | |
CRSP<-CRSP[num_mo>12] | |
CRSP[, MOM6 := rollapplyr((1+RET), 6, prod), by = PERMNO] | |
CRSP[, MOM6 := lagged(MOM6, 17), by = PERMNO] | |
##create MOM36 var | |
##this gives return over this month and previous 35] | |
CRSP[,num_mo:=.N,PERMNO] | |
CRSP<-CRSP[num_mo>36] | |
CRSP[, MOM36 := rollapplyr((1+RET), 36, prod), by = PERMNO] | |
CRSP[, MOM36 := lagged(MOM36, 47), by = PERMNO] | |
# prepare to merge CRSP and FUNDA by creating a match_date variable to match on (note that | |
# the FUNDA matchdate is 16months ahead of the CRSP matchdate...this is because each CRSP | |
# row has return for past 12 months. For my analysis, I want returns for the 12 months | |
# start 4 months after FUNDA fiscal yearend. Fiscal Year end is defined in the third line, | |
# fyear+fyr). Take fiscal year end, add 16 months to it, and take return over previous | |
# 12 months from that date. | |
CRSP[,data_year := sub("^(\\d{4}).*$", "\\1", date)] | |
CRSP[,match_date := as.Date(as.character(date), "%Y%m%d")] | |
CRSP[,match_date := format(match_date, "%Y %m")] | |
FUNDA[,match_date := as.Date(paste(sprintf("%02d%02d", fyear, fyr), rep("01",length(fyear)), sep=""), "%Y%m%d")] | |
month(FUNDA$match_date)=month(FUNDA$match_date)+16 | |
FUNDA[,match_date := format(match_date, "%Y %m")] | |
# Get Permno into FUNDA by merging with LINK | |
LINK<-LINK[is.element(LINKTYPE, c('LU', 'LC', 'LD', 'LF', 'LN', 'LO', 'LS', 'LX'))] | |
FUNDA <- merge(FUNDA, LINK, by="gvkey", all.x = T, allow.cartesian = T) | |
FUNDA <- FUNDA[(LINKDT <= datadate) | (LINKDT=="B")] | |
FUNDA <- FUNDA[(datadate <= LINKENDDT) | (LINKDT=="E")] | |
FUNDA[, PERMNO:=LPERMNO] | |
FUNDA<-unique(FUNDA, by = c("datadate", "PERMNO")) | |
FUNDA<-FUNDA[, c("LINKDT","LINKENDDT", "LINKTYPE", "LPERMNO"):=NULL] | |
#save a copy | |
FUNDA_01<-FUNDA | |
#merge CRSP and COMPUSTAT data | |
FUNDA <- merge(FUNDA, CRSP,by=c("PERMNO", "match_date"),all.x = T) | |
FUNDA<-unique(FUNDA, by = c("match_date", "PERMNO")) | |
#save a copy | |
FUNDA1<-FUNDA | |
##define ACC variable | |
FUNDA[,avg_at:=(at+lagged(at,1))/2,by=gvkey] | |
FUNDA[dlc == NA, dlc := 0] | |
FUNDA[txp == NA, txp := 0] | |
FUNDA[,ACC:= (delta(act,1)-delta(che,1)-delta(lct,1)+delta(dlc,1)+delta(txp,1)),by=gvkey] | |
FUNDA[,ACC:= ACC/avg_at, by=gvkey] | |
FUNDA[,ann_return:=ann_return-1] | |
#save a copy | |
FUNDA1_0<-FUNDA | |
##define INC and CF variables | |
FUNDA[,INC:=oibdp/avg_at] | |
FUNDA[,CF:=INC-ACC] | |
#Winsorize | |
FUNDA[, ACC:=winsorize(ACC)] | |
FUNDA[, INC:=winsorize(INC)] | |
FUNDA[, CF:=winsorize(CF)] | |
#define leads and lags of INC and CF variables | |
FUNDA[,lead_INC:=lead(INC,1), by=gvkey] | |
FUNDA[,lead_CF:=lead(CF,1), by=gvkey] | |
FUNDA[,lead_ACC:=lead(ACC,1), by=gvkey] | |
#save a copy of data before moving forward | |
FUNDA_retro<-FUNDA | |
#restrict sample to just their sample | |
FUNDA<-FUNDA[!(6000<sic & sic<6999)] | |
FUNDA<-FUNDA[fic=="USA"] | |
FUNDA<-FUNDA[popsrc=="D"] | |
FUNDA<-FUNDA[(1961<fyear & fyear<2009)] | |
FUNDA2<-FUNDA ##save data | |
FUNDA<-FUNDA[!(EXCHCD==0)] | |
FUNDA3<-FUNDA ##save data | |
FUNDA[, change:=length(unique(fyr)), gvkey] #number of unique fyear months for each firm | |
FUNDA<-FUNDA[!(is.na(ACC+CF+ann_return+INC+lead_INC+lead_ACC+lead_CF))] | |
##NOTE: Each section table code outputs latex code to generate that table | |
###TABLE 1### | |
table1<-data.table(FUNDA[,.(ACC,CF,INC,ann_return)]) | |
pearson.correlation.matrix <- cor(FUNDA[,.(ACC, lead_ACC, CF, lead_CF, INC, lead_INC, ann_return)],method="pearson") | |
spearman.correlation.matrix <- cor(FUNDA[,.(ACC, lead_ACC, CF, lead_CF, INC, lead_INC, ann_return)],method="spearman") | |
correlation.matrix<-upper.tri(pearson.correlation.matrix)*pearson.correlation.matrix + lower.tri(spearman.correlation.matrix)*spearman.correlation.matrix | |
stargazer(table1, summary.stat = c("mean", "sd", "p25", "median", "p75"), title="Univariate Statistics") | |
stargazer(correlation.matrix, title="Pair-wise correlations--Pearson (above) and Spearman (below) diagonal") | |
###TABLE 2### | |
#make quantiles of ACC CF INC and leads using user defined function (in libraries and functions file) | |
FUNDA[, quint_INC:=rank_data(INC,5), fyear] | |
FUNDA[, quint_leadINC:=rank_data(lead_INC,5), fyear] | |
FUNDA[, quint_CF:=rank_data(CF,5), fyear] | |
FUNDA[, quint_leadCF:=rank_data(lead_CF,5), fyear] | |
FUNDA[, quint_ACC:=rank_data(ACC,5), fyear] | |
FUNDA[, quint_leadACC:=rank_data(lead_ACC,5), fyear] | |
##make a function that creates each panel of table 2 | |
##input variables quintXXX and quintXXXlead for x and y | |
##input variables XXX and XXXlead for xx and yy | |
##input title of table and notes as ttt and nnn | |
tab2<-function(x, y, ttt,nnn) { | |
#make a basic two way table | |
table2a<-table(x, y) | |
table2a<-round(100*table2a/sum(table2a),2) | |
#make a list containing the two tway table above redone for every year | |
A<-list() | |
for (i in min(FUNDA$fyear):max(FUNDA$fyear)) { | |
name <- paste('year:',i,sep='') | |
A[[name]]<-as.matrix(table(x[FUNDA$fyear==i],y[FUNDA$fyear==i])) | |
} | |
A<-lapply(A, function(z) 100*z/sum(z)) | |
#find element wise sd of that list | |
sd_thing<-round(apply(array(unlist(A), c(5, 5, length(A))), c(1,2), sd),2) | |
##make a table of 1 (if significantly different from 4 at 1% level t test) and 0 otherwise | |
stars<-(abs((table2a-4)/(sd_thing/sqrt(length(A))))>2.7)+0 | |
stars<-replace(as.vector(stars), stars==1, "*") | |
stars<-replace(as.vector(stars), stars==0, "") | |
stars<-matrix(stars, nrow=5, ncol=5) | |
bold<-(table2a>4)+0 | |
boldpre<-replace(as.vector(bold), bold==1, "||") | |
boldpre<-replace(as.vector(boldpre), bold==0, "") | |
boldpre<-matrix(boldpre, nrow=5, ncol=5) | |
boldpost<-replace(as.vector(bold), bold==1, "||") | |
boldpost<-replace(as.vector(boldpost), bold==0, "") | |
boldpost<-matrix(boldpost, nrow=5, ncol=5) | |
##add percentage marks and stars to the two way table, then make latex code | |
table2af<-matrix(paste(boldpre, table2a, "%", boldpost, stars,sep=""), nrow=5, ncol=5) | |
stargazer(table2af, title=ttt, colnames=T,rownames=T) | |
} | |
#panel a | |
tab2(FUNDA$quint_INC,FUNDA$quint_leadINC, "Distribution of firm-years across consecutive annual income quintiles","") | |
#panel b | |
tab2(FUNDA$quint_CF,FUNDA$quint_leadCF, "Distribution of firm-years across consecutive annual cash flow quintiles") | |
#panel c | |
tab2(FUNDA$quint_ACC,FUNDA$quint_leadACC, "Distribution of firm-years across consecutive annual accrual quintiles") | |
###TABLE 3### | |
##PANEL A## | |
#define more variables, and save a copy of the data | |
FUNDA[,lagged_CF:=lagged(CF,1), by=gvkey] | |
FUNDA[,sales_growth:=(sale-lagged(sale,1))/lagged(sale,1), by=gvkey] | |
FUNDA[sales_growth==Inf, sales_growth:=NA] | |
FUNDA[,sales_growth:=wins99(sales_growth), by=fyear] | |
FUNDA[,emp_growth:=(emp-lagged(emp,1))/lagged(emp,1), by=gvkey] | |
FUNDA[emp_growth==Inf, emp_growth:=NA] | |
FUNDA[,emp_growth:=wins99(emp_growth), by=fyear] | |
FUNDAt3<-FUNDA | |
#define two digit SIC...note that we have to remove sic groups that have | |
#too few observations to run the model to avoid errors in the lapply stage | |
FUNDA[,sic_group := sub("^(\\d{2}).*$", "\\1", sic)] | |
FUNDA[,sic_size:=.N,sic_group] | |
#make list of large enough sic groups, then run the model on each group. | |
#the output is a LIST of models, stored as tab3_lms | |
vec_of_sic<-unique(FUNDA[sic_size>30]$sic_group) | |
tab3_lms <- lapply(1:length(vec_of_sic), | |
function(x) | |
lm(ACC~ | |
sales_growth+emp_growth+ | |
lagged_CF+CF+lead_CF, | |
FUNDA[sic_group==vec_of_sic[x]], na.action=na.omit)) | |
##produce a LIST of reduced models | |
tab3_lms_mddgrowth <- lapply(1:length(vec_of_sic), | |
function(x) | |
lm(ACC~ | |
sales_growth+emp_growth, | |
FUNDA[sic_group==vec_of_sic[x]], na.action=na.omit)) | |
##produce a LIST of reduced models | |
tab3_lms_mddmatch <- lapply(1:length(vec_of_sic), | |
function(x) | |
lm(ACC~ | |
lagged_CF+CF+lead_CF, | |
FUNDA[sic_group==vec_of_sic[x]], na.action=na.omit)) | |
#create partial R^2 for each model | |
partial_R_growth<-lapply(1:length(tab3_lms), | |
function(x) | |
rsq.partial(tab3_lms[[x]], tab3_lms_mddgrowth[[x]],type = "sse")$partial.rsq) | |
partial_R_match<-lapply(1:length(tab3_lms), | |
function(x) | |
rsq.partial(tab3_lms[[x]], tab3_lms_mddmatch[[x]],type = "sse")$partial.rsq) | |
#pull coefficients, partial Rsquares, Rsquare, and TStats from each model | |
tab3_summaries<-lapply(1:length(tab3_lms), function(x) | |
append( | |
append( | |
append( | |
append(tab3_lms[[x]]$coefficients, | |
partial_R_growth[[x]]), | |
partial_R_match[[x]]), | |
summary(tab3_lms[[x]])$r.square), | |
summary(tab3_lms[[x]])$coefficients[,3] | |
)) | |
##find mean and percentiles,accross industries, of the summary stats produced above | |
mean_of_lm<-round(apply(array(unlist(tab3_summaries), c(15, 1, length(tab3_summaries))), c(1,2), mean),3) | |
q25_of_lm<-round(apply(array(unlist(tab3_summaries), c(15, 1, length(tab3_summaries))), c(1,2), function(x) quantile(x,.25)),3) | |
q50_of_lm<-round(apply(array(unlist(tab3_summaries), c(15, 1, length(tab3_summaries))), c(1,2), function(x) quantile(x,.50)),3) | |
q75_of_lm<-round(apply(array(unlist(tab3_summaries), c(15, 1, length(tab3_summaries))), c(1,2), function(x) quantile(x,.75)),3) | |
##bind the results into a table. Note that the first 9 elements of the above vectors relate to | |
##coefficients and R^2,and the last 6 elements of each vector relate to TStats | |
tab3<-cbind(mean_of_lm[1:9], append(mean_of_lm[10:15], c("", "","")), q25_of_lm[1:9], q50_of_lm[1:9],q75_of_lm[1:9]) | |
rownames(tab3) <- c("Intercept", "Sales Growth","Employee Growth", "Lagged Cash Flow", "Cash Flow", "Lead Cash Flow", "Partial R^2 Growth", "Partial R^2 Match", "R^2") | |
rownames(tab3, do.NULL = F) | |
colnames(tab3)<- c("Mean", "T-Statistic", "25th Percentile", "Median","75th Percentile") | |
##make the latex code for the table3 panel A | |
stargazer(tab3, title="Distribution of coefficients for the modified Dechow and Dichev (MDD) Model") | |
##PANEL B## | |
#Calculate fitted values for MDD model | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDGOOD:=tab3_lms[[x]]$coefficients[1]+ | |
sales_growth*tab3_lms[[x]]$coefficients[2]+ | |
emp_growth*tab3_lms[[x]]$coefficients[3]+ | |
lagged_CF*tab3_lms[[x]]$coefficients[4]+ | |
CF*tab3_lms[[x]]$coefficients[5]+ | |
lead_CF*tab3_lms[[x]]$coefficients[6] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDGROWTH:= | |
sales_growth*tab3_lms[[x]]$coefficients[2]+ | |
emp_growth*tab3_lms[[x]]$coefficients[3] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDMATCH:= | |
lagged_CF*tab3_lms[[x]]$coefficients[4]+ | |
CF*tab3_lms[[x]]$coefficients[5]+ | |
lead_CF*tab3_lms[[x]]$coefficients[6] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDERROR:=ACC-(tab3_lms[[x]]$coefficients[1]+ | |
sales_growth*tab3_lms[[x]]$coefficients[2]+ | |
emp_growth*tab3_lms[[x]]$coefficients[3]+ | |
lagged_CF*tab3_lms[[x]]$coefficients[4]+ | |
CF*tab3_lms[[x]]$coefficients[5]+ | |
lead_CF*tab3_lms[[x]]$coefficients[6]) | |
]) | |
#Create lagged values for MDD fitted values | |
FUNDA[,lagged_MDDGOOD:=lagged(MDDGOOD,1), by=gvkey] | |
FUNDA[,lagged_MDDGROWTH:=lagged(MDDGROWTH,1), by=gvkey] | |
FUNDA[,lagged_MDDMATCH:=lagged(MDDMATCH,1), by=gvkey] | |
FUNDA[,lagged_MDDERROR:=lagged(MDDERROR,1), by=gvkey] | |
FUNDA[,lead_MDDGOOD:=lead(MDDGOOD,1), by=gvkey] | |
FUNDA[,lead_MDDGROWTH:=lead(MDDGROWTH,1), by=gvkey] | |
FUNDA[,lead_MDDMATCH:=lead(MDDMATCH,1), by=gvkey] | |
FUNDA[,lead_MDDERROR:=lead(MDDERROR,1), by=gvkey] | |
#compute correlations and pvalues | |
CPvalues<-corr.test(FUNDA[,.(MDDGOOD,MDDGROWTH,MDDMATCH,MDDERROR,lagged_MDDGOOD,lagged_MDDGROWTH,lagged_MDDMATCH), | |
lagged_MDDERROR]) | |
#organize into table, then round, then add parenthesis for pvalues, then add row labels | |
tab3b<-rbind(CPvalues$r[2,6], CPvalues$p[2,6], CPvalues$r[3,7], CPvalues$p[3,7], | |
CPvalues$r[4,8], CPvalues$p[4,8], CPvalues$r[5,1], CPvalues$p[5,1]) | |
tab3b<-round(tab3b, 3) | |
auto_correlation<-paste(rep(c("","("),4), tab3b, rep(c("",")"),4),sep="") | |
tab3b<-as.data.frame(auto_correlation,nrow=16) | |
rownames(tab3b, do.NULL = F) | |
rownames(tab3b) <- c("MDDGOOD", "", "MDDGROWTH"," " ,"MDDMATCH", " ","MDDERROR"," ") | |
#make latex code for table 3b | |
stargazer(as.matrix(tab3b), title="Autocorrelations of good accruals and accrual estimation error") | |
###TABLE 4### | |
##Panel A## | |
#Define variables | |
FUNDA[,lagged_ACC:=lagged(ACC,1), by=gvkey] | |
##function inputs a number of clusters, and outputs a table 4 panel A | |
make.tab4<-function(x) { | |
m1 <- flexmix(ACC ~ lagged_ACC, data = FUNDA[(!is.na(ACC))&(!is.na(lagged_ACC))], k = x) | |
tab4<-cbind( t(as.matrix(parameters(m1))), | |
table(clusters(m1))/sum(table(clusters(m1))), append(AIC(m1),rep(NA,x-1))) | |
colnames(tab4, do.NULL = F) | |
colnames(tab4) <- c("alpha", "beta","" ,"cluster size", "AIC") | |
return(round(tab4[,c(1,2,4,5)],3)) | |
} | |
#bind together outputs from above to make a pretty table, then add row names, then make latex code for the table | |
tab4<-rbind(rep("",4), make.tab4(1), rep("",4), make.tab4(2), rep("",4), make.tab4(3)) | |
row.names(tab4)<-c("One Cluster","Cluster 1","Two Clusters","Cluster 1","Cluster 2","Three Clusters","Cluster 1", "Cluster 2", "Cluster 3") | |
stargazer(tab4, title="Latent class mixture models foraccrual autoregressions: cluster analysis") | |
##Panel B## | |
#Make a version of FUNDA that includes mixture classes for use in tables 4 panel b | |
FUNDAt4<-FUNDA[(!is.na(ACC))&(!is.na(lagged_ACC))] | |
m2<-flexmix(ACC ~ lagged_ACC, data = FUNDAt4, k = 2) | |
m3<-flexmix(ACC ~ lagged_ACC, data = FUNDAt4, k = 3) | |
m4<-flexmix(ACC ~ lagged_ACC, data = FUNDAt4, k = 4) | |
FUNDAt4[,m2cluster:=clusters(m2)] | |
FUNDAt4[,m3cluster:=clusters(m3)] | |
FUNDAt4[,m4cluster:=clusters(m4)] | |
##this function inputs a number, k, the number of clusters...and outputs a list of k linear models: | |
##the main model evaluated within each cluster (have to hand set k=3, done at beggining of this file) | |
make.tab4b<- function(k) { return( | |
lapply(1:k, | |
function(x) | |
lm1<-lm(ACC~sales_growth+emp_growth+lagged_CF+CF+lead_CF, | |
FUNDAt4[eval(parse(text=paste("m",k,"cluster", sep="")))==x], na.action=na.omit)) | |
) } | |
##this function inputs a number, k, the number of clusters...and outputs a list of k linear models: | |
##the reduced growth model evaluated within each cluster (have to hand set k=3, done at beggining of this file) | |
make.tab4bGROWTH<- function(k) { return( | |
lapply(1:k, | |
function(x) | |
lm1<-lm(ACC~sales_growth+emp_growth, | |
FUNDAt4[eval(parse(text=paste("m",k,"cluster", sep="")))==x], na.action=na.omit)) | |
) } | |
##this function inputs a number, k, the number of clusters...and outputs a list of k linear models: | |
##the reduced match model evaluated within each cluster (have to hand set k=3, done at beggining of this file) | |
make.tab4bMATCH<- function(k) { return( | |
lapply(1:k, | |
function(x) | |
lm1<-lm(ACC~lagged_CF+CF+lead_CF, | |
FUNDAt4[eval(parse(text=paste("m",k,"cluster", sep="")))==x], na.action=na.omit)) | |
) } | |
##THIS MAIN FUNCTION inputs the number of clusters, and calls the previous function to make models, then organizes the results of that | |
##model into table 4b. This if the function you should use to replicate table4b if you come back to this project. | |
make.tab4b.final <- function(num_cluster) { | |
tab4_lms <- | |
make.tab4b(num_cluster) #calls a previous function to creates linear models | |
tab4_lms_mddgrowth<-make.tab4bGROWTH(num_cluster) #calls a previous function to creates reduced version linear models | |
tab4_lms_mddmatch<-make.tab4bMATCH(num_cluster) #calls a previous function to creates reduced version linear models | |
partial_R_growth<-lapply(1:length(tab4_lms), #calculates partial Rsq for each linear model | |
function(x) | |
rsq.partial(tab4_lms[[x]], tab4_lms_mddgrowth[[x]],type = "sse")$partial.rsq) | |
partial_R_match<-lapply(1:length(tab4_lms), #calculates partial Rsq for each linear model | |
function(x) | |
rsq.partial(tab4_lms[[x]], tab4_lms_mddmatch[[x]],type = "sse")$partial.rsq) | |
tab4b_summaries <- | |
lapply(1:length(tab4_lms), function(x) | |
#pull summary stats from each linear model in the above list, so we can put this info into a table | |
append( | |
append( | |
append( | |
append(tab4_lms[[x]]$coefficients, | |
partial_R_growth[[x]]), | |
partial_R_match[[x]] | |
), | |
summary(tab4_lms[[x]])$r.square | |
), | |
coeftest.cluster(data.frame(FUNDAt4[eval(parse(text=paste("m",k,"cluster", sep="")))==x]), tab4_lms[[x]], cluster1="gvkey",cluster2 = "fyear")[,3] | |
)) | |
table4bmain <- t(matrix(unlist(tab4b_summaries), c(15, length(tab4b_summaries)))) ##organize summary into matrix | |
table4bmain<-round(table4bmain,3) | |
table4b<-table4bmain[,1:9] #keep just coefficient and r2 info from matrix | |
colnames(table4b, do.NULL=FALSE) ##add columns and make coefficient/r2 matrix pretty ** | |
colnames(table4b) <- | |
c("intcpt", | |
"SGR", | |
"EMPGR", | |
"LagCF", | |
"CF", | |
"LeadCF", | |
"R^2 Growth", | |
"R^2 Match", | |
"R^2") | |
rownames(table4b) <- | |
as.vector(unlist(lapply(1:num_cluster, function(x) | |
paste("Cluster", x)))) | |
table4bt<-table4bmain[,10:15] #keep just t-stat info from main matrix | |
table4bt<-matrix(paste("(",table4bt,")",sep=""),ncol=6) | |
colnames(table4bt) <- | |
c("intcpt", | |
"SGR", | |
"EMPGR", | |
"LagCF", | |
"CF", | |
"LeadCF") | |
rownames(table4bt) <- | |
as.vector(unlist(lapply(1:num_cluster, function(x) | |
paste("Cluster", x)))) | |
table4b<-cbind(Cluster=row.names(table4b), table4b) #organize into table | |
table4bt<-cbind(Cluster=row.names(table4bt), table4bt) | |
table4b<-rbindlist(list(data.table(table4b),data.table(table4bt)), fill=T)[order(Cluster)] | |
table4b[is.na(table4b)]<-"" | |
table4b[2*(1:num_cluster)]$Cluster<-"" | |
table4b<-as.matrix(table4b) | |
return(table4b) | |
} | |
stargazer(make.tab4b.final(3), summary= FALSE,title="Regressions of accruals on growth and cash flow variables by accrual cluster",column.sep.width="2pt" ) | |
####TABLE 5#### | |
#step one, create a version of MDDMATCH/GOOD/ACCRUALS/ERROR without leadCF in the model | |
tab5_lms <- lapply(1:length(vec_of_sic), | |
function(x) | |
lm(ACC~ | |
sales_growth+emp_growth+ | |
lagged_CF+CF, | |
FUNDA[sic_group==vec_of_sic[x]], na.action=na.omit)) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDGOOD5:=tab5_lms[[x]]$coefficients[1]+ | |
sales_growth*tab5_lms[[x]]$coefficients[2]+ | |
emp_growth*tab5_lms[[x]]$coefficients[3]+ | |
lagged_CF*tab5_lms[[x]]$coefficients[4]+ | |
CF*tab5_lms[[x]]$coefficients[5] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDGROWTH5:= | |
sales_growth*tab5_lms[[x]]$coefficients[2]+ | |
emp_growth*tab5_lms[[x]]$coefficients[3] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDMATCH5:= | |
lagged_CF*tab5_lms[[x]]$coefficients[4]+ | |
CF*tab5_lms[[x]]$coefficients[5] | |
]) | |
silent<-lapply(1:length(vec_of_sic), | |
function(x) | |
FUNDA[sic_group==vec_of_sic[x], | |
MDDERROR5:=ACC-(tab5_lms[[x]]$coefficients[1]+ | |
sales_growth*tab5_lms[[x]]$coefficients[2]+ | |
emp_growth*tab5_lms[[x]]$coefficients[3]+ | |
lagged_CF*tab5_lms[[x]]$coefficients[4]+ | |
CF*tab5_lms[[x]]$coefficients[5]) | |
]) | |
#step two, create winsorized variables if they havent been used already MMDMATCH5/MDDGROWTH5 etc | |
#are the versions of MDD components calculated without leadCF | |
FUNDA[,MDDMATCH:=winsorize(MDDMATCH)] | |
FUNDA[,MDDGROWTH:=winsorize(MDDGROWTH)] | |
FUNDA[,MDDGOOD:=winsorize(MDDGOOD)] | |
FUNDA[,MDDERROR:=winsorize(MDDERROR)] | |
FUNDA[,MDDMATCH5:=winsorize(MDDMATCH5)] | |
FUNDA[,MDDGROWTH5:=winsorize(MDDGROWTH5)] | |
FUNDA[,MDDGOOD5:=winsorize(MDDGOOD5)] | |
FUNDA[,MDDERROR5:=winsorize(MDDERROR5)] | |
#step three, define models (one version to get r square, one version to get GOW t-values) | |
tab5lm1r<-lm(lead_INC~CF+ACC, FUNDA) | |
tab5lm1<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+ACC, FUNDA), cluster1="gvkey",cluster2="fyear") | |
tab5lm2r<-lm(lead_INC~CF+MDDGOOD+MDDERROR, FUNDA) | |
tab5lm2<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+MDDGOOD+MDDERROR, FUNDA), cluster1="gvkey",cluster2="fyear") | |
tab5lm3r<-lm(lead_INC~CF+MDDGROWTH+MDDMATCH+MDDERROR, FUNDA) | |
tab5lm3<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+MDDGROWTH+MDDMATCH+MDDERROR, FUNDA), cluster1="gvkey",cluster2="fyear") | |
tab5lm4r<-lm(lead_INC~CF+MDDGOOD5+MDDERROR5, FUNDA) | |
tab5lm4<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+MDDGOOD5+MDDERROR5, FUNDA), cluster1="gvkey",cluster2="fyear") | |
tab5lm5r<-lm(lead_INC~CF+MDDGROWTH5+MDDMATCH5+MDDERROR5, FUNDA) | |
tab5lm5<-coeftest.cluster(data.frame(FUNDA), lm(lead_INC~CF+MDDGROWTH5+MDDMATCH5+MDDERROR5, FUNDA), cluster1="gvkey",cluster2="fyear") | |
#step four, put it all into a table | |
tab5<-smartbind(tab5lm1[,1], round(tab5lm1[,3],2), tab5lm2[,1], round(tab5lm2[,3],2), tab5lm3[,1], round(tab5lm3[,3],2),tab5lm4[,1], round(tab5lm4[,3],2),tab5lm5[,1], round(tab5lm5[,3],2)) | |
tab5[7:10,4:7]<-tab5[7:10,8:11] | |
tab5<-tab5[,1:7] | |
adj_r_sq<-c(summary(tab5lm1r)$adj.r.squared,NA,summary(tab5lm2r)$adj.r.squared, NA, summary(tab5lm3r)$adj.r.squared, NA, summary(tab5lm4r)$adj.r.squared, NA, summary(tab5lm5r)$adj.r.squared, NA) | |
tab5<-cbind(tab5,adj_r_sq) | |
tab5<-round(tab5,3) | |
parenth<-rbind(rep(FALSE,8), !is.na(tab5[2,]),rep(FALSE,8), !is.na(tab5[4,]),rep(FALSE,8), !is.na(tab5[6,]), rep(FALSE,8), !is.na(tab5[8,]), rep(FALSE,8), !is.na(tab5[10,])) | |
parenthpre<-replace(as.vector(parenth), parenth, "(") | |
parenthpre<-matrix(replace(as.vector(parenthpre), !parenth, ""),nrow=10) | |
parenthpost<-replace(as.vector(parenth), parenth, ")") | |
parenthpost<-matrix(replace(as.vector(parenthpost), !parenth, ""),nrow=10) | |
tab5[is.na(tab5)]<-"" | |
tab5<-matrix(paste(parenthpre,as.matrix(tab5),parenthpost,sep=""),nrow=10) | |
tab5<-data.table(tab5) | |
tab5<-tab5[,c(1,2,3,4,6,7,5,8)] | |
tab5<-rbind(c("Panel A"), tab5[1:2], c("Panel B"), tab5[3:6], c("Panel C"), tab5[7:10],fill=T) | |
tab5[is.na(tab5)]<-"" | |
colnames(tab5)<-c("","intcpt","CF","ACC","good","growth","match","error","Adj R Squared") | |
stargazer(as.matrix(tab5), summary=FALSE,title="Pooled cross-sectional regressions of next year's income on cash flows, accruals and accrual components from the MDD Model.",column.sep.width="2pt" ) | |
#####TABLE 6##### | |
#step one, make some new variables | |
FUNDA[,size := log(prcc_f*csho)] | |
FUNDA[,mk2bk := at/(prcc_f*csho+dlc+dltt+pstkl+txditc)] | |
#step two, define models (one version to get r square, one version to get GOW t-values) | |
tab6lm1r<-lm(ann_return~CF+ACC+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm1<-coeftest.cluster(data.frame(FUNDA), tab6lm1r, cluster1="gvkey",cluster2="fyear") | |
tab6lm2r<-lm(ann_return~CF+MDDGOOD+MDDERROR+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm2<-coeftest.cluster(data.frame(FUNDA), tab6lm2r, cluster1="gvkey",cluster2="fyear") | |
tab6lm3r<-lm(ann_return~CF+MDDGROWTH+MDDMATCH+MDDERROR+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm3<-coeftest.cluster(data.frame(FUNDA), tab6lm3r, cluster1="gvkey",cluster2="fyear") | |
tab6lm4r<-lm(ann_return~CF+MDDGOOD5+MDDERROR5+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm4<-coeftest.cluster(data.frame(FUNDA), tab6lm4r, cluster1="gvkey",cluster2="fyear") | |
tab6lm5r<-lm(ann_return~CF+MDDGROWTH5+MDDMATCH5+MDDERROR5+size+mk2bk+MOM6+MOM36, FUNDA) | |
tab6lm5<-coeftest.cluster(data.frame(FUNDA), tab6lm5r, cluster1="gvkey",cluster2="fyear") | |
#step four, put it all into a table | |
tab6<-smartbind(tab6lm1[,1], round(tab6lm1[,3],2), tab6lm2[,1], round(tab6lm2[,3],2), tab6lm3[,1], round(tab6lm3[,3],2),tab6lm4[,1], round(tab6lm4[,3],2),tab6lm5[,1], round(tab6lm5[,3],2)) | |
tab6[7:10,8:11]<-tab6[7:10,12:15] | |
tab6<-tab6[,1:11] | |
adj_r_sq<-c(summary(tab6lm1r)$adj.r.squared,NA,summary(tab6lm2r)$adj.r.squared, NA, summary(tab6lm3r)$adj.r.squared, NA, summary(tab6lm4r)$adj.r.squared, NA, summary(tab6lm5r)$adj.r.squared, NA) | |
tab6<-cbind(tab6,adj_r_sq) | |
tab6<-round(tab6,3) | |
parenth<-rbind(rep(FALSE,8), !is.na(tab6[2,]),rep(FALSE,8), !is.na(tab6[4,]),rep(FALSE,8), !is.na(tab6[6,]), | |
rep(FALSE,8), !is.na(tab6[8,]), rep(FALSE,8), !is.na(tab6[10,])) | |
parenthpre<-replace(as.vector(parenth), parenth, "(") | |
parenthpre<-matrix(replace(as.vector(parenthpre), !parenth, ""),nrow=10) | |
parenthpost<-replace(as.vector(parenth), parenth, ")") | |
parenthpost<-matrix(replace(as.vector(parenthpost), !parenth, ""),nrow=10) | |
tab6[is.na(tab6)]<-"" | |
tab6<-matrix(paste(parenthpre,as.matrix(tab6),parenthpost,sep=""),nrow=10) | |
tab6<-data.table(tab6) | |
tab6<-tab6[,c(1,2,3,8,10,11,9,5,4,6,7,12)] | |
tab6<-rbind(c("Panel A"), tab6[1:2], c("Panel B"), tab6[3:6], c("Panel C"), tab6[7:10],fill=TRUE) | |
tab6[is.na(tab6)]<-"" | |
colnames(tab6)<-c("","intcpt","CF","ACC","good","growth","match","error","BM","size","M6","M36","Adj R Squared") | |
stargazer(as.matrix(tab6), summary=FALSE,title="Pooled cross-sectional regressions of next year's stock returns on cash flows, accruals and accrual components from the MDD Model.",column.sep.width="2pt" ) | |
#merge cluster data in with rest of FUNDA, then save a version--Only necessary for extension purposes | |
FUNDA<-merge(FUNDA, FUNDAt4[,.(m3cluster, match_date, gvkey)], by=c("gvkey","match_date"),all.x=TRUE) | |
write.csv(FUNDA, file = "/Users/skylardeture/documents/Research/Specific Accruals/end_of_replication_data_8_26_2017.csv") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment