Created
September 4, 2017 23:02
-
-
Save sdeture/078de22811055563bb030810953d32c7 to your computer and use it in GitHub Desktop.
Reformatted Spectral Analysis Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##Spectral Analysis## | |
##Things you need: | |
#(1)packages and libraries | |
#(2)FUNDA having already been through the replication analysis in"R code for Specific Accruals" | |
#(3)FUNDQ, fresh from WRDS | |
#load in data..the data saved at the end of the replication code, and the FUNDQ unrestated universe | |
library(readr) | |
end_of_replication_data <- read_csv("~/Documents/Research/Specific Accruals/end_of_replication_data_8_26_2017.csv") | |
FUNDQ <- read_csv("~/Documents/Research/Specific Accruals/Data Sets/FUNDQ1.csv") | |
#convert to data.table format | |
FUNDQ<-data.table(FUNDQ) | |
FUNDA<-data.table(end_of_replication_data) | |
#make a copy of data | |
FUNDQ_original <- FUNDQ | |
EoRD_original <- FUNDA | |
#call FUNDQ FUNDA so you can make FUNDA variables on a quarterly basis | |
FUNDA_annual<-FUNDA | |
FUNDA<-FUNDQ | |
#Some cleanup | |
FUNDA<-FUNDA[,.(gvkey,fyearq,datadate,saleq,invtq,cogsq,rectq,revtq,oibdpq,xrdq,atq,ppentq, | |
dlcq,txpq,fqtr,oancfy, ibq, apq)] | |
FUNDA<-unique(FUNDA, by = c("datadate", "gvkey")) | |
FUNDA[,match_CUSIP := sub("^(\\d{8}).*$", "\\1", gvkey)] | |
FUNDA[,avg_atq:=(atq+lagged(atq,1))/2,by=gvkey] | |
FUNDA[dlcq == NA, dlcq := 0] | |
FUNDA[txpq == NA, txpq := 0] | |
FUNDA[fqtr >1, oancfq := oancfy - lagged(oancfy,1), gvkey] | |
FUNDA[fqtr == 1, oancfq := oancfy] | |
#define Accruals variable. One option is to follow ALS, which is the option I commented out. | |
#the other option is to take all non-cash net income, which is what I choose to do because | |
#I want depreciation included as an accrual. | |
#FUNDA[,ACCq:= (delta(actq,1)-delta(cheq,1)-delta(lctq,1)+delta(dlcq,1)+delta(txpq,1)),by=match_CUSIP] | |
FUNDA[, ACCq:= ibq-oancfq] | |
FUNDA[, ACCq:= ACCq/avg_atq, by=gvkey] | |
#remove observations that don't have accruals defined | |
FUNDA<-FUNDA[!(is.na(ACCq))] | |
FUNDA<-unique(FUNDA, by = c("datadate", "gvkey")) | |
##make list, each element of the list is a vector of spectra strength corresponding to a unique gvkey | |
FUNDA[, num_accq_obs := sum(!is.na(ACCq)), gvkey] | |
kern <- kernel("daniell", c(1, 1, 1)) | |
list_of_spectra <- | |
lapply(unique(FUNDA[num_accq_obs > 20]$gvkey), function(x) | |
spec.pgram(FUNDA[gvkey == x]$ACCq, plot = F)$spec) | |
##creates indices of the max spectra, the implied period length of the max spectra, | |
##and the max spectra; for each gvkey | |
max_spectra_index <- lapply(list_of_spectra, function(x) | |
which.max(x)) | |
num_of_spectra <- lapply(list_of_spectra, function(x) | |
length(x)) | |
max_frequency <- lapply(list_of_spectra, function(x) (.5 / length(x) * which.max(x))) | |
max_spectra_period_length <- | |
lapply(list_of_spectra, function(x) | |
1 / (.5 / length(x) * which.max(x))) | |
max_spectra <- lapply(list_of_spectra, function(x) | |
max(x)) | |
relative_importance_max_spectra <- lapply(list_of_spectra, function(x) | |
max(x)/mean(x)) | |
##creates indices of the 2nd_max spectra, the implied period length of the 2nd_max spectra, | |
## and the 2nd_max spectra; for each gvkey | |
n<-2 | |
max2_spectra_index <- lapply(list_of_spectra, function(x) | |
which(x == sort(x)[length(x) - (n - 1)])) | |
max2_frequency <- lapply(list_of_spectra, function(x) | |
(.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)]))) | |
max2_spectra_period_length <- | |
lapply(list_of_spectra, function(x) | |
1 / (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)]))) | |
max2_spectra <- lapply(list_of_spectra, function(x) | |
sort(x)[length(x) - (n - 1)]) | |
relative_importance_max2_spectra <- lapply(list_of_spectra, function(x) | |
sort(x)[length(x) - (n - 1)]/mean(x)) | |
##creates indices of the 3rd_max spectra, the implied period length of the 2nd_max spectra, | |
##and the 3rd_max spectra; for each gvkey | |
n<-3 | |
max3_spectra_index <- lapply(list_of_spectra, function(x) | |
which(x == sort(x)[length(x) - (n - 1)])) | |
max3_frequency <- lapply(list_of_spectra, function(x) | |
(.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)]))) | |
max3_spectra_period_length <- | |
lapply(list_of_spectra, function(x) | |
1 / (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)]))) | |
max3_spectra <- lapply(list_of_spectra, function(x) | |
sort(x)[length(x) - (n - 1)]) | |
relative_importance_max3_spectra <- lapply(list_of_spectra, function(x) | |
sort(x)[length(x) - (n - 1)]/mean(x)) | |
##make data.table of gvkey and maximal frequency | |
max_frequency_lookup <- | |
data.table(cbind( | |
gvkey = unique(FUNDA[num_accq_obs > 20]$gvkey), | |
max_freq = unlist(as.numeric(max_frequency)), | |
max2_freq = unlist(as.numeric(max2_frequency)), | |
max3_freq = unlist(as.numeric(max3_frequency)) | |
)) | |
#Save a copy for future use | |
write.csv(max_frequency_lookup, file = "/Users/skylardeture/documents/Research/Specific Accruals/max_frequency_lookup") | |
##merge the maximal frequency into FUNDA (which is really quarterly at this point) | |
FUNDA <- merge(FUNDA, max_frequency_lookup,by=c("gvkey"),all.x = T) | |
FUNDA[,max_freq:=as.numeric(max_freq)] | |
FUNDA[,max2_freq:=as.numeric(max2_freq)] | |
FUNDA[,max3_freq:=as.numeric(max3_freq)] | |
FUNDA[,dom_freq_small:=min(max_freq,max2_freq,max3_freq), by=.(gvkey,datadate)] | |
FUNDA[,dom_freq_med:=median(c(max_freq,max2_freq,max3_freq)), by=.(gvkey,datadate)] | |
FUNDA[,dom_freq_big:=max(max_freq,max2_freq,max3_freq), by=.(gvkey,datadate)] | |
FUNDA_save<-FUNDA | |
##so now we have a dataset of quarterly observations, and it includes the frequency variables | |
##which are constant for each firm | |
##lets compute the cash conversion cycle## | |
FUNDA[,DIO:=((invtq+lagged(invtq,1))/2)/(cogsq/365)/365,gvkey] | |
FUNDA[,DSO:=((rectq+lagged(rectq,1))/2)/(revtq/365)/365,gvkey] | |
FUNDA[,DPO:=((apq+lagged(apq,1))/2)/(cogsq/365)/365,gvkey] | |
FUNDA[is.element(DIO, c(Inf, -Inf)), DIO:= NA] | |
FUNDA[is.element(DSO, c(Inf, -Inf)), DSO:= NA] | |
FUNDA[is.element(DPO, c(Inf, -Inf)), DPO:= NA] | |
FUNDA[,CCC:=DIO+DSO-DPO] | |
FUNDA[,avg_ISP_cycle:=mean(DIO+DSO+DPO,na.rm = T),gvkey] | |
FUNDA[,avg_DIO:=mean(DIO, na.rm=T),gvkey] | |
FUNDA[,avg_DSO:=mean(DSO, na.rm=T),gvkey] | |
FUNDA[,avg_DPO:=mean(DPO, na.rm=T),gvkey] | |
FUNDA[,avg_CCC:=mean(CCC, na.rm=T),gvkey] | |
##switch the names back to FUNDQ for quarterly and FUNDA for annual, and merge the two. | |
FUNDQ<-FUNDA | |
FUNDA<-FUNDA_annual | |
FUNDQ[,fyear:=fyearq] | |
#Save a copy | |
FUNDA_save2<-FUNDA | |
FUNDQ_save2<-FUNDQ | |
#Merge | |
FUNDA<-merge(FUNDQ,FUNDA, | |
by = c("gvkey","fyear"), all.x = T) | |
##remove firms that aren't exhange traded, to match ALS | |
FUNDA<-FUNDA[!(EXCHCD==0)] | |
##make the firm attributes used in schipper lafond francis ollson 2004 that require quarterly data | |
##for variables using time series data on a rolling window, I use 20 quarters, instead of 10 years | |
FUNDA[,log_assets:=log(at)] | |
FUNDA<-FUNDA[, num_saleq_obs := sum(!is.na(saleq)), gvkey][num_saleq_obs > 20] | |
FUNDA[, SALES_var:=rollapplyr(saleq, 20, sd, fill=c(NA,NA,NA))/atq, gvkey] | |
FUNDA[, op_cycle:= log(((invtq+lagged(invtq,1))/2)/(cogsq/91)+((rectq+lagged(rectq,1))/2)/(revtq/91))] | |
proportion_neg<- function(x) {return(sum(x<0)/length(x))} | |
FUNDA<-FUNDA[, num_oibdpq_obs := sum(!is.na(oibdpq)), gvkey][num_oibdpq_obs > 20] | |
FUNDA[, neg_earn:=rollapplyr(oibdpq,20, proportion_neg,fill=c(NA,NA,NA)), gvkey] | |
FUNDA[xrdq == NA, xrdq := 0] | |
FUNDA[xad == NA, xad := 0] ##ad expense not available quarterly | |
FUNDA[,intangible_intensity:=(xrdq+xad/4)/saleq] | |
FUNDA[,absence_intangibles:=0] | |
FUNDA[intangible_intensity == 0, absence_intangibles:=1] | |
FUNDA[, capital_intensity:= ppentq/atq] | |
FUNDA<-FUNDA[, num_oancfq_obs := sum(!is.na(oancfq)), gvkey][num_oancfq_obs > 20] | |
FUNDA[, CFO_var:=rollapplyr(oancfq,20,sd,fill=c(NA,NA,NA))/atq, gvkey] | |
FUNDA<-FUNDA[, num_oibdpq_obs := sum(!is.na(oibdpq)), gvkey][num_oibdpq_obs > 20] | |
FUNDA<-FUNDA[, num_atq_obs := sum(!is.na(atq)), gvkey][num_atq_obs > 20] | |
FUNDA<-FUNDA[, num_oancfq_obs := sum(!is.na(oancfq)), gvkey][num_oancfq_obs > 20] | |
FUNDA[, ni_at:=oibdpq/atq] | |
FUNDA[, cf_at:=oancfq/atq] | |
FUNDA[, sd_ni_at:=rollapplyr(ni_at,20,sd,fill=c(NA,NA,NA))/atq, gvkey] | |
FUNDA[, sd_cf_at:=rollapplyr(cf_at,20,sd,fill=c(NA,NA,NA))/atq, gvkey] | |
FUNDA[,smoothness:=sd_ni_at/sd_cf_at] | |
#Make the FLOS variables that require annual data | |
SLFO<-data.table(FUNDA_annual) | |
SLFO[,eps_adj:=epspx/adjex_f] | |
SLFO[,lag_eps_adj:=lagged(epspx/adjex_f,1),gvkey] | |
SLFO<-SLFO[is.finite(lag_eps_adj),count:=.N,gvkey][count>10] | |
#define coefficients for persistance and predicitbility rolling regression | |
SLFO[, c("p0","p1") := rolling_reg(as.matrix(cbind(lag_eps_adj)),as.matrix(cbind(eps_adj))), gvkey] | |
SLFO[, persistance:=p1] | |
SLFO[, predictibility:=rollapplyr((eps_adj-p0-p1*lag_eps_adj),10,sd,fill=c(NA,NA,NA)), gvkey] | |
#define coefficients for accruals quality | |
SLFO[, c("a0","a1","a2","a3","a4","a5") := | |
rolling_reg(as.matrix(cbind(sales_growth,emp_growth,lagged_CF,lead_CF,CF)), | |
as.matrix(cbind(ACC))), gvkey] | |
SLFO[, acc_quality:=rollapplyr((ACC-a0-a1*sales_growth-a2*emp_growth-a3*lagged_CF-a4*lead_CF-a5*CF | |
),10,sd,fill=c(NA,NA,NA)), gvkey] | |
SLFO[,vr_return:=lagged(MOM6*(1+ann_return),1), gvkey] ##this is 18 months, ending 4 months after end of fiscal year | |
SLFO[,vr_ni:=oibdp/(prcc_f*csho)] | |
SLFO[,vr_delta_ni:=vr_ni-lagged(vr_ni,1), gvkey] | |
SLFO[, c("vR") := | |
rolling_regR(as.matrix(cbind(vr_ni, vr_delta_ni)), | |
as.matrix(cbind(vr_return))), gvkey] | |
SLFO[,relevance:= -vR] | |
#Merge quarterly set with annual set, so that you have all variables in one place | |
FUNDA<-merge(FUNDA, SLFO[,.(gvkey,fyear,acc_quality,predictibility,persistance,sale,emp,relevance,emp_growth,sales_growth)], c("gvkey","fyear"), all.x = T) | |
FUNDA<-unique(FUNDA, by = c("datadate.x", "gvkey")) | |
#Data having gone through "R Code for Specific Accruals" and this file, up to this point | |
write.csv(FUNDA, file = "/Users/skylardeture/documents/Research/Specific Accruals/ALSdata5.csv") | |
FUNDA_save3<-FUNDA | |
##At this point, go to line 205 of "libraries and functions for ALS replication" | |
##and run the code from that point on, to merge Fama and French industry definitions | |
##into the data | |
#List of variables to Winsorize at 1 and 99 | |
VarsToWins<-c("log_assets","op_cycle","intangible_intensity", | |
"capital_intensity","CFO_var","SALES_var","relevance","smoothness", | |
"predictability","persistence","acc_quality") | |
FUNDA[ , (VarsToWins) := lapply(.SD, function(x) wins99(x)), .SDcols = VarsToWins] | |
FUNDA[ , (VarsToWins) := lapply(.SD, function(x) ifelse(is.finite(x), x, NA)), .SDcols = VarsToWins] | |
#Create a dummy variable for whether the biggest or smallest frequencies are dominant | |
FUNDA[, small_freq_important:= (dom_freq_small == max_freq)+0] | |
FUNDA[, big_freq_important:= (dom_freq_big == max_freq)+0] | |
##Make a table containing spectra/freq information and header info for each firm, one obs per firm. | |
##Also make one with firm-year level data | |
FUNDA_fy_abv <- | |
FUNDA[!is.na(max_freq + avg_ISP_cycle + DIO + DSO + DPO), .SD[1], .(gvkey,fyear)] | |
FUNDA_f_abv <- | |
FUNDA[!is.na(max_freq + avg_ISP_cycle + DIO + DSO + DPO), .SD[1], match_CUSIP] | |
##Time to make the tables | |
##make summary tables | |
stargazer(FUNDA_f_abv[,.(max_freq,max2_freq,max3_freq)], | |
summary.stat = c("mean", "sd", "p25", "median", "p75"), | |
title="Summary Stats by Spectral Density") | |
stargazer(FUNDA_f_abv[,.(dom_freq_small,dom_freq_med,dom_freq_big)], | |
summary.stat = c("mean", "sd", "p25", "median", "p75"), | |
title="Summary Stats by Frequency Length") | |
stargazer(FUNDA[,.(log_assets[is.finite(log_assets)],op_cycle[is.finite(op_cycle)],is.finite(intangible_intensity)*intangible_intensity, | |
capital_intensity,CFO_var[is.finite(CFO_var)],neg_earn,SALES_var[is.finite(SALES_var)],relevance,smoothness, | |
predictibility,persistance,acc_quality)], | |
summary.stat = c("mean", "sd", "p25", "median", "p75"), | |
covariate.labels=c("log assets","op cycle","intangible intensity", | |
"capital intensity","CFO var","neg earn","SALES var","relevance","smoothness", | |
"predictability","persistence","acc quality"), | |
title="Summary Stats of Firm and Earnings Characteristics") | |
##INCLUDED--firm qualities regressed on frequencies sorted by frequency size | |
stargazer(list( | |
coeftest.cluster(data.frame(FUNDA[is.finite(log_assets)]), lm(log_assets~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(log_assets)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(op_cycle)]), lm(op_cycle~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(op_cycle)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(intangible_intensity)]), lm(intangible_intensity~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(intangible_intensity)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(capital_intensity)]), lm(capital_intensity~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(capital_intensity)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(CFO_var)]), lm(CFO_var~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(CFO_var)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(neg_earn)]), lm(neg_earn~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(neg_earn)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(SALES_var)]), lm(SALES_var~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(SALES_var)]), cluster1="fyear", cluster2="gvkey") | |
), title="Firm qualities on frequencies", column.labels=(c("size", "op-cycle" , "intang-intensity", "cap-intensity", "CFO-var", "losses" , "Sales-var", "acc-error"))) | |
##INCLUDED--earnings qualities regressed on frequencies sorted by frequency size | |
stargazer(list( | |
coeftest.cluster(data.frame(FUNDA[is.finite(relevance)]), lm(relevance~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(relevance)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(smoothness)]), lm(smoothness~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(smoothness)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(persistance)]), lm(persistance~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(persistance)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(predictibility)]), lm(predictibility~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(predictibility)]), cluster1="fyear", cluster2="gvkey"), | |
coeftest.cluster(data.frame(FUNDA[is.finite(acc_quality)]), lm(acc_quality~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(acc_quality)]), cluster1="fyear", cluster2="gvkey") | |
), title="Earnings qualities on frequencies", column.labels=(c("VR" , "smooth" , "pers", "pred", "acc quality"))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment