Skip to content

Instantly share code, notes, and snippets.

@sdeture
Created September 4, 2017 23:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sdeture/078de22811055563bb030810953d32c7 to your computer and use it in GitHub Desktop.
Save sdeture/078de22811055563bb030810953d32c7 to your computer and use it in GitHub Desktop.
Reformatted Spectral Analysis Code
##Spectral Analysis##
##Things you need:
#(1)packages and libraries
#(2)FUNDA having already been through the replication analysis in"R code for Specific Accruals"
#(3)FUNDQ, fresh from WRDS
#load in data..the data saved at the end of the replication code, and the FUNDQ unrestated universe
library(readr)
end_of_replication_data <- read_csv("~/Documents/Research/Specific Accruals/end_of_replication_data_8_26_2017.csv")
FUNDQ <- read_csv("~/Documents/Research/Specific Accruals/Data Sets/FUNDQ1.csv")
#convert to data.table format
FUNDQ<-data.table(FUNDQ)
FUNDA<-data.table(end_of_replication_data)
#make a copy of data
FUNDQ_original <- FUNDQ
EoRD_original <- FUNDA
#call FUNDQ FUNDA so you can make FUNDA variables on a quarterly basis
FUNDA_annual<-FUNDA
FUNDA<-FUNDQ
#Some cleanup
FUNDA<-FUNDA[,.(gvkey,fyearq,datadate,saleq,invtq,cogsq,rectq,revtq,oibdpq,xrdq,atq,ppentq,
dlcq,txpq,fqtr,oancfy, ibq, apq)]
FUNDA<-unique(FUNDA, by = c("datadate", "gvkey"))
FUNDA[,match_CUSIP := sub("^(\\d{8}).*$", "\\1", gvkey)]
FUNDA[,avg_atq:=(atq+lagged(atq,1))/2,by=gvkey]
FUNDA[dlcq == NA, dlcq := 0]
FUNDA[txpq == NA, txpq := 0]
FUNDA[fqtr >1, oancfq := oancfy - lagged(oancfy,1), gvkey]
FUNDA[fqtr == 1, oancfq := oancfy]
#define Accruals variable. One option is to follow ALS, which is the option I commented out.
#the other option is to take all non-cash net income, which is what I choose to do because
#I want depreciation included as an accrual.
#FUNDA[,ACCq:= (delta(actq,1)-delta(cheq,1)-delta(lctq,1)+delta(dlcq,1)+delta(txpq,1)),by=match_CUSIP]
FUNDA[, ACCq:= ibq-oancfq]
FUNDA[, ACCq:= ACCq/avg_atq, by=gvkey]
#remove observations that don't have accruals defined
FUNDA<-FUNDA[!(is.na(ACCq))]
FUNDA<-unique(FUNDA, by = c("datadate", "gvkey"))
##make list, each element of the list is a vector of spectra strength corresponding to a unique gvkey
FUNDA[, num_accq_obs := sum(!is.na(ACCq)), gvkey]
kern <- kernel("daniell", c(1, 1, 1))
list_of_spectra <-
lapply(unique(FUNDA[num_accq_obs > 20]$gvkey), function(x)
spec.pgram(FUNDA[gvkey == x]$ACCq, plot = F)$spec)
##creates indices of the max spectra, the implied period length of the max spectra,
##and the max spectra; for each gvkey
max_spectra_index <- lapply(list_of_spectra, function(x)
which.max(x))
num_of_spectra <- lapply(list_of_spectra, function(x)
length(x))
max_frequency <- lapply(list_of_spectra, function(x) (.5 / length(x) * which.max(x)))
max_spectra_period_length <-
lapply(list_of_spectra, function(x)
1 / (.5 / length(x) * which.max(x)))
max_spectra <- lapply(list_of_spectra, function(x)
max(x))
relative_importance_max_spectra <- lapply(list_of_spectra, function(x)
max(x)/mean(x))
##creates indices of the 2nd_max spectra, the implied period length of the 2nd_max spectra,
## and the 2nd_max spectra; for each gvkey
n<-2
max2_spectra_index <- lapply(list_of_spectra, function(x)
which(x == sort(x)[length(x) - (n - 1)]))
max2_frequency <- lapply(list_of_spectra, function(x)
(.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
max2_spectra_period_length <-
lapply(list_of_spectra, function(x)
1 / (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
max2_spectra <- lapply(list_of_spectra, function(x)
sort(x)[length(x) - (n - 1)])
relative_importance_max2_spectra <- lapply(list_of_spectra, function(x)
sort(x)[length(x) - (n - 1)]/mean(x))
##creates indices of the 3rd_max spectra, the implied period length of the 2nd_max spectra,
##and the 3rd_max spectra; for each gvkey
n<-3
max3_spectra_index <- lapply(list_of_spectra, function(x)
which(x == sort(x)[length(x) - (n - 1)]))
max3_frequency <- lapply(list_of_spectra, function(x)
(.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
max3_spectra_period_length <-
lapply(list_of_spectra, function(x)
1 / (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
max3_spectra <- lapply(list_of_spectra, function(x)
sort(x)[length(x) - (n - 1)])
relative_importance_max3_spectra <- lapply(list_of_spectra, function(x)
sort(x)[length(x) - (n - 1)]/mean(x))
##make data.table of gvkey and maximal frequency
max_frequency_lookup <-
data.table(cbind(
gvkey = unique(FUNDA[num_accq_obs > 20]$gvkey),
max_freq = unlist(as.numeric(max_frequency)),
max2_freq = unlist(as.numeric(max2_frequency)),
max3_freq = unlist(as.numeric(max3_frequency))
))
#Save a copy for future use
write.csv(max_frequency_lookup, file = "/Users/skylardeture/documents/Research/Specific Accruals/max_frequency_lookup")
##merge the maximal frequency into FUNDA (which is really quarterly at this point)
FUNDA <- merge(FUNDA, max_frequency_lookup,by=c("gvkey"),all.x = T)
FUNDA[,max_freq:=as.numeric(max_freq)]
FUNDA[,max2_freq:=as.numeric(max2_freq)]
FUNDA[,max3_freq:=as.numeric(max3_freq)]
FUNDA[,dom_freq_small:=min(max_freq,max2_freq,max3_freq), by=.(gvkey,datadate)]
FUNDA[,dom_freq_med:=median(c(max_freq,max2_freq,max3_freq)), by=.(gvkey,datadate)]
FUNDA[,dom_freq_big:=max(max_freq,max2_freq,max3_freq), by=.(gvkey,datadate)]
FUNDA_save<-FUNDA
##so now we have a dataset of quarterly observations, and it includes the frequency variables
##which are constant for each firm
##lets compute the cash conversion cycle##
FUNDA[,DIO:=((invtq+lagged(invtq,1))/2)/(cogsq/365)/365,gvkey]
FUNDA[,DSO:=((rectq+lagged(rectq,1))/2)/(revtq/365)/365,gvkey]
FUNDA[,DPO:=((apq+lagged(apq,1))/2)/(cogsq/365)/365,gvkey]
FUNDA[is.element(DIO, c(Inf, -Inf)), DIO:= NA]
FUNDA[is.element(DSO, c(Inf, -Inf)), DSO:= NA]
FUNDA[is.element(DPO, c(Inf, -Inf)), DPO:= NA]
FUNDA[,CCC:=DIO+DSO-DPO]
FUNDA[,avg_ISP_cycle:=mean(DIO+DSO+DPO,na.rm = T),gvkey]
FUNDA[,avg_DIO:=mean(DIO, na.rm=T),gvkey]
FUNDA[,avg_DSO:=mean(DSO, na.rm=T),gvkey]
FUNDA[,avg_DPO:=mean(DPO, na.rm=T),gvkey]
FUNDA[,avg_CCC:=mean(CCC, na.rm=T),gvkey]
##switch the names back to FUNDQ for quarterly and FUNDA for annual, and merge the two.
FUNDQ<-FUNDA
FUNDA<-FUNDA_annual
FUNDQ[,fyear:=fyearq]
#Save a copy
FUNDA_save2<-FUNDA
FUNDQ_save2<-FUNDQ
#Merge
FUNDA<-merge(FUNDQ,FUNDA,
by = c("gvkey","fyear"), all.x = T)
##remove firms that aren't exhange traded, to match ALS
FUNDA<-FUNDA[!(EXCHCD==0)]
##make the firm attributes used in schipper lafond francis ollson 2004 that require quarterly data
##for variables using time series data on a rolling window, I use 20 quarters, instead of 10 years
FUNDA[,log_assets:=log(at)]
FUNDA<-FUNDA[, num_saleq_obs := sum(!is.na(saleq)), gvkey][num_saleq_obs > 20]
FUNDA[, SALES_var:=rollapplyr(saleq, 20, sd, fill=c(NA,NA,NA))/atq, gvkey]
FUNDA[, op_cycle:= log(((invtq+lagged(invtq,1))/2)/(cogsq/91)+((rectq+lagged(rectq,1))/2)/(revtq/91))]
proportion_neg<- function(x) {return(sum(x<0)/length(x))}
FUNDA<-FUNDA[, num_oibdpq_obs := sum(!is.na(oibdpq)), gvkey][num_oibdpq_obs > 20]
FUNDA[, neg_earn:=rollapplyr(oibdpq,20, proportion_neg,fill=c(NA,NA,NA)), gvkey]
FUNDA[xrdq == NA, xrdq := 0]
FUNDA[xad == NA, xad := 0] ##ad expense not available quarterly
FUNDA[,intangible_intensity:=(xrdq+xad/4)/saleq]
FUNDA[,absence_intangibles:=0]
FUNDA[intangible_intensity == 0, absence_intangibles:=1]
FUNDA[, capital_intensity:= ppentq/atq]
FUNDA<-FUNDA[, num_oancfq_obs := sum(!is.na(oancfq)), gvkey][num_oancfq_obs > 20]
FUNDA[, CFO_var:=rollapplyr(oancfq,20,sd,fill=c(NA,NA,NA))/atq, gvkey]
FUNDA<-FUNDA[, num_oibdpq_obs := sum(!is.na(oibdpq)), gvkey][num_oibdpq_obs > 20]
FUNDA<-FUNDA[, num_atq_obs := sum(!is.na(atq)), gvkey][num_atq_obs > 20]
FUNDA<-FUNDA[, num_oancfq_obs := sum(!is.na(oancfq)), gvkey][num_oancfq_obs > 20]
FUNDA[, ni_at:=oibdpq/atq]
FUNDA[, cf_at:=oancfq/atq]
FUNDA[, sd_ni_at:=rollapplyr(ni_at,20,sd,fill=c(NA,NA,NA))/atq, gvkey]
FUNDA[, sd_cf_at:=rollapplyr(cf_at,20,sd,fill=c(NA,NA,NA))/atq, gvkey]
FUNDA[,smoothness:=sd_ni_at/sd_cf_at]
#Make the FLOS variables that require annual data
SLFO<-data.table(FUNDA_annual)
SLFO[,eps_adj:=epspx/adjex_f]
SLFO[,lag_eps_adj:=lagged(epspx/adjex_f,1),gvkey]
SLFO<-SLFO[is.finite(lag_eps_adj),count:=.N,gvkey][count>10]
#define coefficients for persistance and predicitbility rolling regression
SLFO[, c("p0","p1") := rolling_reg(as.matrix(cbind(lag_eps_adj)),as.matrix(cbind(eps_adj))), gvkey]
SLFO[, persistance:=p1]
SLFO[, predictibility:=rollapplyr((eps_adj-p0-p1*lag_eps_adj),10,sd,fill=c(NA,NA,NA)), gvkey]
#define coefficients for accruals quality
SLFO[, c("a0","a1","a2","a3","a4","a5") :=
rolling_reg(as.matrix(cbind(sales_growth,emp_growth,lagged_CF,lead_CF,CF)),
as.matrix(cbind(ACC))), gvkey]
SLFO[, acc_quality:=rollapplyr((ACC-a0-a1*sales_growth-a2*emp_growth-a3*lagged_CF-a4*lead_CF-a5*CF
),10,sd,fill=c(NA,NA,NA)), gvkey]
SLFO[,vr_return:=lagged(MOM6*(1+ann_return),1), gvkey] ##this is 18 months, ending 4 months after end of fiscal year
SLFO[,vr_ni:=oibdp/(prcc_f*csho)]
SLFO[,vr_delta_ni:=vr_ni-lagged(vr_ni,1), gvkey]
SLFO[, c("vR") :=
rolling_regR(as.matrix(cbind(vr_ni, vr_delta_ni)),
as.matrix(cbind(vr_return))), gvkey]
SLFO[,relevance:= -vR]
#Merge quarterly set with annual set, so that you have all variables in one place
FUNDA<-merge(FUNDA, SLFO[,.(gvkey,fyear,acc_quality,predictibility,persistance,sale,emp,relevance,emp_growth,sales_growth)], c("gvkey","fyear"), all.x = T)
FUNDA<-unique(FUNDA, by = c("datadate.x", "gvkey"))
#Data having gone through "R Code for Specific Accruals" and this file, up to this point
write.csv(FUNDA, file = "/Users/skylardeture/documents/Research/Specific Accruals/ALSdata5.csv")
FUNDA_save3<-FUNDA
##At this point, go to line 205 of "libraries and functions for ALS replication"
##and run the code from that point on, to merge Fama and French industry definitions
##into the data
#List of variables to Winsorize at 1 and 99
VarsToWins<-c("log_assets","op_cycle","intangible_intensity",
"capital_intensity","CFO_var","SALES_var","relevance","smoothness",
"predictability","persistence","acc_quality")
FUNDA[ , (VarsToWins) := lapply(.SD, function(x) wins99(x)), .SDcols = VarsToWins]
FUNDA[ , (VarsToWins) := lapply(.SD, function(x) ifelse(is.finite(x), x, NA)), .SDcols = VarsToWins]
#Create a dummy variable for whether the biggest or smallest frequencies are dominant
FUNDA[, small_freq_important:= (dom_freq_small == max_freq)+0]
FUNDA[, big_freq_important:= (dom_freq_big == max_freq)+0]
##Make a table containing spectra/freq information and header info for each firm, one obs per firm.
##Also make one with firm-year level data
FUNDA_fy_abv <-
FUNDA[!is.na(max_freq + avg_ISP_cycle + DIO + DSO + DPO), .SD[1], .(gvkey,fyear)]
FUNDA_f_abv <-
FUNDA[!is.na(max_freq + avg_ISP_cycle + DIO + DSO + DPO), .SD[1], match_CUSIP]
##Time to make the tables
##make summary tables
stargazer(FUNDA_f_abv[,.(max_freq,max2_freq,max3_freq)],
summary.stat = c("mean", "sd", "p25", "median", "p75"),
title="Summary Stats by Spectral Density")
stargazer(FUNDA_f_abv[,.(dom_freq_small,dom_freq_med,dom_freq_big)],
summary.stat = c("mean", "sd", "p25", "median", "p75"),
title="Summary Stats by Frequency Length")
stargazer(FUNDA[,.(log_assets[is.finite(log_assets)],op_cycle[is.finite(op_cycle)],is.finite(intangible_intensity)*intangible_intensity,
capital_intensity,CFO_var[is.finite(CFO_var)],neg_earn,SALES_var[is.finite(SALES_var)],relevance,smoothness,
predictibility,persistance,acc_quality)],
summary.stat = c("mean", "sd", "p25", "median", "p75"),
covariate.labels=c("log assets","op cycle","intangible intensity",
"capital intensity","CFO var","neg earn","SALES var","relevance","smoothness",
"predictability","persistence","acc quality"),
title="Summary Stats of Firm and Earnings Characteristics")
##INCLUDED--firm qualities regressed on frequencies sorted by frequency size
stargazer(list(
coeftest.cluster(data.frame(FUNDA[is.finite(log_assets)]), lm(log_assets~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(log_assets)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(op_cycle)]), lm(op_cycle~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(op_cycle)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(intangible_intensity)]), lm(intangible_intensity~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(intangible_intensity)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(capital_intensity)]), lm(capital_intensity~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(capital_intensity)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(CFO_var)]), lm(CFO_var~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(CFO_var)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(neg_earn)]), lm(neg_earn~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(neg_earn)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(SALES_var)]), lm(SALES_var~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(SALES_var)]), cluster1="fyear", cluster2="gvkey")
), title="Firm qualities on frequencies", column.labels=(c("size", "op-cycle" , "intang-intensity", "cap-intensity", "CFO-var", "losses" , "Sales-var", "acc-error")))
##INCLUDED--earnings qualities regressed on frequencies sorted by frequency size
stargazer(list(
coeftest.cluster(data.frame(FUNDA[is.finite(relevance)]), lm(relevance~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(relevance)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(smoothness)]), lm(smoothness~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(smoothness)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(persistance)]), lm(persistance~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(persistance)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(predictibility)]), lm(predictibility~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(predictibility)]), cluster1="fyear", cluster2="gvkey"),
coeftest.cluster(data.frame(FUNDA[is.finite(acc_quality)]), lm(acc_quality~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(acc_quality)]), cluster1="fyear", cluster2="gvkey")
), title="Earnings qualities on frequencies", column.labels=(c("VR" , "smooth" , "pers", "pred", "acc quality")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment