sdeture/gist:078de22811055563bb030810953d32c7 Secret

## gistfile1.txt
##Spectral Analysis##

##Things you need:
#(1)packages and libraries
#(2)FUNDA having already been through the replication analysis in"R code for Specific Accruals"
#(3)FUNDQ, fresh from WRDS

#load in data..the data saved at the end of the replication code, and the FUNDQ unrestated universe
library(readr)
end_of_replication_data <- read_csv("~/Documents/Research/Specific Accruals/end_of_replication_data_8_26_2017.csv")
FUNDQ <- read_csv("~/Documents/Research/Specific Accruals/Data Sets/FUNDQ1.csv")

#convert to data.table format
FUNDQ<-data.table(FUNDQ)
FUNDA<-data.table(end_of_replication_data)

#make a copy of data
FUNDQ_original <- FUNDQ
EoRD_original <- FUNDA

#call FUNDQ FUNDA so you can make FUNDA variables on a quarterly basis
FUNDA_annual<-FUNDA
FUNDA<-FUNDQ

#Some cleanup
FUNDA<-FUNDA[,.(gvkey,fyearq,datadate,saleq,invtq,cogsq,rectq,revtq,oibdpq,xrdq,atq,ppentq,
                dlcq,txpq,fqtr,oancfy, ibq, apq)]
FUNDA<-unique(FUNDA, by = c("datadate", "gvkey"))
FUNDA[,match_CUSIP := sub("^(\\d{8}).*$", "\\1", gvkey)]
FUNDA[,avg_atq:=(atq+lagged(atq,1))/2,by=gvkey]
FUNDA[dlcq == NA, dlcq := 0]
FUNDA[txpq == NA, txpq := 0]
FUNDA[fqtr >1, oancfq := oancfy - lagged(oancfy,1), gvkey]
FUNDA[fqtr == 1, oancfq := oancfy]

#define Accruals variable.  One option is to follow ALS, which is the option I commented out.
#the other option is to take all non-cash net income, which is what I choose to do because
#I want depreciation included as an accrual.
#FUNDA[,ACCq:= (delta(actq,1)-delta(cheq,1)-delta(lctq,1)+delta(dlcq,1)+delta(txpq,1)),by=match_CUSIP]
FUNDA[, ACCq:= ibq-oancfq]
FUNDA[, ACCq:= ACCq/avg_atq, by=gvkey]

#remove observations that don't have accruals defined
FUNDA<-FUNDA[!(is.na(ACCq))]
FUNDA<-unique(FUNDA, by = c("datadate", "gvkey"))

##make list, each element of the list is a vector of spectra strength corresponding to a unique gvkey
FUNDA[, num_accq_obs := sum(!is.na(ACCq)), gvkey]
kern <- kernel("daniell", c(1, 1, 1))
list_of_spectra <-
  lapply(unique(FUNDA[num_accq_obs > 20]$gvkey), function(x)
    spec.pgram(FUNDA[gvkey == x]$ACCq, plot = F)$spec)

##creates indices of the max spectra, the implied period length of the max spectra,
##and the max spectra; for each gvkey
max_spectra_index <- lapply(list_of_spectra, function(x)
  which.max(x))
num_of_spectra <- lapply(list_of_spectra, function(x)
  length(x))
max_frequency <-  lapply(list_of_spectra, function(x) (.5 / length(x) * which.max(x)))
max_spectra_period_length <-
  lapply(list_of_spectra, function(x)
    1 / (.5 / length(x) * which.max(x)))
max_spectra <- lapply(list_of_spectra, function(x)
  max(x))
relative_importance_max_spectra <- lapply(list_of_spectra, function(x)
  max(x)/mean(x))

##creates indices of the 2nd_max spectra, the implied period length of the 2nd_max spectra,
## and the 2nd_max spectra; for each gvkey
n<-2
max2_spectra_index <- lapply(list_of_spectra, function(x)
  which(x == sort(x)[length(x) - (n - 1)]))
max2_frequency <-  lapply(list_of_spectra, function(x)
  (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
max2_spectra_period_length <-
  lapply(list_of_spectra, function(x)
    1 / (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
max2_spectra <- lapply(list_of_spectra, function(x)
  sort(x)[length(x) - (n - 1)])
relative_importance_max2_spectra <- lapply(list_of_spectra, function(x)
  sort(x)[length(x) - (n - 1)]/mean(x))

##creates indices of the 3rd_max spectra, the implied period length of the 2nd_max spectra,
##and the 3rd_max spectra; for each gvkey
n<-3
max3_spectra_index <- lapply(list_of_spectra, function(x)
  which(x == sort(x)[length(x) - (n - 1)]))
max3_frequency <-  lapply(list_of_spectra, function(x)
  (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
max3_spectra_period_length <-
  lapply(list_of_spectra, function(x)
    1 / (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
max3_spectra <- lapply(list_of_spectra, function(x)
  sort(x)[length(x) - (n - 1)])
relative_importance_max3_spectra <- lapply(list_of_spectra, function(x)
  sort(x)[length(x) - (n - 1)]/mean(x))

##make data.table of gvkey and maximal frequency
max_frequency_lookup <-
  data.table(cbind(
    gvkey = unique(FUNDA[num_accq_obs > 20]$gvkey),
    max_freq = unlist(as.numeric(max_frequency)),
    max2_freq = unlist(as.numeric(max2_frequency)),
    max3_freq = unlist(as.numeric(max3_frequency))
  ))

#Save a copy for future use
write.csv(max_frequency_lookup, file = "/Users/skylardeture/documents/Research/Specific Accruals/max_frequency_lookup")

##merge the maximal frequency into FUNDA (which is really quarterly at this point)
FUNDA <- merge(FUNDA, max_frequency_lookup,by=c("gvkey"),all.x = T)
FUNDA[,max_freq:=as.numeric(max_freq)]
FUNDA[,max2_freq:=as.numeric(max2_freq)]
FUNDA[,max3_freq:=as.numeric(max3_freq)]
FUNDA[,dom_freq_small:=min(max_freq,max2_freq,max3_freq), by=.(gvkey,datadate)]
FUNDA[,dom_freq_med:=median(c(max_freq,max2_freq,max3_freq)), by=.(gvkey,datadate)]
FUNDA[,dom_freq_big:=max(max_freq,max2_freq,max3_freq), by=.(gvkey,datadate)]
FUNDA_save<-FUNDA

##so now we have a dataset of quarterly observations, and it includes the frequency variables
##which are constant for each firm

##lets compute the cash conversion cycle##
FUNDA[,DIO:=((invtq+lagged(invtq,1))/2)/(cogsq/365)/365,gvkey]
FUNDA[,DSO:=((rectq+lagged(rectq,1))/2)/(revtq/365)/365,gvkey]
FUNDA[,DPO:=((apq+lagged(apq,1))/2)/(cogsq/365)/365,gvkey]
FUNDA[is.element(DIO, c(Inf, -Inf)), DIO:= NA]
FUNDA[is.element(DSO, c(Inf, -Inf)), DSO:= NA]
FUNDA[is.element(DPO, c(Inf, -Inf)), DPO:= NA]
FUNDA[,CCC:=DIO+DSO-DPO]
FUNDA[,avg_ISP_cycle:=mean(DIO+DSO+DPO,na.rm = T),gvkey]
FUNDA[,avg_DIO:=mean(DIO, na.rm=T),gvkey]
FUNDA[,avg_DSO:=mean(DSO, na.rm=T),gvkey]
FUNDA[,avg_DPO:=mean(DPO, na.rm=T),gvkey]
FUNDA[,avg_CCC:=mean(CCC, na.rm=T),gvkey]

##switch the names back to FUNDQ for quarterly and FUNDA for annual, and merge the two.
FUNDQ<-FUNDA
FUNDA<-FUNDA_annual
FUNDQ[,fyear:=fyearq]

#Save a copy
FUNDA_save2<-FUNDA
FUNDQ_save2<-FUNDQ

#Merge
FUNDA<-merge(FUNDQ,FUNDA,
             by = c("gvkey","fyear"), all.x = T)

##remove firms that aren't exhange traded, to match ALS
FUNDA<-FUNDA[!(EXCHCD==0)]


##make the firm attributes used in schipper lafond francis ollson 2004 that require quarterly data
##for variables using time series data on a rolling window, I use 20 quarters, instead of 10 years
FUNDA[,log_assets:=log(at)]
FUNDA<-FUNDA[, num_saleq_obs := sum(!is.na(saleq)), gvkey][num_saleq_obs > 20]
FUNDA[, SALES_var:=rollapplyr(saleq, 20, sd, fill=c(NA,NA,NA))/atq, gvkey]
FUNDA[, op_cycle:= log(((invtq+lagged(invtq,1))/2)/(cogsq/91)+((rectq+lagged(rectq,1))/2)/(revtq/91))]
proportion_neg<- function(x) {return(sum(x<0)/length(x))}
FUNDA<-FUNDA[, num_oibdpq_obs := sum(!is.na(oibdpq)), gvkey][num_oibdpq_obs > 20]
FUNDA[, neg_earn:=rollapplyr(oibdpq,20, proportion_neg,fill=c(NA,NA,NA)), gvkey]
FUNDA[xrdq == NA, xrdq := 0]
FUNDA[xad == NA, xad := 0] ##ad expense not available quarterly
FUNDA[,intangible_intensity:=(xrdq+xad/4)/saleq]
FUNDA[,absence_intangibles:=0]
FUNDA[intangible_intensity == 0, absence_intangibles:=1]
FUNDA[, capital_intensity:= ppentq/atq]
FUNDA<-FUNDA[, num_oancfq_obs := sum(!is.na(oancfq)), gvkey][num_oancfq_obs > 20]
FUNDA[, CFO_var:=rollapplyr(oancfq,20,sd,fill=c(NA,NA,NA))/atq, gvkey]
FUNDA<-FUNDA[, num_oibdpq_obs := sum(!is.na(oibdpq)), gvkey][num_oibdpq_obs > 20]
FUNDA<-FUNDA[, num_atq_obs := sum(!is.na(atq)), gvkey][num_atq_obs > 20]
FUNDA<-FUNDA[, num_oancfq_obs := sum(!is.na(oancfq)), gvkey][num_oancfq_obs > 20]
FUNDA[, ni_at:=oibdpq/atq]
FUNDA[, cf_at:=oancfq/atq]
FUNDA[, sd_ni_at:=rollapplyr(ni_at,20,sd,fill=c(NA,NA,NA))/atq, gvkey]
FUNDA[, sd_cf_at:=rollapplyr(cf_at,20,sd,fill=c(NA,NA,NA))/atq, gvkey]
FUNDA[,smoothness:=sd_ni_at/sd_cf_at]

#Make the FLOS variables that require annual data
SLFO<-data.table(FUNDA_annual)
SLFO[,eps_adj:=epspx/adjex_f]
SLFO[,lag_eps_adj:=lagged(epspx/adjex_f,1),gvkey]
SLFO<-SLFO[is.finite(lag_eps_adj),count:=.N,gvkey][count>10]

#define coefficients for persistance and predicitbility rolling regression
SLFO[, c("p0","p1") := rolling_reg(as.matrix(cbind(lag_eps_adj)),as.matrix(cbind(eps_adj))), gvkey]
SLFO[, persistance:=p1]
SLFO[, predictibility:=rollapplyr((eps_adj-p0-p1*lag_eps_adj),10,sd,fill=c(NA,NA,NA)), gvkey]
#define coefficients for accruals quality
SLFO[, c("a0","a1","a2","a3","a4","a5") :=
       rolling_reg(as.matrix(cbind(sales_growth,emp_growth,lagged_CF,lead_CF,CF)),
                   as.matrix(cbind(ACC))), gvkey]
SLFO[, acc_quality:=rollapplyr((ACC-a0-a1*sales_growth-a2*emp_growth-a3*lagged_CF-a4*lead_CF-a5*CF
),10,sd,fill=c(NA,NA,NA)), gvkey]
SLFO[,vr_return:=lagged(MOM6*(1+ann_return),1), gvkey]  ##this is 18 months, ending 4 months after end of fiscal year
SLFO[,vr_ni:=oibdp/(prcc_f*csho)]
SLFO[,vr_delta_ni:=vr_ni-lagged(vr_ni,1), gvkey]
SLFO[, c("vR") :=
       rolling_regR(as.matrix(cbind(vr_ni, vr_delta_ni)),
                    as.matrix(cbind(vr_return))), gvkey]
SLFO[,relevance:= -vR]

#Merge quarterly set with annual set, so that you have all variables in one place
FUNDA<-merge(FUNDA, SLFO[,.(gvkey,fyear,acc_quality,predictibility,persistance,sale,emp,relevance,emp_growth,sales_growth)], c("gvkey","fyear"), all.x = T)
FUNDA<-unique(FUNDA, by = c("datadate.x", "gvkey"))

#Data having gone through "R Code for Specific Accruals" and this file, up to this point
write.csv(FUNDA, file = "/Users/skylardeture/documents/Research/Specific Accruals/ALSdata5.csv")
FUNDA_save3<-FUNDA

##At this point, go to line 205 of "libraries and functions for ALS replication"
##and run the code from that point on, to merge Fama and French industry definitions
##into the data

#List of variables to Winsorize at 1 and 99
VarsToWins<-c("log_assets","op_cycle","intangible_intensity",
              "capital_intensity","CFO_var","SALES_var","relevance","smoothness",
             "predictability","persistence","acc_quality")
FUNDA[ , (VarsToWins) := lapply(.SD, function(x) wins99(x)), .SDcols = VarsToWins]
FUNDA[ , (VarsToWins) := lapply(.SD, function(x) ifelse(is.finite(x), x, NA)), .SDcols = VarsToWins]

#Create a dummy variable for whether the biggest or smallest frequencies are dominant
FUNDA[, small_freq_important:= (dom_freq_small == max_freq)+0]
FUNDA[, big_freq_important:= (dom_freq_big == max_freq)+0]


##Make a table containing spectra/freq information and header info for each firm, one obs per firm.
##Also make one with firm-year level data
FUNDA_fy_abv <-
  FUNDA[!is.na(max_freq + avg_ISP_cycle + DIO + DSO + DPO), .SD[1], .(gvkey,fyear)]
FUNDA_f_abv <-
  FUNDA[!is.na(max_freq + avg_ISP_cycle + DIO + DSO + DPO), .SD[1], match_CUSIP]


##Time to make the tables

##make summary tables
stargazer(FUNDA_f_abv[,.(max_freq,max2_freq,max3_freq)],
          summary.stat = c("mean", "sd", "p25", "median", "p75"),
          title="Summary Stats by Spectral Density")
stargazer(FUNDA_f_abv[,.(dom_freq_small,dom_freq_med,dom_freq_big)],
          summary.stat = c("mean", "sd", "p25", "median", "p75"),
          title="Summary Stats by Frequency Length")
stargazer(FUNDA[,.(log_assets[is.finite(log_assets)],op_cycle[is.finite(op_cycle)],is.finite(intangible_intensity)*intangible_intensity,
                   capital_intensity,CFO_var[is.finite(CFO_var)],neg_earn,SALES_var[is.finite(SALES_var)],relevance,smoothness,
                   predictibility,persistance,acc_quality)],
          summary.stat = c("mean", "sd", "p25", "median", "p75"),
          covariate.labels=c("log assets","op cycle","intangible intensity",
                             "capital intensity","CFO var","neg earn","SALES var","relevance","smoothness",
                             "predictability","persistence","acc quality"),
          title="Summary Stats of Firm and Earnings Characteristics")


##INCLUDED--firm qualities regressed on frequencies sorted by frequency size
stargazer(list(
  coeftest.cluster(data.frame(FUNDA[is.finite(log_assets)]), lm(log_assets~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(log_assets)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(op_cycle)]), lm(op_cycle~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(op_cycle)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(intangible_intensity)]), lm(intangible_intensity~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(intangible_intensity)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(capital_intensity)]), lm(capital_intensity~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(capital_intensity)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(CFO_var)]), lm(CFO_var~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(CFO_var)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(neg_earn)]), lm(neg_earn~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(neg_earn)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(SALES_var)]), lm(SALES_var~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(SALES_var)]), cluster1="fyear", cluster2="gvkey")
), title="Firm qualities on frequencies", column.labels=(c("size", "op-cycle" , "intang-intensity", "cap-intensity", "CFO-var", "losses" , "Sales-var", "acc-error")))

##INCLUDED--earnings qualities regressed on frequencies sorted by frequency size
stargazer(list(
  coeftest.cluster(data.frame(FUNDA[is.finite(relevance)]), lm(relevance~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(relevance)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(smoothness)]), lm(smoothness~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(smoothness)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(persistance)]), lm(persistance~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(persistance)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(predictibility)]), lm(predictibility~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(predictibility)]), cluster1="fyear", cluster2="gvkey"),
  coeftest.cluster(data.frame(FUNDA[is.finite(acc_quality)]), lm(acc_quality~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(acc_quality)]), cluster1="fyear", cluster2="gvkey")
), title="Earnings qualities on frequencies", column.labels=(c("VR" , "smooth" , "pers", "pred", "acc quality")))
	##Spectral Analysis##

	##Things you need:
	#(1)packages and libraries
	#(2)FUNDA having already been through the replication analysis in"R code for Specific Accruals"
	#(3)FUNDQ, fresh from WRDS

	#load in data..the data saved at the end of the replication code, and the FUNDQ unrestated universe
	library(readr)
	end_of_replication_data <- read_csv("~/Documents/Research/Specific Accruals/end_of_replication_data_8_26_2017.csv")
	FUNDQ <- read_csv("~/Documents/Research/Specific Accruals/Data Sets/FUNDQ1.csv")

	#convert to data.table format
	FUNDQ<-data.table(FUNDQ)
	FUNDA<-data.table(end_of_replication_data)

	#make a copy of data
	FUNDQ_original <- FUNDQ
	EoRD_original <- FUNDA

	#call FUNDQ FUNDA so you can make FUNDA variables on a quarterly basis
	FUNDA_annual<-FUNDA
	FUNDA<-FUNDQ

	#Some cleanup
	FUNDA<-FUNDA[,.(gvkey,fyearq,datadate,saleq,invtq,cogsq,rectq,revtq,oibdpq,xrdq,atq,ppentq,
	dlcq,txpq,fqtr,oancfy, ibq, apq)]
	FUNDA<-unique(FUNDA, by = c("datadate", "gvkey"))
	FUNDA[,match_CUSIP := sub("^(\\d{8}).*$", "\\1", gvkey)]
	FUNDA[,avg_atq:=(atq+lagged(atq,1))/2,by=gvkey]
	FUNDA[dlcq == NA, dlcq := 0]
	FUNDA[txpq == NA, txpq := 0]
	FUNDA[fqtr >1, oancfq := oancfy - lagged(oancfy,1), gvkey]
	FUNDA[fqtr == 1, oancfq := oancfy]

	#define Accruals variable. One option is to follow ALS, which is the option I commented out.
	#the other option is to take all non-cash net income, which is what I choose to do because
	#I want depreciation included as an accrual.
	#FUNDA[,ACCq:= (delta(actq,1)-delta(cheq,1)-delta(lctq,1)+delta(dlcq,1)+delta(txpq,1)),by=match_CUSIP]
	FUNDA[, ACCq:= ibq-oancfq]
	FUNDA[, ACCq:= ACCq/avg_atq, by=gvkey]

	#remove observations that don't have accruals defined
	FUNDA<-FUNDA[!(is.na(ACCq))]
	FUNDA<-unique(FUNDA, by = c("datadate", "gvkey"))

	##make list, each element of the list is a vector of spectra strength corresponding to a unique gvkey
	FUNDA[, num_accq_obs := sum(!is.na(ACCq)), gvkey]
	kern <- kernel("daniell", c(1, 1, 1))
	list_of_spectra <-
	lapply(unique(FUNDA[num_accq_obs > 20]$gvkey), function(x)
	spec.pgram(FUNDA[gvkey == x]$ACCq, plot = F)$spec)

	##creates indices of the max spectra, the implied period length of the max spectra,
	##and the max spectra; for each gvkey
	max_spectra_index <- lapply(list_of_spectra, function(x)
	which.max(x))
	num_of_spectra <- lapply(list_of_spectra, function(x)
	length(x))
	max_frequency <- lapply(list_of_spectra, function(x) (.5 / length(x) * which.max(x)))
	max_spectra_period_length <-
	lapply(list_of_spectra, function(x)
	1 / (.5 / length(x) * which.max(x)))
	max_spectra <- lapply(list_of_spectra, function(x)
	max(x))
	relative_importance_max_spectra <- lapply(list_of_spectra, function(x)
	max(x)/mean(x))

	##creates indices of the 2nd_max spectra, the implied period length of the 2nd_max spectra,
	## and the 2nd_max spectra; for each gvkey
	n<-2
	max2_spectra_index <- lapply(list_of_spectra, function(x)
	which(x == sort(x)[length(x) - (n - 1)]))
	max2_frequency <- lapply(list_of_spectra, function(x)
	(.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
	max2_spectra_period_length <-
	lapply(list_of_spectra, function(x)
	1 / (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
	max2_spectra <- lapply(list_of_spectra, function(x)
	sort(x)[length(x) - (n - 1)])
	relative_importance_max2_spectra <- lapply(list_of_spectra, function(x)
	sort(x)[length(x) - (n - 1)]/mean(x))

	##creates indices of the 3rd_max spectra, the implied period length of the 2nd_max spectra,
	##and the 3rd_max spectra; for each gvkey
	n<-3
	max3_spectra_index <- lapply(list_of_spectra, function(x)
	which(x == sort(x)[length(x) - (n - 1)]))
	max3_frequency <- lapply(list_of_spectra, function(x)
	(.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
	max3_spectra_period_length <-
	lapply(list_of_spectra, function(x)
	1 / (.5 / length(x) * which(x == sort(x)[length(x) - (n - 1)])))
	max3_spectra <- lapply(list_of_spectra, function(x)
	sort(x)[length(x) - (n - 1)])
	relative_importance_max3_spectra <- lapply(list_of_spectra, function(x)
	sort(x)[length(x) - (n - 1)]/mean(x))

	##make data.table of gvkey and maximal frequency
	max_frequency_lookup <-
	data.table(cbind(
	gvkey = unique(FUNDA[num_accq_obs > 20]$gvkey),
	max_freq = unlist(as.numeric(max_frequency)),
	max2_freq = unlist(as.numeric(max2_frequency)),
	max3_freq = unlist(as.numeric(max3_frequency))
	))

	#Save a copy for future use
	write.csv(max_frequency_lookup, file = "/Users/skylardeture/documents/Research/Specific Accruals/max_frequency_lookup")

	##merge the maximal frequency into FUNDA (which is really quarterly at this point)
	FUNDA <- merge(FUNDA, max_frequency_lookup,by=c("gvkey"),all.x = T)
	FUNDA[,max_freq:=as.numeric(max_freq)]
	FUNDA[,max2_freq:=as.numeric(max2_freq)]
	FUNDA[,max3_freq:=as.numeric(max3_freq)]
	FUNDA[,dom_freq_small:=min(max_freq,max2_freq,max3_freq), by=.(gvkey,datadate)]
	FUNDA[,dom_freq_med:=median(c(max_freq,max2_freq,max3_freq)), by=.(gvkey,datadate)]
	FUNDA[,dom_freq_big:=max(max_freq,max2_freq,max3_freq), by=.(gvkey,datadate)]
	FUNDA_save<-FUNDA

	##so now we have a dataset of quarterly observations, and it includes the frequency variables
	##which are constant for each firm

	##lets compute the cash conversion cycle##
	FUNDA[,DIO:=((invtq+lagged(invtq,1))/2)/(cogsq/365)/365,gvkey]
	FUNDA[,DSO:=((rectq+lagged(rectq,1))/2)/(revtq/365)/365,gvkey]
	FUNDA[,DPO:=((apq+lagged(apq,1))/2)/(cogsq/365)/365,gvkey]
	FUNDA[is.element(DIO, c(Inf, -Inf)), DIO:= NA]
	FUNDA[is.element(DSO, c(Inf, -Inf)), DSO:= NA]
	FUNDA[is.element(DPO, c(Inf, -Inf)), DPO:= NA]
	FUNDA[,CCC:=DIO+DSO-DPO]
	FUNDA[,avg_ISP_cycle:=mean(DIO+DSO+DPO,na.rm = T),gvkey]
	FUNDA[,avg_DIO:=mean(DIO, na.rm=T),gvkey]
	FUNDA[,avg_DSO:=mean(DSO, na.rm=T),gvkey]
	FUNDA[,avg_DPO:=mean(DPO, na.rm=T),gvkey]
	FUNDA[,avg_CCC:=mean(CCC, na.rm=T),gvkey]

	##switch the names back to FUNDQ for quarterly and FUNDA for annual, and merge the two.
	FUNDQ<-FUNDA
	FUNDA<-FUNDA_annual
	FUNDQ[,fyear:=fyearq]

	#Save a copy
	FUNDA_save2<-FUNDA
	FUNDQ_save2<-FUNDQ

	#Merge
	FUNDA<-merge(FUNDQ,FUNDA,
	by = c("gvkey","fyear"), all.x = T)

	##remove firms that aren't exhange traded, to match ALS
	FUNDA<-FUNDA[!(EXCHCD==0)]


	##make the firm attributes used in schipper lafond francis ollson 2004 that require quarterly data
	##for variables using time series data on a rolling window, I use 20 quarters, instead of 10 years
	FUNDA[,log_assets:=log(at)]
	FUNDA<-FUNDA[, num_saleq_obs := sum(!is.na(saleq)), gvkey][num_saleq_obs > 20]
	FUNDA[, SALES_var:=rollapplyr(saleq, 20, sd, fill=c(NA,NA,NA))/atq, gvkey]
	FUNDA[, op_cycle:= log(((invtq+lagged(invtq,1))/2)/(cogsq/91)+((rectq+lagged(rectq,1))/2)/(revtq/91))]
	proportion_neg<- function(x) {return(sum(x<0)/length(x))}
	FUNDA<-FUNDA[, num_oibdpq_obs := sum(!is.na(oibdpq)), gvkey][num_oibdpq_obs > 20]
	FUNDA[, neg_earn:=rollapplyr(oibdpq,20, proportion_neg,fill=c(NA,NA,NA)), gvkey]
	FUNDA[xrdq == NA, xrdq := 0]
	FUNDA[xad == NA, xad := 0] ##ad expense not available quarterly
	FUNDA[,intangible_intensity:=(xrdq+xad/4)/saleq]
	FUNDA[,absence_intangibles:=0]
	FUNDA[intangible_intensity == 0, absence_intangibles:=1]
	FUNDA[, capital_intensity:= ppentq/atq]
	FUNDA<-FUNDA[, num_oancfq_obs := sum(!is.na(oancfq)), gvkey][num_oancfq_obs > 20]
	FUNDA[, CFO_var:=rollapplyr(oancfq,20,sd,fill=c(NA,NA,NA))/atq, gvkey]
	FUNDA<-FUNDA[, num_oibdpq_obs := sum(!is.na(oibdpq)), gvkey][num_oibdpq_obs > 20]
	FUNDA<-FUNDA[, num_atq_obs := sum(!is.na(atq)), gvkey][num_atq_obs > 20]
	FUNDA<-FUNDA[, num_oancfq_obs := sum(!is.na(oancfq)), gvkey][num_oancfq_obs > 20]
	FUNDA[, ni_at:=oibdpq/atq]
	FUNDA[, cf_at:=oancfq/atq]
	FUNDA[, sd_ni_at:=rollapplyr(ni_at,20,sd,fill=c(NA,NA,NA))/atq, gvkey]
	FUNDA[, sd_cf_at:=rollapplyr(cf_at,20,sd,fill=c(NA,NA,NA))/atq, gvkey]
	FUNDA[,smoothness:=sd_ni_at/sd_cf_at]

	#Make the FLOS variables that require annual data
	SLFO<-data.table(FUNDA_annual)
	SLFO[,eps_adj:=epspx/adjex_f]
	SLFO[,lag_eps_adj:=lagged(epspx/adjex_f,1),gvkey]
	SLFO<-SLFO[is.finite(lag_eps_adj),count:=.N,gvkey][count>10]

	#define coefficients for persistance and predicitbility rolling regression
	SLFO[, c("p0","p1") := rolling_reg(as.matrix(cbind(lag_eps_adj)),as.matrix(cbind(eps_adj))), gvkey]
	SLFO[, persistance:=p1]
	SLFO[, predictibility:=rollapplyr((eps_adj-p0-p1*lag_eps_adj),10,sd,fill=c(NA,NA,NA)), gvkey]
	#define coefficients for accruals quality
	SLFO[, c("a0","a1","a2","a3","a4","a5") :=
	rolling_reg(as.matrix(cbind(sales_growth,emp_growth,lagged_CF,lead_CF,CF)),
	as.matrix(cbind(ACC))), gvkey]
	SLFO[, acc_quality:=rollapplyr((ACC-a0-a1sales_growth-a2emp_growth-a3lagged_CF-a4lead_CF-a5*CF
	),10,sd,fill=c(NA,NA,NA)), gvkey]
	SLFO[,vr_return:=lagged(MOM6*(1+ann_return),1), gvkey] ##this is 18 months, ending 4 months after end of fiscal year
	SLFO[,vr_ni:=oibdp/(prcc_f*csho)]
	SLFO[,vr_delta_ni:=vr_ni-lagged(vr_ni,1), gvkey]
	SLFO[, c("vR") :=
	rolling_regR(as.matrix(cbind(vr_ni, vr_delta_ni)),
	as.matrix(cbind(vr_return))), gvkey]
	SLFO[,relevance:= -vR]

	#Merge quarterly set with annual set, so that you have all variables in one place
	FUNDA<-merge(FUNDA, SLFO[,.(gvkey,fyear,acc_quality,predictibility,persistance,sale,emp,relevance,emp_growth,sales_growth)], c("gvkey","fyear"), all.x = T)
	FUNDA<-unique(FUNDA, by = c("datadate.x", "gvkey"))

	#Data having gone through "R Code for Specific Accruals" and this file, up to this point
	write.csv(FUNDA, file = "/Users/skylardeture/documents/Research/Specific Accruals/ALSdata5.csv")
	FUNDA_save3<-FUNDA

	##At this point, go to line 205 of "libraries and functions for ALS replication"
	##and run the code from that point on, to merge Fama and French industry definitions
	##into the data

	#List of variables to Winsorize at 1 and 99
	VarsToWins<-c("log_assets","op_cycle","intangible_intensity",
	"capital_intensity","CFO_var","SALES_var","relevance","smoothness",
	"predictability","persistence","acc_quality")
	FUNDA[ , (VarsToWins) := lapply(.SD, function(x) wins99(x)), .SDcols = VarsToWins]
	FUNDA[ , (VarsToWins) := lapply(.SD, function(x) ifelse(is.finite(x), x, NA)), .SDcols = VarsToWins]

	#Create a dummy variable for whether the biggest or smallest frequencies are dominant
	FUNDA[, small_freq_important:= (dom_freq_small == max_freq)+0]
	FUNDA[, big_freq_important:= (dom_freq_big == max_freq)+0]



	##Make a table containing spectra/freq information and header info for each firm, one obs per firm.
	##Also make one with firm-year level data
	FUNDA_fy_abv <-
	FUNDA[!is.na(max_freq + avg_ISP_cycle + DIO + DSO + DPO), .SD[1], .(gvkey,fyear)]
	FUNDA_f_abv <-
	FUNDA[!is.na(max_freq + avg_ISP_cycle + DIO + DSO + DPO), .SD[1], match_CUSIP]


	##Time to make the tables

	##make summary tables
	stargazer(FUNDA_f_abv[,.(max_freq,max2_freq,max3_freq)],
	summary.stat = c("mean", "sd", "p25", "median", "p75"),
	title="Summary Stats by Spectral Density")
	stargazer(FUNDA_f_abv[,.(dom_freq_small,dom_freq_med,dom_freq_big)],
	summary.stat = c("mean", "sd", "p25", "median", "p75"),
	title="Summary Stats by Frequency Length")
	stargazer(FUNDA[,.(log_assets[is.finite(log_assets)],op_cycle[is.finite(op_cycle)],is.finite(intangible_intensity)*intangible_intensity,
	capital_intensity,CFO_var[is.finite(CFO_var)],neg_earn,SALES_var[is.finite(SALES_var)],relevance,smoothness,
	predictibility,persistance,acc_quality)],
	summary.stat = c("mean", "sd", "p25", "median", "p75"),
	covariate.labels=c("log assets","op cycle","intangible intensity",
	"capital intensity","CFO var","neg earn","SALES var","relevance","smoothness",
	"predictability","persistence","acc quality"),
	title="Summary Stats of Firm and Earnings Characteristics")


	##INCLUDED--firm qualities regressed on frequencies sorted by frequency size
	stargazer(list(
	coeftest.cluster(data.frame(FUNDA[is.finite(log_assets)]), lm(log_assets~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(log_assets)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(op_cycle)]), lm(op_cycle~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(op_cycle)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(intangible_intensity)]), lm(intangible_intensity~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(intangible_intensity)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(capital_intensity)]), lm(capital_intensity~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(capital_intensity)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(CFO_var)]), lm(CFO_var~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(CFO_var)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(neg_earn)]), lm(neg_earn~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(neg_earn)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(SALES_var)]), lm(SALES_var~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(SALES_var)]), cluster1="fyear", cluster2="gvkey")
	), title="Firm qualities on frequencies", column.labels=(c("size", "op-cycle" , "intang-intensity", "cap-intensity", "CFO-var", "losses" , "Sales-var", "acc-error")))

	##INCLUDED--earnings qualities regressed on frequencies sorted by frequency size
	stargazer(list(
	coeftest.cluster(data.frame(FUNDA[is.finite(relevance)]), lm(relevance~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(relevance)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(smoothness)]), lm(smoothness~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(smoothness)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(persistance)]), lm(persistance~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(persistance)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(predictibility)]), lm(predictibility~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(predictibility)]), cluster1="fyear", cluster2="gvkey"),
	coeftest.cluster(data.frame(FUNDA[is.finite(acc_quality)]), lm(acc_quality~dom_freq_small+dom_freq_med+dom_freq_big,FUNDA[is.finite(acc_quality)]), cluster1="fyear", cluster2="gvkey")
	), title="Earnings qualities on frequencies", column.labels=(c("VR" , "smooth" , "pers", "pred", "acc quality")))