Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save astraetech/e5e9b1fbc6aeb5ba47f7bcee194fb627 to your computer and use it in GitHub Desktop.
Save astraetech/e5e9b1fbc6aeb5ba47f7bcee194fb627 to your computer and use it in GitHub Desktop.
Compute 5 measures of firm-year discretionary accruals.
/*
 * Discretionary accruals based on jackknifed coefficient estimates.
 * Background: https://mingze-gao.com/posts/compute-jackknife-coefficient-estimates-in-sas/
 *
 * UseHribarCollinsTotalAccruals selects how total accruals are computed:
 *   true  - Hribar-Collins cash-flow total accruals
 *   false - the normal method
 */
%let UseHribarCollinsTotalAccruals = false;

/* The helper macros are read from URLs when the program runs. */
filename array   url "https://mingze-gao.com/utils/array.sas";
filename do_over url "https://mingze-gao.com/utils/do_over.sas";
filename winsor  url "https://mingze-gao.com/utils/winsor.sas";

/* %do_over and %array first, then the winsorize macro. */
%include do_over;
%include array;
%include winsor;
/*
Earnings management models
Author: Mingze (Adrian) Gao, Feb 2019
Modified based on the work by Joost Impink, March 2016
Models estimated (Note that the intercept a0 is removed in the modified code below):
- Jones model, tac = a0 + a1 1/TAt-1 + a2chSales + a3PPE + error.
- variable names DA_Jones
- Modified Jones model, as Jones model, but using chSales - chREC to compute fitted values.
- variable names DA_mJones
- Kothari 2005, controlling for ROA, tac = a0 + a1 1/TAt-1 + a2(chSales - chREC) + a3PPE + a4ROA + error.
- variable names DA_Kothari
- Kothari 2005, performance matched, Jones model, difference in discretionary accruals between firm and closest firm in terms of (contemporaneous) roa
- variable names DA_pmKothari_Jones
- Kothari 2005, performance matched, modified Jones model, difference in discretionary accruals between firm and closest firm in terms of (contemporaneous) roa
- variable names DA_pmKothari_mJones
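tac: Total accruals scaled by beginning of year total assets. With UseHribarCollinsTotalAccruals = false: (chREC + chINVT + chACO - chAP - chLCO - DP). With UseHribarCollinsTotalAccruals = true: (IBC - OANCF + XIDOC), i.e. income before extraordinary items less cash flows from operations, with XIDOC added back.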
1/TAt-1: Inverse of beginning of year total assets
chSales: Change in net sales revenue
chREC: Change in net receivables
PPE: Gross property, plant, and equipment
ROA: Return on assets.
Variables used Compustat Funda
AT: Total assets
IB: Income Before Extraordinary Items
IBC: Income Before Extraordinary Items (Cash Flow) (used if IB is missing)
OANCF: Operating Activities - Net Cash Flow
PPEGT: Property, Plant and Equipment - Total (Gross)
RECT: Receivables - Total
SALE: Sales
INVT: Inventories - Total
LCO: Current Liabilities Other Total
DP: Depreciation and Amortization
ACO: Current Assets Other Total
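AP: Accounts Payable - Trade
XIDOC: Extraordinary Items and Discontinued Operations (Cash Flow) (used for Hribar-Collins total accruals)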
*/
/* Get Funda variables: one row per firm-year with the raw inputs needed below */
%let fundaVars = at ib ibc oancf ppegt rect sale xidoc lco dp aco invt ap;
data work.a_funda(keep=key gvkey fyear datadate sich &fundaVars);
  /* All row filters are applied while reading, so discarded rows never enter the step:
     - fiscal years 1980-2018
     - generic Compustat filter (INDL / STD / D / C)
     - positive sales and positive total assets */
  set comp.funda(where=(
        fyear between 1980 and 2018
        and indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C'
        and sale > 0 and at > 0));
  /* Firm-year identifier. The numeric fyear is converted explicitly with BEST12.
     (the format SAS applies when it converts implicitly), so the key value is
     unchanged but no "numeric values have been converted" NOTE is written.
     NOTE(review): assumes gvkey is a character variable - confirm against comp.funda. */
  key = gvkey || put(fyear, best12.);
  /* Use Income Before Extraordinary Items (Cash Flow) if ib is missing */
  ib = coalesce(ib, ibc);
run;
/* Variables for which a prior-year (lagged) value is needed */
%let lagVars = at sale rect invt aco ap lco;
/* Pair each firm-year with the same gvkey's row for the previous fiscal year.
   Every lagged column gets the suffix _l (at_l, sale_l, rect_l, ...).
   Firm-years with no prior-year row in work.a_funda do not appear in the result. */
proc sql;
  create table work.b_funda as
  select cur.*,
         %do_over(values=&lagVars, between=comma, phrase=prev.? as ?_l)
  from work.a_funda as cur
       inner join work.a_funda as prev
       on  cur.gvkey = prev.gvkey
       and prev.fyear = cur.fyear - 1;
quit;
/* Construct the regression variables; every ratio is scaled by lagged total assets (at_l) */
data work.b_funda(compress=yes);
  set work.b_funda;
  /* 2-digit SIC */
  sic2 = int(sich/100);
  /* Total accruals. The switch is compared case-insensitively and any value other
     than true/false is rejected, so a typo such as FALSE or flase can no longer
     silently select the Hribar-Collins formula. */
  select (lowcase(strip("&UseHribarCollinsTotalAccruals.")));
    when ("false")
      tac = ((rect-rect_l)+(invt-invt_l)+(aco-aco_l)-(ap-ap_l)-(lco-lco_l)-dp)/at_l; /* Accruals ratio */
    when ("true")
      tac = (ibc - oancf + xidoc)/at_l; /* Hribar Collins total cash flow accruals */
    otherwise do;
      put "ERROR: UseHribarCollinsTotalAccruals must be true or false, but is '&UseHribarCollinsTotalAccruals.'.";
      abort cancel;
    end;
  end;
  inv_at_l = 1 / at_l;                                   /* inverse of lagged total assets */
  drev = (sale - sale_l) / at_l;                         /* change in sales */
  drevadj = (sale - sale_l)/at_l - (rect - rect_l)/at_l; /* change in sales less change in receivables */
  ppe = ppegt / at_l;                                    /* gross PPE */
  roa = ib / at_l;                                       /* return on assets */
  /* Optional complete-case filter (cmiss counts missing variables). It is left
     disabled, so rows with missing values stay in work.b_funda. */
  *if cmiss (of tac inv_at_l drevadj ppe roa) eq 0;
run;
/* Optional winsorization before industry-year regression.
   The call passes byvar=fyear and pctl=1 99; the exact treatment is defined by the
   externally included %winsor macro. */
%let winsVars = tac inv_at_l drev drevadj ppe roa ;
%winsor(dsetin=work.b_funda, dsetout=work.b_funda_wins, byvar=fyear, vars=&winsVars, type=winsor, pctl=1 99);
/* Regression by industry-year
edf(error degrees of freedom) + #params will equal the number of obs (no need for proc univariate to count) */
proc sort data=work.b_funda_wins; by fyear sic2; run;
/* regressors: the union of the regressors of both models; this list is reused by
   later %do_over(vars, ...) calls */
%array(vars, values=inv_at_l drev ppe drevadj roa);
/* Suppress displayed output in every open ODS destination (not only LISTING) while
   the BY-group regressions run. ODS OUTPUT data sets are still created. */
ods exclude all;
proc reg data=work.b_funda_wins edf outest=work.c_parms;
  by fyear sic2;
  /* key is carried into the per-observation output so rows can be joined back */
  id key;
  /* Jones Model */
  Jones: model tac = inv_at_l drev ppe / noint influence i;
  /* Kothari with ROA in model */
  Kothari: model tac = inv_at_l drevadj ppe roa / noint influence i;
  /* Capture the per-observation statistics and inv(X'X) as data sets; both are
     consumed by the jackknife computation that follows */
  ods output OutputStatistics=work.outstats InvXPX=work.xpxinv;
run;
/* PROC REG is interactive: RUN alone does not end it, QUIT does */
quit;
/* Restore normal ODS output for the remaining steps */
ods exclude none;
/* Compute discretionary accrual measures */
proc sql;
  /* Step 1 of the jackknifed coefficient estimates:
     extract the diagonal elements of the symmetric inv(X'X), one row per
     fyear / sic2 / model. For the row whose variable is X, the column named X
     holds the diagonal element; the row for the dependent variable tac is skipped. */
  create table work.xpxinv2 as
  select fyear, sic2, model,
         %do_over(vars, phrase=sum(case when variable="?" then ? else . end) as ?, between=comma)
  from work.xpxinv
  where variable ~= 'tac'
  group by fyear, sic2, model
  order by fyear, sic2, model;
/* The difference between original coefficient estimates and the Jackknifed estimates.
   For observation i and regressor j:
     bias = DFBETAS_ij * s(i) * sqrt(inv(X'X)_jj)
   where s(i), the root MSE with observation i left out, is recovered from
     RStudent_i = Residual_i / (s(i) * sqrt(1 - HatDiagonal_i)).
   When RStudent is exactly 0 that recovery divides by zero, although the bias is
   then exactly 0 (a zero residual does not move the estimates). The guard returns
   0 * DFBETAS so the result is 0 and stays missing for regressors that are not
   part of the model. */
  create table work.bias as
  select a.fyear, a.sic2, a.model, a.key,
         %do_over(vars, phrase=case when a.RStudent = 0 then 0 * a.DFB_? else a.DFB_?*(a.Residual/(a.RStudent*sqrt(1-a.HatDiagonal)))*sqrt(b.?) end as bias_?, between=comma)
  from work.outstats as a left join work.xpxinv2 as b
  on a.fyear=b.fyear and a.sic2=b.sic2 and a.model=b.model
  order by a.fyear, a.sic2, a.model, a.key;
/* Jackknifed estimate = original estimate (from the OUTEST= table) minus the
   observation-specific bias. _EDF_ is carried along for the minimum-observation
   checks in the next query. */
  create table work.Jackknifed_params as
  select dev.fyear, dev.sic2, dev.model, dev.key,
         %do_over(vars, phrase=est.? - dev.bias_? as ?, between=comma),
         est._EDF_
  from work.bias as dev
       left join work.c_parms as est
       on  dev.fyear = est.fyear
       and dev.sic2  = est.sic2
       and dev.model = est._MODEL_
  order by dev.fyear, dev.sic2, dev.model, dev.key;
/* Discretionary accruals = actual tac minus the value fitted with the observation's
   own jackknifed coefficients. Each key joins to one coefficient row per model, so
   every measure picks its model inside the CASE and SUM collapses the rows per key. */
  create table work.tmp as
  select distinct obs.fyear, obs.sic2, obs.gvkey, obs.key,
         /* Jones model at a minimum 8 obs (5 degrees of freedom + 3 params) */
         sum(case when jk.model eq 'Jones' and jk._EDF_ ge 5
                  then obs.tac - (obs.inv_at_l * jk.inv_at_l + obs.drev * jk.drev + obs.ppe * jk.ppe)
                  else . end) as DA_Jones,
         /* Modified Jones model: coefficients come from the Jones regression (on drev),
            but drevadj replaces drev when the fitted value is computed */
         sum(case when jk.model eq 'Jones' and jk._EDF_ ge 5
                  then obs.tac - (obs.drevadj * jk.drev + obs.inv_at_l * jk.inv_at_l + obs.ppe * jk.ppe)
                  else . end) as DA_mJones,
         /* Kothari model (with ROA in regression) at a minimum 8 obs (4 degrees of freedom + 4 params) */
         sum(case when jk.model eq 'Kothari' and jk._EDF_ ge 4
                  then obs.tac - (obs.inv_at_l * jk.inv_at_l + obs.drevadj * jk.drevadj + obs.ppe * jk.ppe + obs.roa * jk.roa)
                  else . end) as DA_Kothari
  from work.b_funda_wins as obs
       left join work.Jackknifed_params as jk
       on obs.key = jk.key
  group by obs.key
  order by obs.gvkey, obs.fyear;
/* Kothari performance matching: get DA_Jones (DA_mJones) accruals for the matched firm closest in ROA */
  /* Attach each firm-year's ROA to its discretionary accruals */
  create table work.da_roa as
  select a.*, b.roa
  from work.tmp as a left join work.b_funda_wins as b
  on a.key=b.key;
  /* Pair every firm-year with all other firms of the same industry-year and keep
     the pair(s) with the smallest absolute ROA distance */
  create table work.da_all as
  select a.*,
         /* gvkey of matched firm */
         b.gvkey as gvkey_m,
         /* difference in ROA */
         abs(a.roa - b.roa) as Difference,
         /* difference in DA_Jones / DA_mJones between the firm and its match */
         a.DA_Jones - b.DA_Jones as DA_pmKothari_Jones,
         a.DA_mJones - b.DA_mJones as DA_pmKothari_mJones
  from work.da_roa as a left join work.da_roa as b
  on a.fyear = b.fyear and a.sic2 = b.sic2 /* same 2-digit SIC industry-year */
     and a.key ne b.key /* not the same firm */
  group by a.gvkey, a.fyear
  having Difference = min(Difference) /* keep best match: smallest ROA difference */
  /* gvkey_m is part of the ordering so that tied matches come out in a fixed order;
     the later NODUPKEY sort then keeps the same match on every run */
  order by gvkey, fyear, gvkey_m;
quit;
/* Several matches can share the same minimal difference in the previous step;
   keep a single row per key */
proc sort data=work.da_all nodupkey;
  by key;
run;

/* The five discretionary accrual measures */
%let DAVars = DA_Jones DA_mJones DA_Kothari DA_pmKothari_Jones DA_pmKothari_mJones;

/* Winsorize discretionary accrual variables (Optional).
   The output data set name ends with the value of the accruals switch. */
%winsor(
  dsetin=work.da_all,
  dsetout=work.accruals_HribarCollins_&UseHribarCollinsTotalAccruals.,
  byvar=fyear,
  vars=&DAVars,
  type=winsor,
  pctl=1 99
);

/* Means, medians for key variables */
proc means data=work.accruals_HribarCollins_&UseHribarCollinsTotalAccruals.
           n mean min median max;
  var &DAVars;
run;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment