Skip to content

Instantly share code, notes, and snippets.

@joosti
Created November 30, 2014 01:09
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save joosti/213050de42d6e78f1634 to your computer and use it in GitHub Desktop.
Save joosti/213050de42d6e78f1634 to your computer and use it in GitHub Desktop.
Compustat Segment files, count industrial segments per firm-year
/* count industrial segments
starting dataset, work.a_start with variables gvkey, fyear, datadate from Funda
*/
data b_segm (keep = GVKEY datadate STYPE SID IAS CAPXS NAICS NAICSH NAICSS1 NAICSS2 NIS OPS SALES SICS1 SICS2 SNMS SOPTP1 INTSEG);
set segments.Wrds_segmerged;
/* prevent duplicates: use the data when first published (not later years)*/
if srcdate eq datadate;
/* select business/operating (or, industrial) segments */
if stype IN ("BUSSEG", "OPSEG");
/* keep segments that have SIC industry code */
if SICS1 ne "";
/* keep segments with positive sales */
if sales > 0;
run;
proc sql;
create table work.c_joined as
select a.*, b.SICS1 as sics
from work.a_start a, work.b_segm
where a.gvkey = b.gvkey and a.datadate = b.datadate;
quit;
/* drop segments with same industry (since we want a count on unique industry codes) */
proc sort data=c_joined nodupkey; by gvkey fyear sics;run;
/* count */
proc sql;
create table work.d_count as
select distinct gvkey, fyear, count(*) as numSegs from work.c_joined
group by gvkey, fyear;
quit;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment