Skip to content

Instantly share code, notes, and snippets.

@edwinhu
Created September 10, 2018 20:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edwinhu/abdb84baa3e246051f9210a156926ea9 to your computer and use it in GitHub Desktop.
Save edwinhu/abdb84baa3e246051f9210a156926ea9 to your computer and use it in GitHub Desktop.
Get all COMPUSTAT names
/* The goal of this code is to identify all gvkeys in Compustat,
and to estimate approximate start/end dates for these gvkeys */
proc sql;
/* All NA Companies */
create table comp_name_range as
select *
from
/* Company files contains all NA gvkeys */
(select gvkey, ein, cik,
UPCASE(COMPBL(PRXCHANGE('s/[\W_]+/ /',-1,conm))) as conm,
ipodate format yymmdd10.,
dldte format yymmdd10.,
costat,
fic, state
from comp.company) a
left join
/* Note that there is overlap
between names and bank_names
but the dates need not be the same
as the data can come from different sources
(i.e., the INDFMT) */
(select gvkey, cik,
min(year1) as fund_date_min format yymmdd10.,
min(year2) as fund_date_max format yymmdd10.
from
(select gvkey, cik, year1, year2
from comp.names
union all
select gvkey, cik, year1, year2
from comp.bank_names)
group by gvkey) b
on a.gvkey=b.gvkey
left join
/* Some companies don't have fundamentals,
but do have registered securities and hence
they still have Compustat gvkeys
(e.g., private firms)
NA Security History file dates do not appear
to be accurate, hence I am using the dates
from the namesm file */
(select distinct gvkey, cik,
year1 as sec_date_min format yymmdd10.,
year2 as sec_date_max format yymmdd10.
from comp.namesm) c
on a.gvkey = c.gvkey
;
/* All Compustat Global Companies */
create table g_comp_name_range as
select *
from
/* Global Company file is analogous
to NA Company file */
(select gvkey, ein, cik,
UPCASE(COMPBL(PRXCHANGE('s/[\W_]+/ /',-1,conm))) as conm,
ipodate format yymmdd10.,
dldte format yymmdd10.,
costat,
fic, state
from comp.g_company) a
left join
/* Note: there is no Global Bank file */
(select gvkey,
year1 as fund_date_min format yymmdd10.,
year2 as fund_date_max format yymmdd10.
from comp.g_names) b
on a.gvkey=b.gvkey
left join
/* Note: there is no (sec)namesm file
for Compustat Global, but the Security History file
also has dates */
(select distinct gvkey,
min(effdate) as sec_date_min format yymmdd10.,
max(thrudate) as sec_date_max format yymmdd10.
from comp.g_sec_history
group by gvkey) c
on a.gvkey = c.gvkey
;
/* Combine the two, accounting for overlaps */
create table comp_name_range_all as
select *,
/* Get the earliest date for which there is Compustat data
this may also be the IPO Date or Deletion Date */
smallest(1, ipodate, fund_date_min, sec_date_min) as comp_date_min format yymmdd10.,
largest(1, dldte, fund_date_max, sec_date_max) as comp_date_max format yymmdd10.
from
/* Some firms show up in both NA and Global */
(select distinct gvkey, ein, cik,
min(ipodate) as ipodate format yymmdd10.,
max(dldte) as dldte format yymmdd10.,
min(fund_date_min) as fund_date_min format yymmdd10.,
max(fund_date_max) as fund_date_max format yymmdd10.,
min(sec_date_min) as sec_date_min format yymmdd10.,
max(sec_date_max) as sec_date_max format yymmdd10.,
conm, costat,
fic as country, state
from
(select *
from comp_name_range
union all
select *
from g_comp_name_range)
group by gvkey)
order by gvkey
;
quit;
/* There shouldn't be duplicates unless
a firm shows up in both NA and Global,
and the company metadata (e.g., name
or status) is different */
proc sql;
create table _dup as
select * from comp_name_range_all
group by gvkey
having count(*) >= 2
;
quit;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment