Last active
September 7, 2023 23:35
-
-
Save JoostImpink/0e5a8ae738cc8ef14baf to your computer and use it in GitHub Desktop.
WRDS - create dataset with Compustat, CRSP and IBES identifiers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Macro that creates a dataset with Compustat, CRSP and IBES identifiers (gvkey, permno, Ibes ticker) | |
for fiscal years in period start-end. | |
macro parameters: | |
@dsout: name of dataset to create | |
@start: start year | |
@end: end year | |
@compvars: list of variables to get from compustat, default value: at sale ceq ni | |
@minscore: ibes iclink minimum score (0 [default] is best score, 6 worst, see iclink.sas) | |
sample usage: %CCI(dsout=work.a_cci, start=2000, end=2014); | |
Invoke the macro from a filed that is saved in the same directory (this is needed for SAS to figure out | |
the relative path; change relative path if needs to be stored in another directory). | |
The created dataset will include the Compustat records with missing permno and missing Ibes ticker. | |
*/ | |
%macro CCI (dsout=, start=, end=, compvars=at sale ceq ni, minscore=0); | |
/* sign on wrds */ | |
%let wrds = wrds.wharton.upenn.edu 4016;options comamid = TCP remote=WRDS; | |
signon username=_prompt_; | |
/* determine file path (for relative paths) */ | |
%let path= %sysfunc(tranwrd(%sysget(SAS_EXECFILEPATH),%sysget(SAS_EXECFILENAME),)); | |
/* syslput pushes macro variables to the remote connection */ | |
%syslput dsout = &dsout; | |
%syslput start = &start; | |
%syslput end = &end; | |
%syslput compvars = &compvars; | |
%syslput minscore = &minscore; | |
%syslput path = &path; | |
/* remote submit start */ | |
rsubmit; | |
/* upload iclink.sas (file iclink needs to be in same directory as this file) */ | |
proc upload infile="&path.iclink.sas" outfile='~/iclink.sas'; run; | |
/* execute iclink.sas (creates home.iclink) */ | |
%include '~/iclink.sas'; | |
/* Firms in Compustat in fiscal year range*/ | |
data a_comp (keep = gvkey fyear datadate &compvars ); | |
set comp.funda; | |
/* require fyear to be within start-end range */ | |
if &start <=fyear <= &end; | |
/* prevent double records */ | |
if indfmt='INDL' and datafmt='STD' and popsrc='D' and consol='C' ; | |
run; | |
/* Match with CCM linktable to get permno */ | |
proc sql; | |
create table b_permno as | |
select a.*, b.lpermno as permno | |
from a_comp a left join crsp.ccmxpf_linktable b | |
on a.gvkey eq b.gvkey | |
and b.lpermno ne . | |
and b.linktype in ("LC" "LN" "LU" "LX" "LD" "LS") | |
and b.linkprim IN ("C", "P") | |
and ((a.datadate >= b.LINKDT) or b.LINKDT eq .B) and | |
((a.datadate <= b.LINKENDDT) or b.LINKENDDT eq .E) ; | |
quit; | |
/* Match with home.iclink to get Ibes ticker */ | |
proc sql; | |
create table c_ibes as | |
select a.*, b.ticker as ibesticker | |
from b_permno a left join home.iclink b | |
on a.permno = b.permno and b.score <= &minscore; | |
quit; | |
/* download */ | |
proc download data=c_ibes out=&dsout;run; | |
/* remote submit end */ | |
endrsubmit; | |
%mend; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*******************************************************************************************/ | |
/* FileName: iclink.sas */ | |
/* Date: Sept 25, 2006 */ | |
/* Author: Rabih Moussawi */ | |
/* Description: Create IBES - CRSP Link Table */ | |
/* FUNCTION: - Creates a link table between IBES TICKER and CRSP PERMNO */ | |
/* - Scores links from 0 (best link) to 6 */ | |
/* */ | |
/* INPUT: */ | |
/* - IBES: IDUSM file */ | |
/* - CRSP: STOCKNAMES file */ | |
/* */ | |
/* OUTPUT: ICLINK set stored in home directory */ | |
/* ICLINK has 15,187 unique IBES TICKER - CRSP PERMNO links */ | |
/* ICLINK contains IBES TICKER and the matching CRSP PERMNO and other fields: */ | |
/* - IBES and CRSP Company names */ | |
/* - SCORE variable: lower scores are better and high scores may need further */ | |
/* checking before using them to link CRSP & IBES data. */ | |
/* In computing the score, a CUSIP match is considered better than a */ | |
/* TICKER match. The score also includes a penalty for differences in */ | |
/* company names-- CNAME in IBES and COMNAM in CRSP. The name penalty is */ | |
/* based upon SPEDIS, which is the spelling distance function in SAS. */ | |
/* SPEDIS(cname,comnam)=0 is a perfect score and SPEDIS < 30 is usually good */ | |
/* enough to be considered a name match. */ | |
/* */ | |
/* "SCORE" levels: */ | |
/* - 0: BEST match: using (cusip, cusip dates and company names) */ | |
/* or (exchange ticker, company names and 6-digit cusip) */ | |
/* - 1: Cusips and cusip dates match but company names do not match */ | |
/* - 2: Cusips and company names match but cusip dates do not match */ | |
/* - 3: Cusips match but cusip dates and company names do not match */ | |
/* - 4: Exch tickers and 6-digit cusips match but company names do not match */ | |
/* - 5: Exch tickers and company names match but 6-digit cusips do not match */ | |
/* - 6: Exch tickers match but company names and 6-digit cusips do not match */ | |
/* */ | |
/* ICLINK Example: */ | |
/* TICKER CNAME PERMNO COMNAM SCORE */ | |
/* BAC BANKAMERICA CORPORATION 58827 BANKAMERICA CORP 0 */ | |
/* DELL DELL INC 11081 DELL INC 0 */ | |
/* FFS 1ST FED BCP DEL 75161 FIRST FEDERAL BANCORP DE 3 */ | |
/* IBM INTERNATIONAL BUSINESS MACHINES 12490 INTERNATIONAL BUSINESS MACHS CO 0 */ | |
/* MSFT MICROSOFT CORP 10107 MICROSOFT CORP 0 */ | |
/* */ | |
/*******************************************************************************************/ | |
* Possible IBES ID (names) file to use (as of April 2006); | |
* Detail History: ID file : 23808 unique US and Canadian company IBES TICKERs; | |
* Summary History: IDSUM File: 15576 unique US company IBES TICKERs; | |
* Recommendation Summary Statistics: RECDSUM File 12465 unique US company IBES tickers; | |
* It seems that the Summary History Identifier file IDSUM is best | |
because USFIRM dummy is used to designate only US companies; | |
%let IBES1= IBES.IDSUM; | |
%let CRSP1= CRSP.STOCKNAMES; | |
libname home '~'; * Save link table in home directory; | |
/* Step 1: Link by CUSIP */ | |
/* IBES: Get the list of IBES TICKERS for US firms in IBES */ | |
proc sort data=&IBES1 out=IBES1 (keep=ticker cusip CNAME sdates); | |
where USFIRM=1 and not(missing(cusip)); | |
by ticker cusip sdates; | |
run; | |
/* Create first and last 'start dates' for CUSIP link */ | |
proc sql; | |
create table IBES2 | |
as select *, min(sdates) as fdate, max(sdates) as ldate | |
from IBES1 | |
group by ticker, cusip | |
order by ticker, cusip, sdates; | |
quit; | |
/* Label date range variables and keep only most recent company name for CUSIP link */ | |
data IBES2; | |
set IBES2; | |
by ticker cusip; | |
if last.cusip; | |
label fdate="First Start date of CUSIP record"; | |
label ldate="Last Start date of CUSIP record"; | |
format fdate ldate date9.; | |
drop sdates; | |
run; | |
/* CRSP: Get all PERMNO-NCUSIP combinations */ | |
proc sort data=&CRSP1 out=CRSP1 (keep=PERMNO NCUSIP comnam namedt nameenddt); | |
where not missing(NCUSIP); | |
by PERMNO NCUSIP namedt; | |
run; | |
/* Arrange effective dates for CUSIP link */ | |
proc sql; | |
create table CRSP2 | |
as select PERMNO,NCUSIP,comnam,min(namedt)as namedt,max(nameenddt) as nameenddt | |
from CRSP1 | |
group by PERMNO, NCUSIP | |
order by PERMNO, NCUSIP, NAMEDT; | |
quit; | |
/* Label date range variables and keep only most recent company name */ | |
data CRSP2; | |
set CRSP2; | |
by permno ncusip; | |
if last.ncusip; | |
label namedt="Start date of CUSIP record"; | |
label nameenddt="End date of CUSIP record"; | |
format namedt nameenddt date9.; | |
run; | |
/* Create CUSIP Link Table */ | |
/* CUSIP date ranges are only used in scoring as CUSIPs are not reused for | |
different companies overtime */ | |
proc sql; | |
create table LINK1_1 | |
as select * | |
from IBES2 as a, CRSP2 as b | |
where a.CUSIP = b.NCUSIP | |
order by TICKER, PERMNO, ldate; | |
quit; * 14,591 IBES TICKERs matched to CRSP PERMNOs; | |
/* Score links using CUSIP date range and company name spelling distance */ | |
/* Idea: date ranges the same cusip was used in CRSP and IBES should intersect */ | |
data LINK1_2; | |
set LINK1_1; | |
by TICKER PERMNO; | |
if last.permno; * Keep link with most recent company name; | |
name_dist = min(spedis(cname,comnam),spedis(comnam,cname)); | |
if (not ((ldate<namedt) or="" (fdate="">nameenddt))) and name_dist < 30 then SCORE = 0; | |
else if (not ((ldate<namedt) or="" (fdate="">nameenddt))) then score = 1; | |
else if name_dist < 30 then SCORE = 2; | |
else SCORE = 3; | |
keep TICKER PERMNO cname comnam score; | |
run; | |
/* Step 2: Find links for the remaining unmatched cases using Exchange Ticker */ | |
/* Identify remaining unmatched cases */ | |
proc sql; | |
create table NOMATCH1 | |
as select distinct a.* | |
from IBES1 (keep=ticker) as a | |
where a.ticker NOT in (select ticker from LINK1_2) | |
order by a.ticker; | |
quit; * 990 IBES TICKERs not matched with CRSP PERMNOs using CUSIP; | |
/* Add IBES identifying information */ | |
proc sql; | |
create table NOMATCH2 | |
as select b.ticker, b.CNAME, b.OFTIC, b.sdates, b.cusip | |
from NOMATCH1 as a, &IBES1 as b | |
where a.ticker = b.ticker and not (missing(b.OFTIC)) | |
order by ticker, oftic, sdates; | |
quit; * 4,157 observations; | |
/* Create first and last 'start dates' for Exchange Tickers */ | |
proc sql; | |
create table NOMATCH3 | |
as select *, min(sdates) as fdate, max(sdates) as ldate | |
from NOMATCH2 | |
group by ticker, oftic | |
order by ticker, oftic, sdates; | |
quit; | |
/* Label date range variables and keep only most recent company name */ | |
data NOMATCH3; | |
set NOMATCH3; | |
by ticker oftic; | |
if last.oftic; | |
label fdate="First Start date of OFTIC record"; | |
label ldate="Last Start date of OFTIC record"; | |
format fdate ldate date9.; | |
drop sdates; | |
run; | |
/* Get entire list of CRSP stocks with Exchange Ticker information */ | |
proc sort data=&CRSP1 out=CRSP1 (keep=ticker comnam permno ncusip namedt nameenddt); | |
where not missing(ticker); | |
by permno ticker namedt; | |
run; | |
/* Arrange effective dates for link by Exchange Ticker */ | |
proc sql; | |
create table CRSP2 | |
as select permno,comnam,ticker as crsp_ticker,ncusip, | |
min(namedt)as namedt,max(nameenddt) as nameenddt | |
from CRSP1 | |
group by permno, ticker | |
order by permno, crsp_ticker, namedt; | |
quit; * CRSP exchange ticker renamed to crsp_ticker to avoid confusion with IBES TICKER; | |
/* Label date range variables and keep only most recent company name */ | |
data CRSP2; | |
set CRSP2; | |
if last.crsp_ticker; | |
by permno crsp_ticker; | |
label namedt="Start date of exch. ticker record"; | |
label nameenddt="End date of exch. ticker record"; | |
format namedt nameenddt date9.; | |
run; | |
/* Merge remaining unmatched cases using Exchange Ticker */ | |
/* Note: Use ticker date ranges as exchange tickers are reused overtime */ | |
proc sql; | |
create table LINK2_1 | |
as select a.ticker,a.oftic, b.permno, a.cname, b.comnam, a.cusip, b.ncusip, a.ldate | |
from NOMATCH3 as a, CRSP2 as b | |
where a.oftic = b.crsp_ticker and | |
(ldate>=namedt) and (fdate<=nameenddt) | |
order by ticker, oftic, ldate; | |
quit; * 146 new match of 136 IBES TICKERs; | |
/* Score using company name using 6-digit CUSIP and company name spelling distance */ | |
data LINK2_2; | |
set LINK2_1; | |
name_dist = min(spedis(cname,comnam),spedis(comnam,cname)); | |
if substr(cusip,1,6)=substr(ncusip,1,6) and name_dist < 30 then SCORE=0; | |
else if substr(cusip,1,6)=substr(ncusip,1,6) then score = 4; | |
else if name_dist < 30 then SCORE = 5; | |
else SCORE = 6; | |
run; | |
/* Some companies may have more than one TICKER-PERMNO link, */ | |
/* so re-sort and keep the case (PERMNO & Company name from CRSP) */ | |
/* that gives the lowest score for each IBES TICKER (first.ticker=1) */ | |
proc sort data=LINK2_2; by ticker score; run; | |
data LINK2_3; | |
set LINK2_2; | |
by ticker score; | |
if first.ticker; | |
keep ticker permno cname comnam permno score; | |
run; | |
/* Step 3: Add Exchange Ticker links to CUSIP links */ | |
/* Create final link table and save it in home directory */ | |
data home.ICLINK; | |
set LINK1_2 LINK2_3; | |
run; | |
proc sort data=home.ICLINK; by TICKER PERMNO; run; | |
/* Create Labels for ICLINK dataset and variables */ | |
proc datasets lib=home nolist; | |
modify ICLINK (label="IBES-CRSP Link Table"); | |
label CNAME = "Company Name in IBES"; | |
label COMNAM= "Company Name in CRSP"; | |
label SCORE= "Link Score: 0(best) - 6"; | |
run; | |
quit; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment