Skip to content

Instantly share code, notes, and snippets.

@alexchinco
Created October 21, 2014 17:27
Show Gist options
  • Save alexchinco/5c988014be9340934c86 to your computer and use it in GitHub Desktop.
Save alexchinco/5c988014be9340934c86 to your computer and use it in GitHub Desktop.
Code for No Coincidence, No Story, No Trade (2014)
import time, subprocess
def replaceYear(year1, year2):
inFile = open("prog-2--create-regression-data--template--21oct2014.sas").read()
inFile = inFile.replace("YYYY", year1)
inFile = inFile.replace("ZZZZ", year2)
outFile = open("prog-2--create-regression-data--Y-" + year2 + "--21oct2014.sas", 'w')
outFile.write(inFile)
outFile.close()
for year in range(1978,2013):
year1 = str(year-1)
year2 = str(year)
replaceYear(year1, year2)
subprocess.call("sas prog-2--create-regression-data--Y-" + year2 + "--21oct2014.sas -noterminal", shell=True)
print year2 + "complete!"
print time.ctime()
;/*************************************************************************************
***************************************************************************************
@title: Pull Daily NYSE Data
-----------------------------------------------------------------------------------
@author: ALEX CHINCO
@date: 21-OCT-2013
***************************************************************************************
**************************************************************************************/;
OPTIONS LINESIZE = 208;
OPTIONS PAGESIZE = 208;
LIBNAME comp '/wrds/comp/sasdata/naa';
LIBNAME co '/wrds/comp/sasdata/naa/company';
LIBNAME crsp '/wrds/crsp/sasdata/a_stock';
LIBNAME cc '/wrds/crsp/sasdata/a_ccm';
LIBNAME ff '/wrds/ff/sasdata';
LIBNAME home '/home/uiuc/chinco/trading-on-coincidences/data';
LIBNAME temp '/sastemp3';
%LET startDate = '01JUN1975'd;
%LET endDate = '31DEC2012'd;
;/*************************************************************************************
@sec: Grab firm-level daily stock data for NYSE.
**************************************************************************************/;
* @desc: Grab issue-level identifiers.;
PROC SQL;
CREATE TABLE nyseIssLst AS
SELECT DISTINCT a.permno
FROM crsp.dsenames AS a
WHERE (a.nameendt >= &startDate) AND
(a.exchcd = 1);
QUIT;
* @desc: Grab issue-level common stock data.;
PROC SQL;
CREATE TABLE nyseIssLvlDat AS
SELECT a.date FORMAT=DATE9. AS date,
a.permno FORMAT=8. AS permno,
a.permco FORMAT=8. AS permco,
a.ret FORMAT=BEST8. AS ret,
a.prc FORMAT=DOLLAR12.2 AS prc,
a.shrout*1000 FORMAT=BEST12. AS shares,
a.vol FORMAT=BEST8. AS vlm
FROM crsp.dsf AS a,
nyseIssLst AS b
WHERE (a.permno = b.permno) AND
(&startDate <= a.date) AND
(&endDate >= a.date)
ORDER BY a.permco, a.permno, a.date;
QUIT;
* @desc: Add on issue characteristics such as share code, share class, trading;
* status, and security status.;
PROC SQL;
CREATE TABLE nyseIssLvlDat AS
SELECT a.*,
b.comnam FORMAT=$35. AS name,
b.tsymbol FORMAT=$4. AS ticker,
b.shrcd FORMAT=BEST8. AS shrCode,
b.shrcls FORMAT=$4. AS shrClss,
b.secstat FORMAT=$1. AS SecStat
FROM nyseIssLvlDat AS a,
crsp.dsenames AS b
WHERE (a.permco = b.permco) AND
(a.permno = b.permno) AND
(b.trdstat = "A") AND
(b.namedt <= a.date) AND
(b.nameendt >= a.date)
ORDER BY a.permco, a.permno, a.date;
QUIT;
* @desc: Aggregate daily issue-level data for each firm to creat firm-level data.;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.date FORMAT=DATE9. AS date,
a.permco FORMAT=8. AS permco,
SUM(COALESCE(a.ret,0) * a.shares)/SUM(a.shares) FORMAT=BEST10. AS ret,
SUM(ABS(COALESCE(a.prc,0)) * a.shares) FORMAT=DOLLAR24.2 AS mcap,
SUM(COALESCE(a.vlm,0)) FORMAT=BEST10. AS vlm
FROM nyseIssLvlDat AS a
GROUP BY a.permco, a.date
ORDER BY a.permco, a.date;
QUIT;
;/*************************************************************************************
@sec: Add on firm-level characteristics.
**************************************************************************************/;
* @desc: Add on company name.;
PROC SQL;
CREATE TABLE frmNamDat AS
SELECT DISTINCT a.permco FORMAT=8. AS permco,
BTRIM(a.comnam) FORMAT=$35. AS name,
MIN(a.namedt) FORMAT=DATE9. AS namedt,
MAX(a.nameenddt) FORMAT=DATE9. AS nameenddt
FROM crsp.stocknames AS a
WHERE (a.nameenddt >= &startDate)
GROUP BY a.permco,
BTRIM(a.comnam)
ORDER BY a.permco;
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
b.name FORMAT=$35. AS name,
b.namedt FORMAT=DATE9. AS namedt
FROM nyseFrmLvlDat AS a LEFT JOIN
frmNamDat AS b
ON (a.permco = b.permco) AND
(b.namedt <= a.date) AND
(b.nameenddt >= a.date)
ORDER BY a.permco,
a.date,
b.namedt;
QUIT;
DATA nyseFrmLvlDat;
SET nyseFrmLvlDat;
BY permco date;
RETAIN lagPermco lagDate;
IF (first.permco AND first.date) THEN
DO;
lagPermco = permco;
lagDate = date;
dupObs = 0;
END;
ELSE IF ((permco NE lagPermco) OR (date NE lagDate)) THEN
DO;
lagPermco = permco;
lagDate = date;
dupObs = 0;
END;
ELSE
DO;
lagPermco = permco;
lagDate = date;
dupObs = 1;
END;
RUN;
DATA nyseFrmLvlDat;
SET nyseFrmLvlDat;
IF (dupObs = 1) THEN
DO;
DELETE;
END;
DROP lagPermco lagDate dupObs namedt;
RUN;
* @desc: Merge on COMPUSTAT identifier.;
PROC SQL;
CREATE TABLE compIdDat AS
SELECT DISTINCT a.lpermco FORMAT=8. AS permco,
a.gvkey FORMAT=$6. AS gvkey,
a.linkprim FORMAT=$1. AS linkPrim,
a.linktype FORMAT=$2. AS linkType,
a.linkdt FORMAT=DATE9. AS linkdt,
COALESCE(a.linkenddt,'31DEC2012'd) FORMAT=DATE9. AS linkenddt
FROM cc.ccmxpf_linktable AS a
WHERE (a.linktype IN ("LC", "LX", "LU", "LN", "LS")) AND
(a.linkPrim IN ('P', 'C')) AND
(a.usedflag = 1)
ORDER BY a.gvkey, a.linkdt;
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
b.gvkey FORMAT=$6. AS gvkey,
b.linkprim FORMAT=$1. AS linkPrim,
b.linktype FORMAT=$2. AS linkType
FROM nyseFrmLvlDat AS a LEFT JOIN
compIdDat AS b
ON (b.permco = a.permco) AND
(b.linkdt <= a.date) AND
(b.linkenddt >= a.date)
ORDER BY a.permco, a.date;
QUIT;
PROC SQL;
CREATE TABLE dblObsLst AS
SELECT a.permco,
a.date,
SUM(a.gvkey NE "") AS numGvkeys
FROM nyseFrmLvlDat AS a
GROUP BY a.permco, a.date
ORDER BY a.permco, a.date;
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
COALESCE(b.numGvkeys, 0) AS numGvkeys
FROM nyseFrmLvlDat AS a LEFT JOIN
dblObsLst AS b
ON (a.permco = b.permco) AND
(a.date = b.date)
ORDER BY a.permco, a.date, a.gvkey;
QUIT;
DATA nyseFrmLvlDat;
SET nyseFrmLvlDat;
BY permco date;
RETAIN lagPermco lagDate;
IF (first.permco AND first.date) THEN
DO;
lagPermco = permco;
lagDate = date;
dupObs = 0;
END;
ELSE IF ((permco NE lagPermco) OR (date NE lagDate)) THEN
DO;
lagPermco = permco;
lagDate = date;
dupObs = 0;
END;
ELSE
DO;
lagPermco = permco;
lagDate = date;
dupObs = 1;
END;
RUN;
DATA nyseFrmLvlDat;
SET nyseFrmLvlDat;
IF (dupObs = 1) THEN
DO;
DELETE;
END;
DROP lagPermco lagDate dupObs linkPrim linkType numGvkeys;
RUN;
* @desc: Add on industry classification and city of corporate HQ.;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
b.city FORMAT=$25. AS city1,
b.state FORMAT=$8. AS state1,
b.addzip FORMAT=$5. AS zip1,
b.gind FORMAT=8. AS gics1,
b.naics FORMAT=$8. AS naics1,
b.sic FORMAT=8. AS sic1,
b.idbflag FORMAT=$1. AS idb1,
b.loc FORMAT=$3. AS loc1
FROM nyseFrmLvlDat AS a LEFT JOIN
cc.comphead AS b
ON (b.gvkey = a.gvkey)
ORDER BY a.permco,
a.gvkey,
a.date;
QUIT;
PROC SQL;
CREATE TABLE compHistDat AS
SELECT a.gvkey,
CASE
WHEN (a.hchgdt = '14APR2007'd) THEN &startDate
ELSE a.hchgdt
END FORMAT=DATE9. AS startDate,
COALESCE(a.hchgenddt,'31DEC2012'd) FORMAT=DATE9. AS endDate,
a.hcity FORMAT=$25. AS city,
a.hstate FORMAT=$8. AS state,
a.haddzip FORMAT=$5. AS zip,
INPUT(a.hgind, 8.) FORMAT=8. AS gics,
a.hnaics FORMAT=$8. AS naics,
a.hsic FORMAT=8. AS sic,
a.hidbflag FORMAT=$1. AS idb,
a.hloc FORMAT=$3. AS loc
FROM cc.comphist AS a
ORDER BY a.gvkey, a.hchgdt, a.hchgenddt;
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
b.city FORMAT=$25. AS city2,
b.state FORMAT=$8. AS state2,
b.zip FORMAT=$5. AS zip2,
b.gics FORMAT=8. AS gics2,
b.naics FORMAT=$8. AS naics2,
b.sic FORMAT=8. AS sic2,
b.idb FORMAT=$1. AS idb2,
b.loc FORMAT=$3. AS loc2
FROM nyseFrmLvlDat AS a LEFT JOIN
compHistDat AS b
ON (a.gvkey = b.gvkey) AND
(a.date <= b.endDate) AND
(a.date >= b.startDate)
ORDER BY a.permco,
a.gvkey,
a.date;
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
COALESCE(a.city2, a.city1) FORMAT=$25. AS city,
COALESCE(a.state2, a.state1) FORMAT=$8. AS state,
COALESCE(a.zip2, a.zip1) FORMAT=$5. AS zip,
COALESCE(a.gics2, a.gics1) FORMAT=8. AS gics,
COALESCE(a.naics2, a.naics1) FORMAT=$8. AS naics,
COALESCE(a.sic2, a.sic1) FORMAT=8. AS sic,
COALESCE(a.idb2, a.idb1) FORMAT=$1. AS idb,
COALESCE(a.loc2, a.loc1) FORMAT=$3. AS loc
FROM nyseFrmLvlDat AS a
ORDER BY a.permco, a.gvkey, a.date;
QUIT;
DATA nyseFrmLvlDat;
SET nyseFrmLvlDat;
DROP city1 city2 state1 state2 zip1 zip2 gics1 gics2 naics1 naics2 sic1 sic2 idb1 ibd1 loc1 loc2;
RUN;
;/*************************************************************************************
@sec: Save final data.
**************************************************************************************/;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM nyseFrmLvlDat AS a;
QUIT;
PROC SQL;
SELECT COUNT(DISTINCT(a.permco)) AS numNyseFirms
FROM nyseFrmLvlDat AS a;
QUIT;
PROC SQL;
SELECT COUNT(a.permco) AS numNyseObs
FROM nyseFrmLvlDat AS a;
QUIT;
DATA temp.nyseFrmLvlDat_final;
SET nyseFrmLvlDat;
RUN;
;/*************************************************************************************
***************************************************************************************
@title: Create Regression Data
-----------------------------------------------------------------------------------
@author: ALEX CHINCO
@date: 21-OCT-2013
***************************************************************************************
**************************************************************************************/;
OPTIONS LINESIZE = 208;
OPTIONS PAGESIZE = 208;
LIBNAME comp '/wrds/comp/sasdata/naa';
LIBNAME co '/wrds/comp/sasdata/naa/company';
LIBNAME crsp '/wrds/crsp/sasdata/a_stock';
LIBNAME ind '/wrds/crsp/sasdata/a_indexes';
LIBNAME cc '/wrds/crsp/sasdata/a_ccm';
LIBNAME ff '/wrds/ff/sasdata';
LIBNAME home '/home/uiuc/chinco/trading-on-coincidences/data';
LIBNAME temp '/sastemp3';
%INCLUDE '/home/uiuc/chinco/trading-on-coincidences/ROLLING_REG.sas';
%LET startDate = '01JULYYYY'd;
%LET endDate = '31DECZZZZ'd;
;/*************************************************************************************
@sec: Load daily firm-level NYSE data created by Program 1.
**************************************************************************************/;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
YEAR(a.date) FORMAT=4. AS year,
MONTH(a.date) FORMAT=2. AS month
FROM temp.nyseFrmLvlDat_final AS a
WHERE (a.date >= &startDate) AND
(a.date <= &endDate)
ORDER BY a.permco, a.date;
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
b.rf FORMAT=BEST10. AS rf,
(a.ret - b.rf) FORMAT=BEST10. AS xRet,
b.mktrf FORMAT=BEST10. AS mkt,
b.smb FORMAT=BEST10. AS smb,
b.hml FORMAT=BEST10. AS hml
FROM nyseFrmLvlDat AS a LEFT JOIN
ff.factors_daily AS b
ON (a.date = b.date)
ORDER BY a.permco, a.date;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM nyseFrmLvlDat AS a
ORDER BY a.date;
QUIT;
;/*************************************************************************************
@sec: Identify first day of each month.
**************************************************************************************/;
PROC SQL;
CREATE TABLE dateDat AS
SELECT DISTINCT a.date,
a.year,
a.month
FROM nyseFrmLvlDat AS a
ORDER BY a.year, a.month, a.date;
QUIT;
DATA dateDat;
SET dateDat;
BY year month;
IF (first.month) THEN
DO;
ftdotm = 1;
END;
ELSE
DO;
ftdotm = 0;
END;
unit = 1;
RUN;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
b.ftdotm FORMAT=1. AS ftdotm
FROM nyseFrmLvlDat AS a LEFT JOIN
dateDat AS b
ON (a.date = b.date)
ORDER BY a.permco, a.date;
QUIT;
;/*************************************************************************************
@sec: Compute factor loadings over previous 3 months.
**************************************************************************************/;
%ROLLINGREG(
DATA = nyseFrmLvlDat,
OUT_DS = nyseFrmLvlCoefDat,
ID = permco,
DATE = date,
MODEL_EQUATION = xRet = mkt smb hml / NOINT,
START_DATE = 7-1-YYYY,
END_DATE = 12-31-ZZZZ,
FREQ = day,
S = 1,
N = 90
);
PROC SQL;
CREATE TABLE nyseFrmLvlCoefDat AS
SELECT a.permco,
a.date2 FORMAT=DATE9. AS date,
a.mkt FORMAT=BEST8. AS tetMkt,
a.smb FORMAT=BEST8. AS tetSmb,
a.hml FORMAT=BEST8. AS tetHml,
a.regObs FORMAT=2. AS regObs
FROM nyseFrmLvlCoefDat AS a;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM nyseFrmLvlCoefDat AS a
ORDER BY a.permco, a.date;
QUIT;
;/*************************************************************************************
@sec: Compute ranking period returns and average factor loadings.
**************************************************************************************/;
* @desc: Ranking period returns over the previous 3 months.;
PROC SQL;
CREATE TABLE nyseFrmLvlRankDat AS
SELECT DISTINCT a.date,
a.permco,
a.name
FROM nyseFrmLvlDat AS a
WHERE (a.month IN (1,4,7,10)) AND
(a.ftdotm = 1);
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlRankDat AS
SELECT DISTINCT a.*,
(EXP(SUM(LOG(1+b.ret))) - 1) FORMAT=BEST8. as cRetRank
FROM nyseFrmLvlRankDat AS a LEFT JOIN
nyseFrmLvlDat AS b
ON (a.permco = b.permco) AND
(0 < INTCK('month', b.date, a.date) <= 3)
GROUP BY a.permco, a.date;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM nyseFrmLvlRankDat AS a
ORDER BY a.permco, a.date;
QUIT;
* @desc: Average factor loadings over the previous 3 months.;
PROC SQL;
CREATE TABLE nyseFrmLvlFcldDat AS
SELECT DISTINCT a.date,
a.permco,
a.name
FROM nyseFrmLvlDat AS a
WHERE (a.month IN (1,4,7,10)) AND
(a.ftdotm = 1);
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlFcldDat AS
SELECT DISTINCT a.*,
MEAN(b.tetMkt) FORMAT=BEST8. as tetMkt,
MEAN(b.tetSmb) FORMAT=BEST8. as tetSmb,
MEAN(b.tetHml) FORMAT=BEST8. as tetHml,
MEAN(b.regObs) FORMAT=BEST8. as regObs
FROM nyseFrmLvlFcldDat AS a LEFT JOIN
nyseFrmLvlCoefDat AS b
ON (a.permco = b.permco) AND
(0 < INTCK('month', b.date, a.date) <= 3)
GROUP BY a.permco, a.date;
QUIT;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM nyseFrmLvlFcldDat AS a
ORDER BY a.permco, a.date;
QUIT;
;/*************************************************************************************
@sec: Compute holding period excess and abnormal returns as well as trading volumes.
**************************************************************************************/;
PROC SQL;
CREATE TABLE nyseFrmLvlDat AS
SELECT a.*,
b.tetMkt,
b.tetSmb,
b.tetHml,
b.regObs
FROM nyseFrmLvlDat AS a LEFT JOIN
nyseFrmLvlFcldDat AS b
ON (a.permco = b.permco) AND
(a.date = b.date)
ORDER BY a.permco, a.date;
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlHoldDat AS
SELECT DISTINCT a.date,
a.permco,
a.tetMkt,
a.tetSmb,
a.tetHml
FROM nyseFrmLvlDat AS a
WHERE (a.month IN (1,4,7,10)) AND
(a.ftdotm = 1) AND
(a.regObs >= 60);
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlHoldDat AS
SELECT DISTINCT a.date,
a.permco,
(EXP(SUM(LOG(1 + b.xret))) - 1) FORMAT=BEST16. AS cxRetHold,
(EXP(SUM(LOG(1 + b.xret - (a.tetMkt * b.mkt + a.tetSmb * b.smb + a.tetHml * b.hml)))) - 1) FORMAT=BEST16. AS caRetHold,
MEAN(b.vlm) FORMAT=BEST16. AS vlmHold
FROM nyseFrmLvlHoldDat AS a LEFT JOIN
nyseFrmLvlDat AS b
ON (a.permco = b.permco) AND
(INTCK('month', a.date, b.date) = 0)
GROUP BY a.permco, a.date;
QUIT;
PROC SQL;
CREATE TABLE nyseFrmLvlLagHoldDat AS
SELECT DISTINCT a.date,
a.permco,
MEAN(b.vlm) FORMAT=BEST16. AS vlmLagHold
FROM nyseFrmLvlHoldDat AS a LEFT JOIN
nyseFrmLvlDat AS b
ON (a.permco = b.permco) AND
(INTCK('month', b.date, a.date) = 1)
GROUP BY a.permco, a.date;
QUIT;
;/*************************************************************************************
@sec: Create regression data.
**************************************************************************************/;
PROC SQL;
CREATE TABLE nyseRegDat AS
SELECT a.date,
a.permco,
a.gvkey,
a.mcap,
a.name,
a.gics,
a.naics,
a.sic,
a.city,
a.state,
a.zip,
a.idb,
a.loc
FROM nyseFrmLvlDat AS a
WHERE (a.year = ZZZZ) AND
(a.month IN (1,4,7,10)) AND
(a.ftdotm = 1)
ORDER BY a.permco, a.date;
QUIT;
PROC SQL;
CREATE TABLE nyseRegDat AS
SELECT a.*,
b.cRetRank
FROM nyseRegDat AS a LEFT JOIN
nyseFrmLvlRankDat AS b
ON (a.date = b.date) AND
(a.permco = b.permco)
ORDER BY a.permco, a.date;
QUIT;
PROC SQL;
CREATE TABLE nyseRegDat AS
SELECT a.*,
b.cxRetHold,
b.caRetHold,
b.vlmHold
FROM nyseRegDat AS a LEFT JOIN
nyseFrmLvlHoldDat AS b
ON (a.date = b.date) AND
(a.permco = b.permco)
ORDER BY a.permco, a.date;
QUIT;
PROC SQL;
CREATE TABLE nyseRegDat AS
SELECT a.*,
b.vlmLagHold
FROM nyseRegDat AS a LEFT JOIN
nyseFrmLvlLagHoldDat AS b
ON (a.date = b.date) AND
(a.permco = b.permco)
ORDER BY a.permco, a.date;
QUIT;
;/*************************************************************************************
@sec: Export data.
**************************************************************************************/;
PROC SQL OUTOBS = 25;
SELECT a.*
FROM nyseRegDat AS a;
QUIT;
PROC SQL;
SELECT COUNT(DISTINCT(a.permco)) AS numNyseFirms
FROM nyseRegDat AS a;
QUIT;
PROC SQL;
SELECT COUNT(a.permco) AS numNyseObs
FROM nyseRegDat AS a;
QUIT;
PROC SQL;
SELECT a.date,
COUNT(a.permco) AS numNyseObsPerDate
FROM nyseRegDat AS a
GROUP BY a.date;
QUIT;
PROC EXPORT
DATA = nyseRegDat
OUTFILE = "nyse-reg-data--R-qtr--H-mon--ZZZZ.csv"
DBMS = CSV
REPLACE;
RUN;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment