Created
April 5, 2017 13:58
-
-
Save Jay-Jay-D/2a1e457c40185763f26e2aec4938c4ed to your computer and use it in GitHub Desktop.
CointegrationTest
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cointegration_test(df_data, start_date=None, end_date=None, verbose=False,
                       corr_threshold=0.9, p_value_threshold=0.1):
    """Find correlated and cointegrated pairs among all column combinations.

    Following the paper "High Frequency and Dynamic Pairs Trading Based on
    Statistical Arbitrage Using a Two-Stage Correlation and Cointegration
    Approach", every pair of columns goes through three successive filters:

    1. The Pearson coefficient of the two series must be at least
       ``corr_threshold``.
    2. The p-value returned by ``coint`` must be below ``p_value_threshold``.
    3. The first series is regressed on the second with ``sm.OLS`` (no
       constant is added) and the p-value of ``adfuller`` (Augmented
       Dickey-Fuller test) on the residuals must be below
       ``p_value_threshold``.

    Note:
        The period is selected with year-month labels ("%Y-%m"), so the whole
        month of ``start_date`` and the whole month of ``end_date`` are
        included; the day component of both dates is ignored when slicing.

    Args:
        df_data (pandas.DataFrame):
            A DataFrame with the close prices for each stock/security.
            - Example:
                index       AAPL    MSFT   ...   SBUX
                2014-01-09  79.18   35.53  ...   38.8
                2014-01-10  79.62   35.76  ...   39.02
                2014-01-11  78.06   36.41  ...   38.6
                2014-01-12  76.53   36.13  ...   38.08
        start_date (Optional[datetime.date]):
            The start date of the period to analyze, if None, then is equal
            to the df_data first observation date. Defaults to None.
        end_date (Optional[datetime.date]):
            The end date of the period to analyze, if None, then is equal
            to the df_data last observation date. Defaults to None.
        verbose (bool):
            If True, print information about the pairs being tested and the
            test results. Default: False.
        corr_threshold (float):
            Minimum Pearson coefficient a pair needs to reach the
            cointegration stage. Default: 0.9.
        p_value_threshold (float):
            Upper bound (exclusive) for the p-values of both the
            cointegration test and the ADF test. Default: 0.1.

    Returns:
        pandas.DataFrame:
            A DataFrame indexed by the cointegrated pairs
            (MultiIndex named 'Stock_A', 'Stock_B') with the columns:
            - pearson: the Pearson coefficient of the correlation.
            - ADF_test_value: the test value of the ADF test.
            - p_value: the p-value of the ADF test.
            - gamma: the coefficient of the regression.
            - sd_residuals: the standard deviation of the regression residuals.
            When no pair passes all the filters the DataFrame is empty (it
            still carries the same columns and index names).
            - Example:
                                 ADF_test_value     gamma   p_value   pearson  sd_residuals
                Stock_A Stock_B
                ESV     NE            -3.336918  1.630184  0.013307  0.976602      0.764803
                        CNQ           -3.464779  1.601749  0.008939  0.958392      0.971339
                RDC     CNQ           -2.833936  1.016649  0.053587  0.931975      0.792285
                SGY     VLO           -4.638204  1.215127  0.000110  0.947521      1.055146
    """
    dt_start_date = start_date if start_date is not None else df_data.index[0].date()
    dt_end_date = end_date if end_date is not None else df_data.index[-1].date()

    # Month-resolution labels: slicing with them keeps whole calendar months.
    st_start_date = dt_start_date.strftime("%Y-%m")
    st_end_date = dt_end_date.strftime("%Y-%m")

    # Slice the period once instead of once per pair; `.loc` replaces the
    # `.ix` indexer, which no longer exists in current pandas releases.
    df_period = df_data.loc[st_start_date:st_end_date]

    cointegrated_pairs = []
    pearson_coeff = []
    p_value = []
    test_value = []
    gamma_coeff = []
    res_sd = []

    for stock_pair in combinations(df_period.columns.tolist(), 2):
        X = df_period[stock_pair[0]]
        Y = df_period[stock_pair[1]]
        if verbose:
            print("Testing pair:  {0}".format(stock_pair))

        # Stage 1: correlation filter. Written as `not (>=)` so that a NaN
        # coefficient is rejected as well.
        corr_coeff = pearsonr(X, Y)[0]
        if not (corr_coeff >= corr_threshold):
            continue
        if verbose:
            print("\t=> Pair Correlated")

        # Stage 2: cointegration test on the raw series.
        if not (coint(X, Y)[1] < p_value_threshold):
            continue
        if verbose:
            print("\t=> Pair Cointegrated")

        # Stage 3: ADF test on the residuals of X regressed on Y.
        reg = sm.OLS(X, Y).fit()
        res = reg.resid
        ADF_test = adfuller(res)
        if not (ADF_test[1] < p_value_threshold):
            continue
        if verbose:
            print("\t=> ADF test passed")

        cointegrated_pairs.append(stock_pair)
        p_value.append(ADF_test[1])
        test_value.append(ADF_test[0])
        # Positional access: the fitted params are labelled by the regressor
        # name, so plain `[0]` would be treated as a label lookup by pandas.
        gamma_coeff.append(reg.params.iloc[0])
        pearson_coeff.append(corr_coeff)
        res_sd.append(res.std())

    if verbose:
        print("From {0} to {1}, {2} cointegrated pairs were found\n".format(
            dt_start_date.strftime("%Y-%m-%d"),
            dt_end_date.strftime("%Y-%m-%d"),
            len(cointegrated_pairs)))

    # Always build the result, so that "no pairs found" yields an empty
    # DataFrame instead of leaving the return value undefined.
    index = pd.MultiIndex.from_tuples(cointegrated_pairs, names=['Stock_A', 'Stock_B'])
    df = pd.DataFrame(data={'pearson': pearson_coeff, 'ADF_test_value': test_value,
                            'p_value': p_value, 'gamma': gamma_coeff,
                            'sd_residuals': res_sd},
                      index=index)
    if verbose and cointegrated_pairs:
        print("{0} \n".format(df))
    return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment