Last active
May 1, 2016 13:23
-
-
Save werediver/e5348bf945aeecfc5a2a to your computer and use it in GitHub Desktop.
Demonstration of different approaches to calculating cross-correlation (code written in IPython Notebook).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division, print_function | |
import numpy as np | |
import matplotlib.pyplot as pp | |
%matplotlib inline | |
def xcorr(a, b, mode='same'): | |
a = np.asarray(a) | |
b = np.asarray(b) | |
a = (a - a.mean()) / np.sqrt(np.correlate(a, a)) | |
b = (b - b.mean()) / np.sqrt(np.correlate(b, b)) | |
r = np.correlate(a, b, mode) | |
lag_max = int(len(r) / 2) | |
lags = np.asarray(xrange(-lag_max, lag_max + len(r) % 2)) | |
sl = 2 / np.sqrt(len(r)) | |
return r, lags, sl | |
def xcorr_sf(a, b, lag_max=None): | |
a = np.asarray(a) | |
b = np.asarray(b) | |
assert len(a) == len(b) | |
n = len(a) | |
if lag_max == None: | |
lag_max = n - 1 | |
assert lag_max >= 0 and lag_max < n | |
lags = xrange(-lag_max, lag_max + 1) | |
sl = np.zeros(len(lags)) # Significance level | |
r = np.zeros(len(lags)) | |
for i in xrange(len(lags)): | |
lag = lags[i] | |
sl[i] = 2 / (np.sqrt(n - abs(lag))) | |
a2 = a[:n - lag] if lag >= 0 else a[-lag:] | |
a2 -= a2.mean() | |
b2 = b[lag:] if lag >= 0 else b[:lag] | |
b2 -= b2.mean() | |
r[i] = np.correlate(a2, b2) / np.sqrt(np.correlate(a2, a2) * np.correlate(b2, b2)) | |
return r, lags, sl | |
n=100 | |
x=np.array(range(n)) | |
a=np.random.normal(size=n) | |
b=np.random.normal(size=n) | |
r, lags, sl = xcorr(a, a) | |
pp.plot(lags, r) | |
pp.plot(lags, np.ones(len(lags)) * sl, color='r') | |
pp.plot(lags, np.ones(len(lags)) * -sl, color='r') | |
pp.show() | |
r, lags, sl = xcorr_sf(a, a, lag_max=int(len(a)/2)) | |
pp.plot(lags, r) | |
pp.plot(lags, sl, color='r') | |
pp.plot(lags, -sl, color='r') | |
pp.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi,
I am a researcher from India and was looking for a code to do cross correlation for my research and stumbled upon this code from my google search. I got this code and tested it. It works fine and as I understand the strategy followed in the code is to remove the impact of auto-correlations of the input series. Also I note that the significant level is calculated at each lags separately in xcorr_sf, which is a very good strategy.
I would like to report here a strange behaviour I observe to the input data from a pandas data_frame after call to the xcorr_sf, which I am not able to understand. Can you please help me in understanding this strange behaviour?
Below is the series
in[18] : df.head()
Out[18]:
sst t2m prcp
2002-01-01 12:00:00 0.025684 0.315641 -0.171182
2002-01-02 12:00:00 0.088497 0.306607 -0.326295
2002-01-03 12:00:00 0.152503 0.286796 -0.474805
2002-01-04 12:00:00 0.216098 0.256310 -0.604994
2002-01-05 12:00:00 0.277633 0.215637 -0.705857
then I issue command to calculate cross correlation as below
in [19]: cc,lags,sl=xcorr_sf(df.sst,df.prcp)
Then again testing the input series
In [20]: df.head()
Out[20]:
sst t2m prcp
2002-01-01 12:00:00 0.000000 0.315641 0.369804
2002-01-02 12:00:00 0.030503 0.306607 0.051056
2002-01-03 12:00:00 0.060888 0.286796 -0.259537
2002-01-04 12:00:00 0.089914 0.256310 -0.553517
2002-01-05 12:00:00 0.116331 0.215637 -0.817661
To my surprise the order of data in the input data got changed
for all 3 variables
May I know the reason for this strange behaviour?