Distance Correlation in Python
from scipy.spatial.distance import pdist, squareform | |
import numpy as np | |
from numbapro import jit, float32 | |
def distcorr(X, Y):
    """Compute the sample distance correlation between X and Y.

    Both inputs are treated as collections of observations: the first axis
    indexes samples, the second (if present) indexes features.  A 1-D input
    is interpreted as ``n`` scalar observations.

    Parameters
    ----------
    X, Y : array_like
        Sample data of shape ``(n,)`` or ``(n, p)``.  ``X`` and ``Y`` may have
        a different number of features but must have the same number of
        samples ``n``.

    Returns
    -------
    numpy.float64
        The distance correlation.  It is 0 when either sample has zero
        distance variance (e.g. all observations identical), because the
        ratio is undefined there and 0 is the conventional value.

    Raises
    ------
    ValueError
        If ``X`` and ``Y`` do not contain the same number of samples.

    Examples
    --------
    >>> a = [1, 2, 3, 4, 5]
    >>> b = np.array([1, 2, 9, 4, 4])
    >>> round(float(distcorr(a, b)), 6)
    0.762676
    """
    X = np.atleast_1d(X)
    Y = np.atleast_1d(Y)
    # Only genuinely 1-D input is promoted to a column.  Testing
    # ``prod(shape) == len`` would also match an (n, 1) array and turn it
    # into (n, 1, 1), which pdist rejects.
    if X.ndim == 1:
        X = X[:, None]
    if Y.ndim == 1:
        Y = Y[:, None]

    n = X.shape[0]
    if Y.shape[0] != n:
        raise ValueError('Number of samples must match')

    # Pairwise Euclidean distance matrices, shape (n, n).
    a = squareform(pdist(X))
    b = squareform(pdist(Y))

    # Double-centre each matrix: subtract row and column means, add back
    # the grand mean.
    A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
    B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()

    norm = float(n * n)
    dcov2_xy = (A * B).sum() / norm
    dcov2_xx = (A * A).sum() / norm
    dcov2_yy = (B * B).sum() / norm

    # Zero distance variance on either side makes the ratio 0/0; return the
    # conventional value instead of NaN.
    if dcov2_xx <= 0 or dcov2_yy <= 0:
        return np.float64(0.0)

    # Rounding can push a (theoretically non-negative) covariance slightly
    # below zero, which would make sqrt return NaN.
    dcov2_xy = max(dcov2_xy, 0.0)

    dcor = np.sqrt(dcov2_xy) / np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy))
    return dcor
This comment has been minimized.
This comment has been minimized.
Thanks so much for this :) |
This comment has been minimized.
This comment has been minimized.
I added support for p-value estimation: https://gist.github.com/wladston/c931b1495184fbb99bec |
This comment has been minimized.
This comment has been minimized.
Is this the same as the work by Rizzo? |
This comment has been minimized.
This comment has been minimized.
Thank you, this is so helpful |
This comment has been minimized.
This comment has been minimized.
@seanlaw, yes, that seems to be the case. (Eq. (2.8) - (2.10) in Székely, Rizzo, and Bakirov, 2007) |
This comment has been minimized.
This comment has been minimized.
yes, this is the same distance correlation. Small reproducible example:
returns:
|
This comment has been minimized.
This comment has been minimized.
Can anyone help me plot this as a matrix in pandas? Is it possible to build a distance correlation matrix similar to a correlation matrix? |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
This is great. I'm working in computational neuroscience and this is exactly what I was looking for. Thanks.