Distance Correlation in Python
from scipy.spatial.distance import pdist, squareform
import numpy as np
from numbapro import jit, float32
def distcorr(X, Y):
    """Compute the sample distance correlation between X and Y.

    The statistic is built from the pairwise Euclidean distance matrices of
    the two samples, each double-centred (row means and column means
    subtracted, grand mean added back).

    Parameters
    ----------
    X, Y : array_like
        Samples with the same number of observations along the first axis.
        A 1-D input is treated as ``n`` scalar observations; a 2-D input is
        treated as ``n`` rows of vector observations.

    Returns
    -------
    float
        The distance correlation. ``0.0`` is returned when either sample
        has zero distance variance (e.g. a constant input), where the ratio
        would otherwise be 0/0.

    Raises
    ------
    ValueError
        If X and Y do not contain the same number of samples.

    Examples
    --------
    >>> a = [1,2,3,4,5]
    >>> b = np.array([1,2,9,4,4])
    >>> round(float(distcorr(a, b)), 6)
    0.762676
    """
    X = np.atleast_1d(X)
    Y = np.atleast_1d(Y)
    # pdist expects one observation per row, so a flat sequence of scalars
    # becomes a single column. Inputs that are already 2-D are left alone.
    if X.ndim == 1:
        X = X[:, None]
    if Y.ndim == 1:
        Y = Y[:, None]
    n = X.shape[0]
    if Y.shape[0] != n:
        raise ValueError('Number of samples must match')

    # Full n-by-n pairwise distance matrices.
    a = squareform(pdist(X))
    b = squareform(pdist(Y))

    # Double centring: remove row and column means, add back the grand mean.
    A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
    B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()

    norm = float(n * n)
    # Clamp at zero: rounding error can leave a tiny negative value here,
    # which would make the square root below return nan.
    dcov2_xy = max((A * B).sum() / norm, 0.0)
    dcov2_xx = (A * A).sum() / norm
    dcov2_yy = (B * B).sum() / norm

    denom = np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy))
    if denom == 0:
        # At least one sample has no spread; avoid dividing zero by zero.
        return 0.0
    return np.sqrt(dcov2_xy) / denom
omadav commented Jan 14, 2015

This is great. I'm working on computational neuroscience and this is exactly what I was looking for. Thanks.

wladston commented Jan 29, 2015

Thanks so much for this :)

wladston commented Jan 29, 2015

I added support for p-value estimation:

seanlaw commented Apr 7, 2017

Is this the same as the work by Rizzo?

drop-out commented Sep 21, 2017

Thank you, this is so helpful

tfiers commented Apr 18, 2018

@seanlaw, yes, that seems to be the case.

(Eq. (2.8) - (2.10) in Székely, Rizzo, and Bakirov, 2007)

mycarta commented Sep 24, 2019

@seanlaw and @tfiers :

yes, this is the same distance correlation.

Small reproducible example:

from sklearn.datasets import load_iris
import pandas as pd
import dcor

# Load the iris dataset and gather its feature columns plus the target
# column into a single DataFrame.
# NOTE(review): `np` is used below but this snippet does not import it;
# presumably numpy was already imported as `np` -- confirm before running.
iris = load_iris()
iris_df = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
                     columns= iris['feature_names'] + ['target'])

# Print the value from the `dcor` package and the value from `distcorr`
# for the same pair of columns, each formatted to three decimals.
print ("dcor distance correlation = {:.3f}".format(dcor.distance_correlation(iris_df['sepal length (cm)'], 
                                                                        iris_df['petal length (cm)'])))
print ("distcorr distance correlation = {:.3f}".format(distcorr(iris_df['sepal length (cm)'], iris_df['petal length (cm)'])))


dcor distance correlation = 0.859
distcorr distance correlation = 0.859

JSVJ commented Dec 8, 2019

Can anyone help me plot this as a matrix in pandas?

Is it possible to have a distance correlation matrix similar to a correlation matrix?

