Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
Distance Correlation in Python
from scipy.spatial.distance import pdist, squareform
import numpy as np
from numbapro import jit, float32
def distcorr(X, Y):
    """Compute the sample distance correlation between X and Y.

    The pairwise Euclidean distance matrix of each sample is
    double-centred (row means and column means subtracted, grand mean
    added back). The squared distance covariance is the mean of the
    element-wise product of the two centred matrices, and the distance
    correlation is the square root of that covariance normalised by the
    two distance variances.

    Parameters
    ----------
    X, Y : array_like
        Samples with one observation per row. A 1-D input of length
        ``n`` is treated as ``n`` scalar observations. X and Y may have
        a different number of columns but must have the same number of
        rows.

    Returns
    -------
    float
        The distance correlation. ``0.0`` is returned when either sample
        has zero distance variance (e.g. all observations identical),
        instead of the NaN a 0/0 division would produce.

    Raises
    ------
    ValueError
        If X and Y do not contain the same number of observations.

    Examples
    --------
    >>> a = [1, 2, 3, 4, 5]
    >>> b = np.array([1, 2, 9, 4, 4])
    >>> round(float(distcorr(a, b)), 6)
    0.762676
    """
    X = np.atleast_1d(X)
    Y = np.atleast_1d(Y)
    # pdist needs a 2-D (observations x features) array, so a flat vector
    # becomes a single-feature column. Inputs that are already 2-D, including
    # (n, 1) columns, are left untouched.
    if X.ndim == 1:
        X = X[:, None]
    if Y.ndim == 1:
        Y = Y[:, None]

    n = X.shape[0]
    if Y.shape[0] != n:
        raise ValueError('Number of samples must match')

    # Full symmetric n x n matrices of pairwise Euclidean distances.
    a = squareform(pdist(X))
    b = squareform(pdist(Y))

    # Double-centre each distance matrix.
    A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
    B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()

    dcov2_xy = (A * B).sum() / float(n * n)
    dcov2_xx = (A * A).sum() / float(n * n)
    dcov2_yy = (B * B).sum() / float(n * n)

    dvar_prod = np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy)
    # A sample with zero distance variance gives 0/0 below; the distance
    # correlation is defined as 0 in that case. NaN inputs still propagate
    # because NaN == 0 is False.
    if dvar_prod == 0:
        return 0.0

    # dcov2_xy is non-negative in exact arithmetic; clamp rounding noise so
    # the square root cannot turn a tiny negative value into NaN.
    dcov2_xy = max(dcov2_xy, 0.0)
    dcor = np.sqrt(dcov2_xy) / np.sqrt(dvar_prod)
    return dcor
Copy link

omadav commented Jan 14, 2015

This is great. I'm working on computational neuroscience and this is exactly what I was looking for. Thanks.

Copy link

wladston commented Jan 29, 2015

Thanks so much for this :)

Copy link

wladston commented Jan 29, 2015

I added support for p-value estimation:

Copy link

seanlaw commented Apr 7, 2017

Is this the same as the work by Rizzo?

Copy link

drop-out commented Sep 21, 2017

Thank you, this is so helpful

Copy link

tfiers commented Apr 18, 2018

@seanlaw, yes, that seems to be the case.

(Eq. (2.8) - (2.10) in Székely, Rizzo, and Bakirov, 2007)

Copy link

mycarta commented Sep 24, 2019

@seanlaw and @tfiers :

yes, this is the same distance correlation.

Small reproducible example:

# Compare distcorr() with dcor.distance_correlation() on two iris columns.
# Needs scikit-learn, pandas and dcor installed, and distcorr() in scope.
import numpy as np  # np.c_ is used below to join the data and target arrays
from sklearn.datasets import load_iris
import pandas as pd
import dcor

iris = load_iris()
# One frame holding the feature columns followed by the target column.
iris_df = pd.DataFrame(data=np.c_[iris['data'], iris['target']],
                       columns=iris['feature_names'] + ['target'])

sepal_length = iris_df['sepal length (cm)']
petal_length = iris_df['petal length (cm)']

print("dcor distance correlation = {:.3f}".format(
    dcor.distance_correlation(sepal_length, petal_length)))
print("distcorr distance correlation = {:.3f}".format(
    distcorr(sepal_length, petal_length)))


dcor distance correlation = 0.859
distcorr distance correlation = 0.859

Copy link

JSVJ commented Dec 8, 2019

Can anyone help me plot this as a matrix in pandas?

Is it possible to have a distance correlation matrix similar to a correlation matrix?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment