Instantly share code, notes, and snippets.

# satra/distcorr.py

Created Oct 16, 2014
Distance Correlation in Python
 from scipy.spatial.distance import pdist, squareform import numpy as np from numbapro import jit, float32 def distcorr(X, Y): """ Compute the distance correlation function >>> a = [1,2,3,4,5] >>> b = np.array([1,2,9,4,4]) >>> distcorr(a, b) 0.762676242417 """ X = np.atleast_1d(X) Y = np.atleast_1d(Y) if np.prod(X.shape) == len(X): X = X[:, None] if np.prod(Y.shape) == len(Y): Y = Y[:, None] X = np.atleast_2d(X) Y = np.atleast_2d(Y) n = X.shape if Y.shape != X.shape: raise ValueError('Number of samples must match') a = squareform(pdist(X)) b = squareform(pdist(Y)) A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean() B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean() dcov2_xy = (A * B).sum()/float(n * n) dcov2_xx = (A * A).sum()/float(n * n) dcov2_yy = (B * B).sum()/float(n * n) dcor = np.sqrt(dcov2_xy)/np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy)) return dcor

### omadav commented Jan 14, 2015

 This is great. I'm working on computational neuroscience and this is exactly what I was looking for. Thanks.

### wladston commented Jan 29, 2015

 Thanks so much for this :)

### seanlaw commented Apr 7, 2017

 Is this the same as the work by Rizzo?

### drop-out commented Sep 21, 2017

 Thank you, this is so helpful

### tfiers commented Apr 18, 2018 • edited

 @seanlaw, yes, that seems to be the case. (Eq. (2.8) - (2.10) in Székely, Rizzo, and Bakirov, 2007)

### mycarta commented Sep 24, 2019 • edited

 @seanlaw and @tfiers: yes, this is the same distance correlation. Small reproducible example:

```
from sklearn.datasets import load_iris
import pandas as pd
import dcor  # https://dcor.readthedocs.io/en/latest/energycomparison.html

iris = load_iris()
iris_df = pd.DataFrame(data= np.c_[iris['data'], iris['target']],
                       columns= iris['feature_names'] + ['target'])

print ("dcor distance correlation = {:.3f}".format(dcor.distance_correlation(iris_df['sepal length (cm)'], iris_df['petal length (cm)'])))
print ("distcorr distance correlation = {:.3f}".format(distcorr(iris_df['sepal length (cm)'], iris_df['petal length (cm)'])))
```

 returns:

```
dcor distance correlation = 0.859
distcorr distance correlation = 0.859
```

### JSVJ commented Dec 8, 2019

 Can anyone help me plot this as a matrix in pandas? Is it possible to build a distance correlation matrix, similar to a correlation matrix?