Skip to content

Instantly share code, notes, and snippets.

@WeiHao97
Forked from wladston/distcorr.py
Created June 26, 2019 01:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save WeiHao97/b3f43beef2f450581f6dc131d9254d4c to your computer and use it in GitHub Desktop.
Save WeiHao97/b3f43beef2f450581f6dc131d9254d4c to your computer and use it in GitHub Desktop.
Distance correlation with p-value
from scipy.spatial.distance import pdist, squareform
import numpy as np
import copy
def distcorr(Xval, Yval, pval=True, nruns=500):
""" Compute the distance correlation function, returning the p-value.
Based on Satra/distcorr.py (gist aa3d19a12b74e9ab7941)
>>> a = [1,2,3,4,5]
>>> b = np.array([1,2,9,4,4])
>>> distcorr(a, b)
(0.76267624241686671, 0.404)
"""
X = np.atleast_1d(Xval)
Y = np.atleast_1d(Yval)
if np.prod(X.shape) == len(X):
X = X[:, None]
if np.prod(Y.shape) == len(Y):
Y = Y[:, None]
X = np.atleast_2d(X)
Y = np.atleast_2d(Y)
n = X.shape[0]
if Y.shape[0] != X.shape[0]:
raise ValueError('Number of samples must match')
a = squareform(pdist(X))
b = squareform(pdist(Y))
A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()
dcov2_xy = (A * B).sum() / float(n * n)
dcov2_xx = (A * A).sum() / float(n * n)
dcov2_yy = (B * B).sum() / float(n * n)
dcor = np.sqrt(dcov2_xy) / np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy))
if pval:
greater = 0
for i in range(nruns):
Y_r = copy.copy(Yval)
np.random.shuffle(Y_r)
if distcorr(Xval, Y_r, pval=False) >= dcor:
greater += 1
return (dcor, greater / float(nruns))
else:
return dcor
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment