wladston/distcorr.py

## distcorr.py
from scipy.spatial.distance import pdist, squareform
import numpy as np
import copy


def distcorr(Xval, Yval, pval=True, nruns=500):
    """ Compute the distance correlation function, returning the p-value.
    Based on Satra/distcorr.py (gist aa3d19a12b74e9ab7941)

    >>> a = [1,2,3,4,5]
    >>> b = np.array([1,2,9,4,4])
    >>> distcorr(a, b)
    (0.76267624241686671, 0.266)
    """
    X = np.atleast_1d(Xval)
    Y = np.atleast_1d(Yval)
    if np.prod(X.shape) == len(X):
        X = X[:, None]
    if np.prod(Y.shape) == len(Y):
        Y = Y[:, None]
    X = np.atleast_2d(X)
    Y = np.atleast_2d(Y)
    n = X.shape[0]
    if Y.shape[0] != X.shape[0]:
        raise ValueError('Number of samples must match')
    a = squareform(pdist(X))
    b = squareform(pdist(Y))
    A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
    B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()

    dcov2_xy = (A * B).sum() / float(n * n)
    dcov2_xx = (A * A).sum() / float(n * n)
    dcov2_yy = (B * B).sum() / float(n * n)
    dcor = np.sqrt(dcov2_xy) / np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy))

    if pval:
        greater = 0
        for i in range(nruns):
            Y_r = copy.copy(Yval)
            np.random.shuffle(Y_r)
            if distcorr(Xval, Y_r, pval=False) > dcor:
                greater += 1
        return (dcor, greater / float(nruns))
    else:
        return dcor
	from scipy.spatial.distance import pdist, squareform
	import numpy as np
	import copy


	def distcorr(Xval, Yval, pval=True, nruns=500):
	""" Compute the distance correlation function, returning the p-value.
	Based on Satra/distcorr.py (gist aa3d19a12b74e9ab7941)

	>>> a = [1,2,3,4,5]
	>>> b = np.array([1,2,9,4,4])
	>>> distcorr(a, b)
	(0.76267624241686671, 0.266)
	"""
	X = np.atleast_1d(Xval)
	Y = np.atleast_1d(Yval)
	if np.prod(X.shape) == len(X):
	X = X[:, None]
	if np.prod(Y.shape) == len(Y):
	Y = Y[:, None]
	X = np.atleast_2d(X)
	Y = np.atleast_2d(Y)
	n = X.shape[0]
	if Y.shape[0] != X.shape[0]:
	raise ValueError('Number of samples must match')
	a = squareform(pdist(X))
	b = squareform(pdist(Y))
	A = a - a.mean(axis=0)[None, :] - a.mean(axis=1)[:, None] + a.mean()
	B = b - b.mean(axis=0)[None, :] - b.mean(axis=1)[:, None] + b.mean()

	dcov2_xy = (A * B).sum() / float(n * n)
	dcov2_xx = (A * A).sum() / float(n * n)
	dcov2_yy = (B * B).sum() / float(n * n)
	dcor = np.sqrt(dcov2_xy) / np.sqrt(np.sqrt(dcov2_xx) * np.sqrt(dcov2_yy))

	if pval:
	greater = 0
	for i in range(nruns):
	Y_r = copy.copy(Yval)
	np.random.shuffle(Y_r)
	if distcorr(Xval, Y_r, pval=False) > dcor:
	greater += 1
	return (dcor, greater / float(nruns))
	else:
	return dcor