Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
distance covariance and correlation
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 15 14:00:29 2012
Author: Josef Perktold
License: MIT, BSD-3 (for statsmodels)
http://en.wikipedia.org/wiki/Distance_correlation
Yaroslav and Satrajit on sklearn mailing list
Univariate only, distance measure is just absolute distance
Note: Same as R package energy DCOR, except DCOR reports sqrt of all returns of dcov_all
"""
import numpy as np
def dist(x, y):
#1d only
return np.abs(x[:, None] - y)
def d_n(x):
d = dist(x, x)
dn = d - d.mean(0) - d.mean(1)[:,None] + d.mean()
return dn
def dcov_all(x, y):
dnx = d_n(x)
dny = d_n(y)
denom = np.product(dnx.shape)
dc = (dnx * dny).sum() / denom
dvx = (dnx**2).sum() / denom
dvy = (dny**2).sum() / denom
dr = dc / (np.sqrt(dvx) * np.sqrt(dvy))
return dc, dr, dvx, dvy
import matplotlib.pyplot as plt
fig = plt.figure()
for case in range(1,5):
np.random.seed(9854673)
x = np.linspace(-1,1, 501)
if case == 1:
y = - x**2 + 0.2 * np.random.rand(len(x))
elif case == 2:
y = np.cos(x*2*np.pi) + 0.1 * np.random.rand(len(x))
elif case == 3:
x = np.sin(x*2*np.pi) + 0.0 * np.random.rand(len(x)) #circle
elif case == 4:
x = np.sin(x*1.5*np.pi) + 0.1 * np.random.rand(len(x)) #bretzel
dc, dr, dvx, dvy = dcov_all(x, y)
print dc, dr, dvx, dvy
ax = fig.add_subplot(2,2, case)
#ax.set_xlim(-1, 1)
ax.plot(x, y, '.')
yl = ax.get_ylim()
ax.text(-0.95, yl[0] + 0.9 * np.diff(yl), 'dr=%4.2f' % dr)
plt.show()
@nickhsmith

This comment has been minimized.

Show comment Hide comment
@nickhsmith

nickhsmith Oct 15, 2014

What python version are you running? And what exactly would the inputs be? I have 2 lists of floats (x,y) x =[5.1,5.3,5.2 ...] and y = [7.0,6.8,7.2 ...] should I use those as the input?

I'm using 2.7 and getting an error on the distance method. x[:, None] gives me the error TypeError: list indices must be integers, not tuple

Thanks

What python version are you running? And what exactly would the inputs be? I have 2 lists of floats (x,y) x =[5.1,5.3,5.2 ...] and y = [7.0,6.8,7.2 ...] should I use those as the input?

I'm using 2.7 and getting an error on the distance method. x[:, None] gives me the error TypeError: list indices must be integers, not tuple

Thanks

@tranberg

This comment has been minimized.

Show comment Hide comment
@tranberg

tranberg Feb 23, 2015

@nickhsmith: Format your inputs as arrays like so:
x = np.array([5.1, 5.3, 5.2, ...])
y = np.array([7.0, 6.8, 7.2, ...])

@nickhsmith: Format your inputs as arrays like so:
x = np.array([5.1, 5.3, 5.2, ...])
y = np.array([7.0, 6.8, 7.2, ...])

@macoj

This comment has been minimized.

Show comment Hide comment
@macoj

macoj Dec 12, 2016

Hi, where you have in line 41

dr = dc / (np.sqrt(dvx) * np.sqrt(dvy))

I believe it should be

dr = np.sqrt(dc) / np.sqrt(np.sqrt(dvx) * np.sqrt(dvy))

macoj commented Dec 12, 2016

Hi, where you have in line 41

dr = dc / (np.sqrt(dvx) * np.sqrt(dvy))

I believe it should be

dr = np.sqrt(dc) / np.sqrt(np.sqrt(dvx) * np.sqrt(dvy))
@GraniteConsultingReviews

This comment has been minimized.

Show comment Hide comment
@GraniteConsultingReviews

GraniteConsultingReviews Sep 4, 2017

This code is hard to understand

This code is hard to understand

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment