{{ message }}

Instantly share code, notes, and snippets.

# josef-pkt/try_distance_corr.py

Created Jun 15, 2012
distance covariance and correlation
 # -*- coding: utf-8 -*- """ Created on Fri Jun 15 14:00:29 2012 Author: Josef Perktold License: MIT, BSD-3 (for statsmodels) http://en.wikipedia.org/wiki/Distance_correlation Yaroslav and Satrajit on sklearn mailing list Univariate only, distance measure is just absolute distance Note: Same as R package energy DCOR, except DCOR reports sqrt of all returns of dcov_all """ import numpy as np def dist(x, y): #1d only return np.abs(x[:, None] - y) def d_n(x): d = dist(x, x) dn = d - d.mean(0) - d.mean(1)[:,None] + d.mean() return dn def dcov_all(x, y): dnx = d_n(x) dny = d_n(y) denom = np.product(dnx.shape) dc = (dnx * dny).sum() / denom dvx = (dnx**2).sum() / denom dvy = (dny**2).sum() / denom dr = dc / (np.sqrt(dvx) * np.sqrt(dvy)) return dc, dr, dvx, dvy import matplotlib.pyplot as plt fig = plt.figure() for case in range(1,5): np.random.seed(9854673) x = np.linspace(-1,1, 501) if case == 1: y = - x**2 + 0.2 * np.random.rand(len(x)) elif case == 2: y = np.cos(x*2*np.pi) + 0.1 * np.random.rand(len(x)) elif case == 3: x = np.sin(x*2*np.pi) + 0.0 * np.random.rand(len(x)) #circle elif case == 4: x = np.sin(x*1.5*np.pi) + 0.1 * np.random.rand(len(x)) #bretzel dc, dr, dvx, dvy = dcov_all(x, y) print dc, dr, dvx, dvy ax = fig.add_subplot(2,2, case) #ax.set_xlim(-1, 1) ax.plot(x, y, '.') yl = ax.get_ylim() ax.text(-0.95, yl + 0.9 * np.diff(yl), 'dr=%4.2f' % dr) plt.show()

### nickhsmith commented Oct 15, 2014

 What python version are you running? And what exactly would the inputs be? I have 2 lists of floats (x,y) x =[5.1,5.3,5.2 ...] and y = [7.0,6.8,7.2 ...] should I use those as the input? I'm using 2.7 and getting an error on the distance method. x[:, None] gives me the error TypeError: list indices must be integers, not tuple Thanks

### tranberg commented Feb 23, 2015

 @nickhsmith: Format your inputs as arrays like so: x = np.array([5.1, 5.3, 5.2, ...]) y = np.array([7.0, 6.8, 7.2, ...])

### macoj commented Dec 12, 2016

 Hi, where you have in line 41 `dr = dc / (np.sqrt(dvx) * np.sqrt(dvy))` I believe it should be `dr = np.sqrt(dc) / np.sqrt(np.sqrt(dvx) * np.sqrt(dvy))`

### GraniteConsultingReviews commented Sep 4, 2017

 This code is hard to understand