-
-
Save GaelVaroquaux/ead9898bd3c973c40429 to your computer and use it in GitHub Desktop.
''' | |
Non-parametric computation of entropy and mutual-information | |
Adapted by G Varoquaux for code created by R Brette, itself | |
from several papers (see in the code). | |
This code is maintained at https://github.com/mutualinfo/mutual_info | |
Please download the latest code there, to have improvements and | |
bug fixes. | |
These computations rely on nearest-neighbor statistics | |
''' | |
import numpy as np | |
from scipy.special import gamma,psi | |
from scipy import ndimage | |
from scipy.linalg import det | |
from numpy import pi | |
from sklearn.neighbors import NearestNeighbors | |
__all__=['entropy', 'mutual_information', 'entropy_gaussian'] | |
EPS = np.finfo(float).eps | |
def nearest_distances(X, k=1): | |
''' | |
X = array(N,M) | |
N = number of points | |
M = number of dimensions | |
returns the distance to the kth nearest neighbor for every point in X | |
''' | |
knn = NearestNeighbors(n_neighbors=k + 1) | |
knn.fit(X) | |
d, _ = knn.kneighbors(X) # the first nearest neighbor is itself | |
return d[:, -1] # returns the distance to the kth nearest neighbor | |
def entropy_gaussian(C): | |
''' | |
Entropy of a gaussian variable with covariance matrix C | |
''' | |
if np.isscalar(C): # C is the variance | |
return .5*(1 + np.log(2*pi)) + .5*np.log(C) | |
else: | |
n = C.shape[0] # dimension | |
return .5*n*(1 + np.log(2*pi)) + .5*np.log(abs(det(C))) | |
def entropy(X, k=1): | |
''' Returns the entropy of the X. | |
Parameters | |
=========== | |
X : array-like, shape (n_samples, n_features) | |
The data the entropy of which is computed | |
k : int, optional | |
number of nearest neighbors for density estimation | |
Notes | |
====== | |
Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy | |
of a random vector. Probl. Inf. Transm. 23, 95-101. | |
See also: Evans, D. 2008 A computationally efficient estimator for | |
mutual information, Proc. R. Soc. A 464 (2093), 1203-1215. | |
and: | |
Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual | |
information. Phys Rev E 69(6 Pt 2):066138. | |
''' | |
# Distance to kth nearest neighbor | |
r = nearest_distances(X, k) # squared distances | |
n, d = X.shape | |
volume_unit_ball = (pi**(.5*d)) / gamma(.5*d + 1) | |
''' | |
F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures | |
for Continuous Random Variables. Advances in Neural Information | |
Processing Systems 21 (NIPS). Vancouver (Canada), December. | |
return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k) | |
''' | |
return (d*np.mean(np.log(r + np.finfo(X.dtype).eps)) | |
+ np.log(volume_unit_ball) + psi(n) - psi(k)) | |
def mutual_information(variables, k=1): | |
''' | |
Returns the mutual information between any number of variables. | |
Each variable is a matrix X = array(n_samples, n_features) | |
where | |
n = number of samples | |
dx,dy = number of dimensions | |
Optionally, the following keyword argument can be specified: | |
k = number of nearest neighbors for density estimation | |
Example: mutual_information((X, Y)), mutual_information((X, Y, Z), k=5) | |
''' | |
if len(variables) < 2: | |
raise AttributeError( | |
"Mutual information must involve at least 2 variables") | |
all_vars = np.hstack(variables) | |
return (sum([entropy(X, k=k) for X in variables]) | |
- entropy(all_vars, k=k)) | |
def mutual_information_2d(x, y, sigma=1, normalized=False): | |
""" | |
Computes (normalized) mutual information between two 1D variate from a | |
joint histogram. | |
Parameters | |
---------- | |
x : 1D array | |
first variable | |
y : 1D array | |
second variable | |
sigma: float | |
sigma for Gaussian smoothing of the joint histogram | |
Returns | |
------- | |
nmi: float | |
the computed similariy measure | |
""" | |
bins = (256, 256) | |
jh = np.histogram2d(x, y, bins=bins)[0] | |
# smooth the jh with a gaussian filter of given sigma | |
ndimage.gaussian_filter(jh, sigma=sigma, mode='constant', | |
output=jh) | |
# compute marginal histograms | |
jh = jh + EPS | |
sh = np.sum(jh) | |
jh = jh / sh | |
s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0])) | |
s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1)) | |
# Normalised Mutual Information of: | |
# Studholme, jhill & jhawkes (1998). | |
# "A normalized entropy measure of 3-D medical image alignment". | |
# in Proc. Medical Imaging 1998, vol. 3338, San Diego, CA, pp. 132-143. | |
if normalized: | |
mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2))) | |
/ np.sum(jh * np.log(jh))) - 1 | |
else: | |
mi = ( np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1)) | |
- np.sum(s2 * np.log(s2))) | |
return mi | |
############################################################################### | |
# Tests | |
def test_entropy(): | |
# Testing against correlated Gaussian variables | |
# (analytical results are known) | |
# Entropy of a 3-dimensional gaussian variable | |
rng = np.random.RandomState(0) | |
n = 50000 | |
d = 3 | |
P = np.array([[1, 0, 0], [0, 1, .5], [0, 0, 1]]) | |
C = np.dot(P, P.T) | |
Y = rng.randn(d, n) | |
X = np.dot(P, Y) | |
H_th = entropy_gaussian(C) | |
H_est = entropy(X.T, k=5) | |
# Our estimated entropy should always be less that the actual one | |
# (entropy estimation undershoots) but not too much | |
np.testing.assert_array_less(H_est, H_th) | |
np.testing.assert_array_less(.9*H_th, H_est) | |
def test_mutual_information(): | |
# Mutual information between two correlated gaussian variables | |
# Entropy of a 2-dimensional gaussian variable | |
n = 50000 | |
rng = np.random.RandomState(0) | |
#P = np.random.randn(2, 2) | |
P = np.array([[1, 0], [0.5, 1]]) | |
C = np.dot(P, P.T) | |
U = rng.randn(2, n) | |
Z = np.dot(P, U).T | |
X = Z[:, 0] | |
X = X.reshape(len(X), 1) | |
Y = Z[:, 1] | |
Y = Y.reshape(len(Y), 1) | |
# in bits | |
MI_est = mutual_information((X, Y), k=5) | |
MI_th = (entropy_gaussian(C[0, 0]) | |
+ entropy_gaussian(C[1, 1]) | |
- entropy_gaussian(C) | |
) | |
# Our estimator should undershoot once again: it will undershoot more | |
# for the 2D estimation that for the 1D estimation | |
print((MI_est, MI_th)) | |
np.testing.assert_array_less(MI_est, MI_th) | |
np.testing.assert_array_less(MI_th, MI_est + .3) | |
def test_degenerate(): | |
# Test that our estimators are well-behaved with regards to | |
# degenerate solutions | |
rng = np.random.RandomState(0) | |
x = rng.randn(50000) | |
X = np.c_[x, x] | |
assert np.isfinite(entropy(X)) | |
assert np.isfinite(mutual_information((x[:, np.newaxis], | |
x[:, np.newaxis]))) | |
assert 2.9 < mutual_information_2d(x, x) < 3.1 | |
def test_mutual_information_2d(): | |
# Mutual information between two correlated gaussian variables | |
# Entropy of a 2-dimensional gaussian variable | |
n = 50000 | |
rng = np.random.RandomState(0) | |
#P = np.random.randn(2, 2) | |
P = np.array([[1, 0], [.9, .1]]) | |
C = np.dot(P, P.T) | |
U = rng.randn(2, n) | |
Z = np.dot(P, U).T | |
X = Z[:, 0] | |
X = X.reshape(len(X), 1) | |
Y = Z[:, 1] | |
Y = Y.reshape(len(Y), 1) | |
# in bits | |
MI_est = mutual_information_2d(X.ravel(), Y.ravel()) | |
MI_th = (entropy_gaussian(C[0, 0]) | |
+ entropy_gaussian(C[1, 1]) | |
- entropy_gaussian(C) | |
) | |
print((MI_est, MI_th)) | |
# Our estimator should undershoot once again: it will undershoot more | |
# for the 2D estimation that for the 1D estimation | |
np.testing.assert_array_less(MI_est, MI_th) | |
np.testing.assert_array_less(MI_th, MI_est + .2) | |
if __name__ == '__main__': | |
# Run our tests | |
test_entropy() | |
test_mutual_information() | |
test_degenerate() | |
test_mutual_information_2d() |
Creating a library for collective maintenance was really the right move. I'll update the gist to point there.
volume_unit_ball = 1
for dim in range(1, d+1):
volume_unit_ball = volume_unit_ball * (pi**(dim/2)/gamma(dim/2 +1)/2**dim)
I've copy pasta'd your change into this branch. https://github.com/mutualinfo/mutual_info/tree/thismartian
There is some test failure but I haven't looked at it yet. I suspect if you still have it in your head it is an easy test change. I'm not ever sure what is correct since it's been a few months since I've used this stuff.
Hi, cottrell. Thanks a lot for this. I figured out what the problem is. The distance used to calculate the entropy should be 2x the distance to the nearest neighbor. Not sure I'm doing it right but I don't seem to have the permission to make changes to the file, perhaps you could try this: in the entropy function: return d * np.mean(np.log(2*r + np.finfo(X.dtype).eps)) + np.log(volume_unit_ball) + psi(n) - psi(k)
Hi, cottrell. Thanks a lot for this. I figured out what the problem is. The distance used to calculate the entropy should be 2x the distance to the nearest neighbor. Not sure I'm doing it right but I don't seem to have the permission to make changes to the file, perhaps you could try this: in the entropy function: return d * np.mean(np.log(2*r + np.finfo(X.dtype).eps)) + np.log(volume_unit_ball) + psi(n) - psi(k)
Hmmm, I can past it in and try but let's try to fix the permissions. Having a look now ...
Ok, I think I've now added @thismartian and @GaelVaroquaux as admins on the org and the repo now. For some reason it only sets you up as members initially when I punched them in. Let me know if you can't access it.
@thismartian The tests now pass after I pasted the change in. It's still just on that branch if you want to review it. If there is an obvious test that would have failed before your change and passed after, maybe we can add that?
@cottrell the changes look fine. I think the entropy test was failing (as expected). Is that what you mean? Sorry it's not clear to me...
@cottrell the changes look fine. I think the entropy test was failing (as expected). Is that what you mean? Sorry it's not clear to me...
No I mean before the change, no tests were failing but you noticed something was wrong. What is a missing test that would have shown something was wrong?
Ah, for me the main indication was just that for some random combinations the mutual info was negative (as you had pointed out earlier too). I think in the test itself, if you change the covariance matrix to unit variance, it shows that the test failed. Honestly, I am not sure I understand how the covariance matrix is generated in the tests (I need to read about it). What I would do is: define a correlation coefficient (r), standard deviation (s), and based on these two it's possible to have a unit variance, zero mean covariance matrix:
`
s = 0.6
r = 0.9
C= np.array([[0.5*s**2, r*0.5*s**2], [r*0.5*s**2, 0.5*s**2]])
mean = [0,0]
x,y = np.random.multivariate_normal(mean, C, size = n).T
X = x.reshape(len(x), 1)
Y = y.reshape(len(y), 1) `
I was probably not clear, let me try to rephrase:
- In the mutual info test written by @GaelVaroquaux, the covariance matrix does not have a unit variance. If you change it to a unit variance matrix, the test fails. The gaussian reference used in the paper is based on a zero mean, unit variance covariance matrix.
- I don't have an expertise in statistics so I don't really understand the approach used in the code to generate the correlated XY pair. I have instead generated the XY pair based on a numpy function (previous comment), which I understand better.
Thanks @thismartian I will take a look a try to jam a test in shortly.
@thismartian Yeah I can't remember where I left it. I think I put some new tests in and have a branch there. I think I was looking at differential entropy in general and how it sort of has a dimensionality issue. The branch should run and we could just merge. Likely I'll dig into it more next time I am looking for entropy of continuous random variables.
friends
lets finish this great task?
Yes, I let cottrell manage the merge after reviewing thismartian branch.
friends
lets finish this great task?
I can merge it in. There isn't a ton of testing or anything because it's just the gist copied over so we can break and fix as we go along. I should probably set up my notifications for that org ... am not noticing these often enough.
friends
lets finish this great task?I can merge it in. There isn't a ton of testing or anything because it's just the gist copied over so we can break and fix as we go along. I should probably set up my notifications for that org ... am not noticing these often enough.
I think my reply got dropped. It is all merged in now. I had some minor refactors and added some tests. I can't remember the details but was trying to make in shift and scale invariant.
Hi everyone, thanks for the shared code, very interesting.
I've a question regarding the estimation of the entropy for gaussian variables. What is the reason for taking the absolute value of the determinant? Is it for numerical reasons?
Thank you in advance,
Hi everyone, thanks for the shared code, very interesting.
I've a question regarding the estimation of the entropy for gaussian variables. What is the reason for taking the absolute value of the determinant? Is it for numerical reasons?
Thank you in advance,
That is in the definition of the pdf basically. Plus there is a log there so you need an abs.
You can also think of it as a scale transformation (change of variables, law of the unconsious statistician) on gaussian with identity covariance.
@thismartion I haven't yet found it in another library where it should probably sit. Have created this and added you and @GaelVaroquaux . You should have admin privileges if I did it right.