Non-parametric computation of entropy and mutual-information
'''
Non-parametric computation of entropy and mutual-information

Adapted by G Varoquaux for code created by R Brette, itself
from several papers (see in the code).

These computations rely on nearest-neighbor statistics.
'''
import numpy as np

from scipy.special import gamma, psi
from scipy.linalg import det
from numpy import pi

from sklearn.neighbors import NearestNeighbors

__all__ = ['entropy', 'mutual_information', 'entropy_gaussian']


def nearest_distances(X, k=1):
    '''
    X = array(N, M)
    N = number of points
    M = number of dimensions

    Returns the distance to the kth nearest neighbor for every point in X.
    '''
    # Ask for k + 1 neighbors: the nearest neighbor of each query point is
    # the point itself (at distance 0), so the kth true neighbor is last.
    knn = NearestNeighbors(n_neighbors=k + 1)
    knn.fit(X)
    d, _ = knn.kneighbors(X)  # the first nearest neighbor is the point itself
    return d[:, -1]  # distance to the kth nearest neighbor


def entropy_gaussian(C):
    '''
    Entropy of a Gaussian variable with covariance matrix C.
    '''
    if np.isscalar(C):  # C is the variance
        return .5 * (1 + np.log(2 * pi)) + .5 * np.log(C)
    else:
        n = C.shape[0]  # dimension
        return .5 * n * (1 + np.log(2 * pi)) + .5 * np.log(abs(det(C)))
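
# The closed form above is the differential entropy of a Gaussian,
# H = 0.5 * log((2 * pi * e)**n * det(C)), expressed in nats. A minimal
# sanity check (illustrative only, not part of the original file):
#
#     entropy_gaussian(1.0)          # ~1.4189 nats = 0.5 * log(2 * pi * e)
#     entropy_gaussian(np.eye(2))    # ~2.8379 nats, twice the 1-D value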


def entropy(X, k=1):
    '''Returns the entropy of X.

    Parameters
    ==========
    X : array-like, shape (n_samples, n_features)
        The data the entropy of which is computed
    k : int, optional
        Number of nearest neighbors for density estimation

    Notes
    =====
    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
    of a random vector. Probl. Inf. Transm. 23, 95-101.

    See also: Evans, D. 2008 A computationally efficient estimator for
    mutual information. Proc. R. Soc. A 464 (2093), 1203-1215.

    and: Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
    information. Phys Rev E 69(6 Pt 2):066138.
    '''
    # Distance to the kth nearest neighbor of each sample
    r = nearest_distances(X, k)
    n, d = X.shape
    volume_unit_ball = (pi ** (.5 * d)) / gamma(.5 * d + 1)
    '''
    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
    for Continuous Random Variables. Advances in Neural Information
    Processing Systems 21 (NIPS). Vancouver (Canada), December.

    return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)
    '''
    # Kozachenko-Leonenko estimator; the small eps guards against log(0)
    # when samples coincide.
    return (d * np.mean(np.log(r + np.finfo(X.dtype).eps))
            + np.log(volume_unit_ball) + psi(n) - psi(k))
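
# Minimal usage sketch (illustrative, not part of the original file): the
# estimate is in nats, since natural logarithms are used, and should land
# close to (typically slightly below) the analytical Gaussian value.
#
#     rng = np.random.RandomState(0)
#     x = rng.randn(10000, 1)     # 10000 samples of a 1-D standard normal
#     entropy(x, k=3)             # roughly 1.42, compare entropy_gaussian(1.0)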


def mutual_information(variables, k=1):
    '''
    Returns the mutual information between any number of variables.
    Each variable is a matrix X = array(n_samples, n_features)
    where
      n_samples = number of samples
      n_features = number of dimensions

    Optionally, the following keyword argument can be specified:
      k = number of nearest neighbors for density estimation

    Example: mutual_information((X, Y)), mutual_information((X, Y, Z), k=5)
    '''
    if len(variables) < 2:
        raise AttributeError(
            "Mutual information must involve at least 2 variables")
    all_vars = np.hstack(variables)
    # MI is the sum of the marginal entropies minus the joint entropy of
    # the stacked variables.
    return (sum([entropy(X, k=k) for X in variables])
            - entropy(all_vars, k=k))
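
# Illustrative sketch (assumed usage, mirroring test_mutual_information
# below): for two jointly Gaussian 1-D variables the analytical value is
# -0.5 * log(1 - rho**2), with rho their correlation coefficient.
#
#     rng = np.random.RandomState(0)
#     z = np.dot(rng.randn(10000, 2), np.array([[1, 0.5], [0, 1]]))
#     mutual_information((z[:, [0]], z[:, [1]]), k=3)
#     # expected near 0.5 * np.log(1.25) ~ 0.11 nats, usually a bit below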


def test_entropy():
    # Testing against correlated Gaussian variables
    # (analytical results are known)
    # Entropy of a 3-dimensional Gaussian variable
    rng = np.random.RandomState(0)
    n = 50000
    d = 3
    P = np.array([[1, 0, 0], [0, 1, .5], [0, 0, 1]])
    C = np.dot(P, P.T)
    Y = rng.randn(d, n)
    X = np.dot(P, Y)
    H_th = entropy_gaussian(C)
    H_est = entropy(X.T, k=5)
    # Our estimated entropy should always be less than the actual one
    # (entropy estimation undershoots) but not by too much
    np.testing.assert_array_less(H_est, H_th)
    np.testing.assert_array_less(.9 * H_th, H_est)


def test_mutual_information():
    # Mutual information between two correlated Gaussian variables
    # Entropy of a 2-dimensional Gaussian variable
    n = 50000
    rng = np.random.RandomState(0)
    # P = np.random.randn(2, 2)
    P = np.array([[1, 0], [0.5, 1]])
    C = np.dot(P, P.T)
    U = rng.randn(2, n)
    Z = np.dot(P, U).T
    X = Z[:, 0]
    X = X.reshape(len(X), 1)
    Y = Z[:, 1]
    Y = Y.reshape(len(Y), 1)
    # in nats (natural logarithms are used throughout)
    MI_est = mutual_information((X, Y), k=5)
    MI_th = (entropy_gaussian(C[0, 0])
             + entropy_gaussian(C[1, 1])
             - entropy_gaussian(C)
             )
    # Our estimator should undershoot once again: it will undershoot more
    # for the 2D estimation than for the 1D estimation
    print(MI_est, MI_th)
    np.testing.assert_array_less(MI_est, MI_th)
    np.testing.assert_array_less(MI_th, MI_est + .3)


def test_degenerate():
    # Test that our estimators are well-behaved with regard to
    # degenerate solutions
    rng = np.random.RandomState(0)
    x = rng.randn(50000)
    X = np.c_[x, x]
    assert np.isfinite(entropy(X))
    assert np.isfinite(mutual_information((x[:, np.newaxis],
                                           x[:, np.newaxis])))


if __name__ == '__main__':
    # Run our tests
    test_entropy()
    test_mutual_information()
    test_degenerate()
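
# Quick-start note (assumed usage, not part of the original gist): running
# this file executes the three self-tests above. To use the estimators on
# your own data, pass each variable as an (n_samples, n_features) array:
#
#     X = np.random.randn(5000, 1)
#     Y = X + 0.1 * np.random.randn(5000, 1)       # strongly dependent on X
#     mutual_information((X, Y), k=3)              # large, positive (in nats)
#     mutual_information((X, np.random.randn(5000, 1)), k=3)   # close to 0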