GaelVaroquaux/mutual_info.py

## mutual_info.py
'''
Non-parametric computation of entropy and mutual-information

Adapted by G Varoquaux for code created by R Brette, itself
from several papers (see in the code).

This code is maintained at https://github.com/mutualinfo/mutual_info
Please download the latest code there, to have improvements and
bug fixes.

These computations rely on nearest-neighbor statistics
'''
import numpy as np

from scipy.special import gamma,psi
from scipy import ndimage
from scipy.linalg import det
from numpy import pi

from sklearn.neighbors import NearestNeighbors

__all__=['entropy', 'mutual_information', 'entropy_gaussian']

EPS = np.finfo(float).eps


def nearest_distances(X, k=1):
    '''
    X = array(N,M)
    N = number of points
    M = number of dimensions

    returns the distance to the kth nearest neighbor for every point in X
    '''
    knn = NearestNeighbors(n_neighbors=k + 1)
    knn.fit(X)
    d, _ = knn.kneighbors(X) # the first nearest neighbor is itself
    return d[:, -1] # returns the distance to the kth nearest neighbor


def entropy_gaussian(C):
    '''
    Entropy of a gaussian variable with covariance matrix C
    '''
    if np.isscalar(C): # C is the variance
        return .5*(1 + np.log(2*pi)) + .5*np.log(C)
    else:
        n = C.shape[0] # dimension
        return .5*n*(1 + np.log(2*pi)) + .5*np.log(abs(det(C)))


def entropy(X, k=1):
    ''' Returns the entropy of the X.

    Parameters
    ===========

    X : array-like, shape (n_samples, n_features)
        The data the entropy of which is computed

    k : int, optional
        number of nearest neighbors for density estimation

    Notes
    ======

    Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
    of a random vector. Probl. Inf. Transm. 23, 95-101.
    See also: Evans, D. 2008 A computationally efficient estimator for
    mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
    and:
    Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
    information. Phys Rev E 69(6 Pt 2):066138.
    '''

    # Distance to kth nearest neighbor
    r = nearest_distances(X, k) # squared distances
    n, d = X.shape
    volume_unit_ball = (pi**(.5*d)) / gamma(.5*d + 1)
    '''
    F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
    for Continuous Random Variables. Advances in Neural Information
    Processing Systems 21 (NIPS). Vancouver (Canada), December.

    return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)
    '''
    return (d*np.mean(np.log(r + np.finfo(X.dtype).eps))
            + np.log(volume_unit_ball) + psi(n) - psi(k))


def mutual_information(variables, k=1):
    '''
    Returns the mutual information between any number of variables.
    Each variable is a matrix X = array(n_samples, n_features)
    where
      n = number of samples
      dx,dy = number of dimensions

    Optionally, the following keyword argument can be specified:
      k = number of nearest neighbors for density estimation

    Example: mutual_information((X, Y)), mutual_information((X, Y, Z), k=5)
    '''
    if len(variables) < 2:
        raise AttributeError(
                "Mutual information must involve at least 2 variables")
    all_vars = np.hstack(variables)
    return (sum([entropy(X, k=k) for X in variables])
            - entropy(all_vars, k=k))


def mutual_information_2d(x, y, sigma=1, normalized=False):
    """
    Computes (normalized) mutual information between two 1D variate from a
    joint histogram.

    Parameters
    ----------
    x : 1D array
        first variable

    y : 1D array
        second variable

    sigma: float
        sigma for Gaussian smoothing of the joint histogram

    Returns
    -------
    nmi: float
        the computed similariy measure

    """
    bins = (256, 256)

    jh = np.histogram2d(x, y, bins=bins)[0]

    # smooth the jh with a gaussian filter of given sigma
    ndimage.gaussian_filter(jh, sigma=sigma, mode='constant',
                                 output=jh)

    # compute marginal histograms
    jh = jh + EPS
    sh = np.sum(jh)
    jh = jh / sh
    s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0]))
    s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1))

    # Normalised Mutual Information of:
    # Studholme,  jhill & jhawkes (1998).
    # "A normalized entropy measure of 3-D medical image alignment".
    # in Proc. Medical Imaging 1998, vol. 3338, San Diego, CA, pp. 132-143.
    if normalized:
        mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2)))
                / np.sum(jh * np.log(jh))) - 1
    else:
        mi = ( np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1))
               - np.sum(s2 * np.log(s2)))

    return mi


###############################################################################
# Tests

def test_entropy():
    # Testing against correlated Gaussian variables
    # (analytical results are known)
    # Entropy of a 3-dimensional gaussian variable
    rng = np.random.RandomState(0)
    n = 50000
    d = 3
    P = np.array([[1, 0, 0], [0, 1, .5], [0, 0, 1]])
    C = np.dot(P, P.T)
    Y = rng.randn(d, n)
    X = np.dot(P, Y)
    H_th = entropy_gaussian(C)
    H_est = entropy(X.T, k=5)
    # Our estimated entropy should always be less that the actual one
    # (entropy estimation undershoots) but not too much
    np.testing.assert_array_less(H_est, H_th)
    np.testing.assert_array_less(.9*H_th, H_est)


def test_mutual_information():
    # Mutual information between two correlated gaussian variables
    # Entropy of a 2-dimensional gaussian variable
    n = 50000
    rng = np.random.RandomState(0)
    #P = np.random.randn(2, 2)
    P = np.array([[1, 0], [0.5, 1]])
    C = np.dot(P, P.T)
    U = rng.randn(2, n)
    Z = np.dot(P, U).T
    X = Z[:, 0]
    X = X.reshape(len(X), 1)
    Y = Z[:, 1]
    Y = Y.reshape(len(Y), 1)
    # in bits
    MI_est = mutual_information((X, Y), k=5)
    MI_th = (entropy_gaussian(C[0, 0])
             + entropy_gaussian(C[1, 1])
             - entropy_gaussian(C)
            )
    # Our estimator should undershoot once again: it will undershoot more
    # for the 2D estimation that for the 1D estimation
    print((MI_est, MI_th))
    np.testing.assert_array_less(MI_est, MI_th)
    np.testing.assert_array_less(MI_th, MI_est  + .3)


def test_degenerate():
    # Test that our estimators are well-behaved with regards to
    # degenerate solutions
    rng = np.random.RandomState(0)
    x = rng.randn(50000)
    X = np.c_[x, x]
    assert np.isfinite(entropy(X))
    assert np.isfinite(mutual_information((x[:, np.newaxis],
                                           x[:,  np.newaxis])))
    assert 2.9 < mutual_information_2d(x, x) < 3.1


def test_mutual_information_2d():
    # Mutual information between two correlated gaussian variables
    # Entropy of a 2-dimensional gaussian variable
    n = 50000
    rng = np.random.RandomState(0)
    #P = np.random.randn(2, 2)
    P = np.array([[1, 0], [.9, .1]])
    C = np.dot(P, P.T)
    U = rng.randn(2, n)
    Z = np.dot(P, U).T
    X = Z[:, 0]
    X = X.reshape(len(X), 1)
    Y = Z[:, 1]
    Y = Y.reshape(len(Y), 1)
    # in bits
    MI_est = mutual_information_2d(X.ravel(), Y.ravel())
    MI_th = (entropy_gaussian(C[0, 0])
             + entropy_gaussian(C[1, 1])
             - entropy_gaussian(C)
            )
    print((MI_est, MI_th))
    # Our estimator should undershoot once again: it will undershoot more
    # for the 2D estimation that for the 1D estimation
    np.testing.assert_array_less(MI_est, MI_th)
    np.testing.assert_array_less(MI_th, MI_est  + .2)


if __name__ == '__main__':
    # Run our tests
    test_entropy()
    test_mutual_information()
    test_degenerate()
    test_mutual_information_2d()
	'''
	Non-parametric computation of entropy and mutual-information

	Adapted by G Varoquaux for code created by R Brette, itself
	from several papers (see in the code).

	This code is maintained at https://github.com/mutualinfo/mutual_info
	Please download the latest code there, to have improvements and
	bug fixes.

	These computations rely on nearest-neighbor statistics
	'''
	import numpy as np

	from scipy.special import gamma,psi
	from scipy import ndimage
	from scipy.linalg import det
	from numpy import pi

	from sklearn.neighbors import NearestNeighbors

	__all__=['entropy', 'mutual_information', 'entropy_gaussian']

	EPS = np.finfo(float).eps


	def nearest_distances(X, k=1):
	'''
	X = array(N,M)
	N = number of points
	M = number of dimensions

	returns the distance to the kth nearest neighbor for every point in X
	'''
	knn = NearestNeighbors(n_neighbors=k + 1)
	knn.fit(X)
	d, _ = knn.kneighbors(X) # the first nearest neighbor is itself
	return d[:, -1] # returns the distance to the kth nearest neighbor


	def entropy_gaussian(C):
	'''
	Entropy of a gaussian variable with covariance matrix C
	'''
	if np.isscalar(C): # C is the variance
	return .5(1 + np.log(2pi)) + .5*np.log(C)
	else:
	n = C.shape[0] # dimension
	return .5n(1 + np.log(2pi)) + .5np.log(abs(det(C)))


	def entropy(X, k=1):
	''' Returns the entropy of the X.

	Parameters
	===========

	X : array-like, shape (n_samples, n_features)
	The data the entropy of which is computed

	k : int, optional
	number of nearest neighbors for density estimation

	Notes
	======

	Kozachenko, L. F. & Leonenko, N. N. 1987 Sample estimate of entropy
	of a random vector. Probl. Inf. Transm. 23, 95-101.
	See also: Evans, D. 2008 A computationally efficient estimator for
	mutual information, Proc. R. Soc. A 464 (2093), 1203-1215.
	and:
	Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
	information. Phys Rev E 69(6 Pt 2):066138.
	'''

	# Distance to kth nearest neighbor
	r = nearest_distances(X, k) # squared distances
	n, d = X.shape
	volume_unit_ball = (pi*(.5d)) / gamma(.5*d + 1)
	'''
	F. Perez-Cruz, (2008). Estimation of Information Theoretic Measures
	for Continuous Random Variables. Advances in Neural Information
	Processing Systems 21 (NIPS). Vancouver (Canada), December.

	return d*mean(log(r))+log(volume_unit_ball)+log(n-1)-log(k)
	'''
	return (d*np.mean(np.log(r + np.finfo(X.dtype).eps))
	+ np.log(volume_unit_ball) + psi(n) - psi(k))


	def mutual_information(variables, k=1):
	'''
	Returns the mutual information between any number of variables.
	Each variable is a matrix X = array(n_samples, n_features)
	where
	n = number of samples
	dx,dy = number of dimensions

	Optionally, the following keyword argument can be specified:
	k = number of nearest neighbors for density estimation

	Example: mutual_information((X, Y)), mutual_information((X, Y, Z), k=5)
	'''
	if len(variables) < 2:
	raise AttributeError(
	"Mutual information must involve at least 2 variables")
	all_vars = np.hstack(variables)
	return (sum([entropy(X, k=k) for X in variables])
	- entropy(all_vars, k=k))


	def mutual_information_2d(x, y, sigma=1, normalized=False):
	"""
	Computes (normalized) mutual information between two 1D variate from a
	joint histogram.

	Parameters
	----------
	x : 1D array
	first variable

	y : 1D array
	second variable

	sigma: float
	sigma for Gaussian smoothing of the joint histogram

	Returns
	-------
	nmi: float
	the computed similariy measure

	"""
	bins = (256, 256)

	jh = np.histogram2d(x, y, bins=bins)[0]

	# smooth the jh with a gaussian filter of given sigma
	ndimage.gaussian_filter(jh, sigma=sigma, mode='constant',
	output=jh)

	# compute marginal histograms
	jh = jh + EPS
	sh = np.sum(jh)
	jh = jh / sh
	s1 = np.sum(jh, axis=0).reshape((-1, jh.shape[0]))
	s2 = np.sum(jh, axis=1).reshape((jh.shape[1], -1))

	# Normalised Mutual Information of:
	# Studholme, jhill & jhawkes (1998).
	# "A normalized entropy measure of 3-D medical image alignment".
	# in Proc. Medical Imaging 1998, vol. 3338, San Diego, CA, pp. 132-143.
	if normalized:
	mi = ((np.sum(s1 * np.log(s1)) + np.sum(s2 * np.log(s2)))
	/ np.sum(jh * np.log(jh))) - 1
	else:
	mi = ( np.sum(jh * np.log(jh)) - np.sum(s1 * np.log(s1))
	- np.sum(s2 * np.log(s2)))

	return mi



	###############################################################################
	# Tests

	def test_entropy():
	# Testing against correlated Gaussian variables
	# (analytical results are known)
	# Entropy of a 3-dimensional gaussian variable
	rng = np.random.RandomState(0)
	n = 50000
	d = 3
	P = np.array([[1, 0, 0], [0, 1, .5], [0, 0, 1]])
	C = np.dot(P, P.T)
	Y = rng.randn(d, n)
	X = np.dot(P, Y)
	H_th = entropy_gaussian(C)
	H_est = entropy(X.T, k=5)
	# Our estimated entropy should always be less that the actual one
	# (entropy estimation undershoots) but not too much
	np.testing.assert_array_less(H_est, H_th)
	np.testing.assert_array_less(.9*H_th, H_est)


	def test_mutual_information():
	# Mutual information between two correlated gaussian variables
	# Entropy of a 2-dimensional gaussian variable
	n = 50000
	rng = np.random.RandomState(0)
	#P = np.random.randn(2, 2)
	P = np.array([[1, 0], [0.5, 1]])
	C = np.dot(P, P.T)
	U = rng.randn(2, n)
	Z = np.dot(P, U).T
	X = Z[:, 0]
	X = X.reshape(len(X), 1)
	Y = Z[:, 1]
	Y = Y.reshape(len(Y), 1)
	# in bits
	MI_est = mutual_information((X, Y), k=5)
	MI_th = (entropy_gaussian(C[0, 0])
	+ entropy_gaussian(C[1, 1])
	- entropy_gaussian(C)
	)
	# Our estimator should undershoot once again: it will undershoot more
	# for the 2D estimation that for the 1D estimation
	print((MI_est, MI_th))
	np.testing.assert_array_less(MI_est, MI_th)
	np.testing.assert_array_less(MI_th, MI_est + .3)


	def test_degenerate():
	# Test that our estimators are well-behaved with regards to
	# degenerate solutions
	rng = np.random.RandomState(0)
	x = rng.randn(50000)
	X = np.c_[x, x]
	assert np.isfinite(entropy(X))
	assert np.isfinite(mutual_information((x[:, np.newaxis],
	x[:, np.newaxis])))
	assert 2.9 < mutual_information_2d(x, x) < 3.1


	def test_mutual_information_2d():
	# Mutual information between two correlated gaussian variables
	# Entropy of a 2-dimensional gaussian variable
	n = 50000
	rng = np.random.RandomState(0)
	#P = np.random.randn(2, 2)
	P = np.array([[1, 0], [.9, .1]])
	C = np.dot(P, P.T)
	U = rng.randn(2, n)
	Z = np.dot(P, U).T
	X = Z[:, 0]
	X = X.reshape(len(X), 1)
	Y = Z[:, 1]
	Y = Y.reshape(len(Y), 1)
	# in bits
	MI_est = mutual_information_2d(X.ravel(), Y.ravel())
	MI_th = (entropy_gaussian(C[0, 0])
	+ entropy_gaussian(C[1, 1])
	- entropy_gaussian(C)
	)
	print((MI_est, MI_th))
	# Our estimator should undershoot once again: it will undershoot more
	# for the 2D estimation that for the 1D estimation
	np.testing.assert_array_less(MI_est, MI_th)
	np.testing.assert_array_less(MI_th, MI_est + .2)


	if __name__ == '__main__':
	# Run our tests
	test_entropy()
	test_mutual_information()
	test_degenerate()
	test_mutual_information_2d()