diogojc/multivariateGaussian.py

## multivariateGaussian.py
import numpy as np
import matplotlib.pyplot as plt


def params(X):
    """
    Calculates the mean vector and covariance matrix for the given data.

    Arguments
    ---------
    X: mxn matrix with m samples drawn from an unknown multivariate Gaussian
       distribution.

    Returns
    ---------
    (mu, Sigma)
    mu: n sized vector with the means of the n independent variables.
    Sigma: nxn covariance matrix of the n independent variables.
    """
    return (np.mean(X, 0), np.cov(X.T))


def density(X, mu, sigma):
    """
    Calculates the probability of the data according to an estimated
    multivariate gaussian distribution.

    Arguments
    ---------
    X: mxn matrix with m samples drawn from an unknown multivariate Gaussian
       distribution.
    mu: n sized vector with the means of the n independent variables.
    Sigma: nxn covariance matrix of the n independent variables.

    Returns
    ---------
    m sized vector with probabilities of the data.
    """
    assert X.shape[0] > X.shape[1], "Degenerate matrix, m must be greater than n."
    detsigma = np.linalg.det(sigma)
    n = len(mu)
    a = 1 / (np.power(2 * np.pi, n / 2.) * np.sqrt(detsigma))
    invsigma = np.linalg.inv(sigma)
    b = np.array([np.exp(-0.5 * np.dot(np.dot((x - mu), invsigma), (x - mu).T))
                  for x in X])
    return a * b


class Classification(object):
    """
    Model data as a mixture of multivariate gaussian distributions
    """
    def fit(self, X, y):
        self.classes = np.unique(y)
        self.mus, self.sigmas = dict(), dict()
        for i in self.classes:
            self.mus[i], self.sigmas[i] = params(X[y == i])

    def predict_proba(self, X):
        return np.array([density(X, self.mus[i], self.sigmas[i])
                         for i in self.classes]).T

    def predict(self, X):
        Y = self.predict_proba(X)
        return self.classes[np.argmax(Y, 1)]


class OutlierDetection(object):

    def fit(self, X):
        self.mu, self.sigma = params(X)

    def predict_proba(self, X):
        return density(X, self.mu, self.sigma)


if __name__ == '__main__':
    # Create synthetic data
    mux = np.array([2, 2])
    sigmax = np.array([[3, 0], [0, 3]])
    X = np.random.multivariate_normal(mux, sigmax, 100)
    yx = 1 * np.ones(X.shape[0])

    muz = np.array([10, 10])
    sigmaz = np.array([[1, .8], [.8, 1]])
    Z = np.random.multivariate_normal(muz, sigmaz, 100)
    yz = 2 * np.ones(Z.shape[0])

    muw = np.array([15, 0])
    sigmaw = np.array([[2, 0], [0, 1]])
    W = np.random.multivariate_normal(muw, sigmaw, 100)
    yw = 3 * np.ones(W.shape[0])

    # Model data
    c = Classification()
    X = np.vstack((X, Z, W))
    y = np.hstack((yx, yz, yw))
    c.fit(X, y)

    # Visualize data and model
    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    ax1.plot(X[:, 0], X[:, 1], 'rx')
    ax1.plot(Z[:, 0], Z[:, 1], 'bx')
    ax1.plot(W[:, 0], W[:, 1], 'gx')
    ax2 = fig.add_subplot(212)
    x, y = np.meshgrid(np.linspace(-5, 20, 100), np.linspace(-5, 15, 100))
    p = c.predict_proba(np.vstack((x.ravel(), y.ravel())).T)
    z = np.reshape(np.max(p, 1), (x.shape[0], x.shape[1]))
    ax2.imshow(z, extent=[np.min(x), np.max(x), np.min(y), np.max(y)], origin='lower')

    plt.show()
	import numpy as np
	import matplotlib.pyplot as plt


	def params(X):
	"""
	Calculates the mean vector and covariance matrix for the given data.

	Arguments
	---------
	X: mxn matrix with m samples drawn from an unknown multivariate Gaussian
	distribution.

	Returns
	---------
	(mu, Sigma)
	mu: n sized vector with the means of the n independent variables.
	Sigma: nxn covariance matrix of the n independent variables.
	"""
	return (np.mean(X, 0), np.cov(X.T))


	def density(X, mu, sigma):
	"""
	Calculates the probability of the data according to an estimated
	multivariate gaussian distribution.

	Arguments
	---------
	X: mxn matrix with m samples drawn from an unknown multivariate Gaussian
	distribution.
	mu: n sized vector with the means of the n independent variables.
	Sigma: nxn covariance matrix of the n independent variables.

	Returns
	---------
	m sized vector with probabilities of the data.
	"""
	assert X.shape[0] > X.shape[1], "Degenerate matrix, m must be greater than n."
	detsigma = np.linalg.det(sigma)
	n = len(mu)
	a = 1 / (np.power(2 * np.pi, n / 2.) * np.sqrt(detsigma))
	invsigma = np.linalg.inv(sigma)
	b = np.array([np.exp(-0.5 * np.dot(np.dot((x - mu), invsigma), (x - mu).T))
	for x in X])
	return a * b


	class Classification(object):
	"""
	Model data as a mixture of multivariate gaussian distributions
	"""
	def fit(self, X, y):
	self.classes = np.unique(y)
	self.mus, self.sigmas = dict(), dict()
	for i in self.classes:
	self.mus[i], self.sigmas[i] = params(X[y == i])

	def predict_proba(self, X):
	return np.array([density(X, self.mus[i], self.sigmas[i])
	for i in self.classes]).T

	def predict(self, X):
	Y = self.predict_proba(X)
	return self.classes[np.argmax(Y, 1)]


	class OutlierDetection(object):

	def fit(self, X):
	self.mu, self.sigma = params(X)

	def predict_proba(self, X):
	return density(X, self.mu, self.sigma)


	if __name__ == '__main__':
	# Create synthetic data
	mux = np.array([2, 2])
	sigmax = np.array([[3, 0], [0, 3]])
	X = np.random.multivariate_normal(mux, sigmax, 100)
	yx = 1 * np.ones(X.shape[0])

	muz = np.array([10, 10])
	sigmaz = np.array([[1, .8], [.8, 1]])
	Z = np.random.multivariate_normal(muz, sigmaz, 100)
	yz = 2 * np.ones(Z.shape[0])

	muw = np.array([15, 0])
	sigmaw = np.array([[2, 0], [0, 1]])
	W = np.random.multivariate_normal(muw, sigmaw, 100)
	yw = 3 * np.ones(W.shape[0])

	# Model data
	c = Classification()
	X = np.vstack((X, Z, W))
	y = np.hstack((yx, yz, yw))
	c.fit(X, y)

	# Visualize data and model
	fig = plt.figure()
	ax1 = fig.add_subplot(211)
	ax1.plot(X[:, 0], X[:, 1], 'rx')
	ax1.plot(Z[:, 0], Z[:, 1], 'bx')
	ax1.plot(W[:, 0], W[:, 1], 'gx')
	ax2 = fig.add_subplot(212)
	x, y = np.meshgrid(np.linspace(-5, 20, 100), np.linspace(-5, 15, 100))
	p = c.predict_proba(np.vstack((x.ravel(), y.ravel())).T)
	z = np.reshape(np.max(p, 1), (x.shape[0], x.shape[1]))
	ax2.imshow(z, extent=[np.min(x), np.max(x), np.min(y), np.max(y)], origin='lower')

	plt.show()