Skip to content

Instantly share code, notes, and snippets.

@diogojc
Created January 1, 2012 22:51
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save diogojc/1548571 to your computer and use it in GitHub Desktop.
Save diogojc/1548571 to your computer and use it in GitHub Desktop.
Density estimation using multivariate gaussians
import numpy as np
import matplotlib.pyplot as plt
def params(X):
"""
Calculates the mean vector and covariance matrix for the given data.
Arguments
---------
X: mxn matrix with m samples drawn from an unknown multivariate Gaussian
distribution.
Returns
---------
(mu, Sigma)
mu: n sized vector with the means of the n independent variables.
Sigma: nxn covariance matrix of the n independent variables.
"""
return (np.mean(X, 0), np.cov(X.T))
def density(X, mu, sigma):
"""
Calculates the probability of the data according to an estimated
multivariate gaussian distribution.
Arguments
---------
X: mxn matrix with m samples drawn from an unknown multivariate Gaussian
distribution.
mu: n sized vector with the means of the n independent variables.
Sigma: nxn covariance matrix of the n independent variables.
Returns
---------
m sized vector with probabilities of the data.
"""
assert X.shape[0] > X.shape[1], "Degenerate matrix, m must be greater than n."
detsigma = np.linalg.det(sigma)
n = len(mu)
a = 1 / (np.power(2 * np.pi, n / 2.) * np.sqrt(detsigma))
invsigma = np.linalg.inv(sigma)
b = np.array([np.exp(-0.5 * np.dot(np.dot((x - mu), invsigma), (x - mu).T))
for x in X])
return a * b
class Classification(object):
"""
Model data as a mixture of multivariate gaussian distributions
"""
def fit(self, X, y):
self.classes = np.unique(y)
self.mus, self.sigmas = dict(), dict()
for i in self.classes:
self.mus[i], self.sigmas[i] = params(X[y == i])
def predict_proba(self, X):
return np.array([density(X, self.mus[i], self.sigmas[i])
for i in self.classes]).T
def predict(self, X):
Y = self.predict_proba(X)
return self.classes[np.argmax(Y, 1)]
class OutlierDetection(object):
def fit(self, X):
self.mu, self.sigma = params(X)
def predict_proba(self, X):
return density(X, self.mu, self.sigma)
if __name__ == '__main__':
# Create synthetic data
mux = np.array([2, 2])
sigmax = np.array([[3, 0], [0, 3]])
X = np.random.multivariate_normal(mux, sigmax, 100)
yx = 1 * np.ones(X.shape[0])
muz = np.array([10, 10])
sigmaz = np.array([[1, .8], [.8, 1]])
Z = np.random.multivariate_normal(muz, sigmaz, 100)
yz = 2 * np.ones(Z.shape[0])
muw = np.array([15, 0])
sigmaw = np.array([[2, 0], [0, 1]])
W = np.random.multivariate_normal(muw, sigmaw, 100)
yw = 3 * np.ones(W.shape[0])
# Model data
c = Classification()
X = np.vstack((X, Z, W))
y = np.hstack((yx, yz, yw))
c.fit(X, y)
# Visualize data and model
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax1.plot(X[:, 0], X[:, 1], 'rx')
ax1.plot(Z[:, 0], Z[:, 1], 'bx')
ax1.plot(W[:, 0], W[:, 1], 'gx')
ax2 = fig.add_subplot(212)
x, y = np.meshgrid(np.linspace(-5, 20, 100), np.linspace(-5, 15, 100))
p = c.predict_proba(np.vstack((x.ravel(), y.ravel())).T)
z = np.reshape(np.max(p, 1), (x.shape[0], x.shape[1]))
ax2.imshow(z, extent=[np.min(x), np.max(x), np.min(y), np.max(y)], origin='lower')
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment