Skip to content

Instantly share code, notes, and snippets.

@rmcgibbo
Created October 30, 2012 08:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save rmcgibbo/3979018 to your computer and use it in GitHub Desktop.
Save rmcgibbo/3979018 to your computer and use it in GitHub Desktop.
linear regression mixture model (weighted linear regression) with E-M
import numpy as np
import matplotlib.pyplot as pp
from matplotlib import cm
def maximization(X, y, weights):
    """M-step: compute the per-class regression coefficients that maximize the
    likelihood, given the soft class assignments.

    Each class's coefficients are the weighted-least-squares solution
    beta_i = (X^T W_i X)^+ X^T W_i y, where W_i = diag(weights[:, i]).

    Parameters
    ----------
    X : np.array, shape=[n_cases, n_features]
        Inputs
    y : np.array, shape=[n_cases]
        Response variable
    weights : np.array, shape=[n_cases, n_classes]
        For each case, gives the probability it was generated by each of the classes

    Returns
    -------
    beta : np.array, shape=[n_classes, n_features]
        beta[i] gives the regression parameters betahat for class i
    """
    n_cases, n_features = X.shape
    n_classes = weights.shape[1]
    assert weights.shape[0] == n_cases
    beta = np.zeros((n_classes, n_features))
    # range (not Python-2-only xrange) so the code runs on both Python 2 and 3
    for i in range(n_classes):
        W = np.diag(weights[:, i])
        # intermediate computation shared by both factors of the normal equations
        X_T_W = np.dot(X.T, W)
        # weighted least squares; pinv tolerates a rank-deficient design matrix
        beta[i, :] = np.dot(np.linalg.pinv(np.dot(X_T_W, X)), np.dot(X_T_W, y))
    return beta
def expectation(X, y, beta):
    """E-step: compute the expected soft class assignments given the current beta.

    Uses an (implicit unit-variance) Gaussian residual model: the unnormalized
    responsibility of class i for case j is exp(-(y_j - x_j . beta_i)^2),
    normalized over classes for each case.

    Parameters
    ----------
    X : np.array, shape=[n_cases, n_features]
        Inputs
    y : np.array, shape=[n_cases]
        Response variable
    beta : np.array, shape=[n_classes, n_features]
        beta[i] gives the regression parameters betahat for class i

    Returns
    -------
    weights : np.array, shape=[n_cases, n_classes]
        For each case, gives the probability it was generated by each of the classes
    """
    # squared residuals, shape (n_classes, n_cases)
    sq = np.square(y - np.dot(X, beta.T).T)
    # Shift by the per-case minimum before exponentiating. The normalized
    # ratio is unchanged (softmax shift invariance), but this prevents
    # exp(-large) underflowing to all-zeros and yielding 0/0 -> NaN weights.
    sq = sq - np.min(sq, axis=0)
    prob = np.exp(-sq)
    return (prob / np.sum(prob, axis=0)).T
def plot_2d_example():
    """Demo: fit a 2-class linear-regression mixture with EM on synthetic 1-D data.

    Generates 100 points, half with slope a=1 and half with slope b=3 (plus unit
    Gaussian noise), runs two EM iterations from a random soft assignment, and
    saves a scatter plot with the fitted lines to 'wls-em-2d-example.png'.
    """
    a = 1
    b = 3
    X = 10 * np.random.randn(100, 1)
    y = np.concatenate((X[:50, 0] * a, X[50:, 0] * b)) + np.random.randn(100)
    # random nonnegative weights, normalized so each case's weights sum to 1
    weights = np.random.randn(100, 2) ** 2
    weights = (weights.T / np.sum(weights, axis=1)).T
    pp.scatter(X[:, 0], y, edgecolor='k', facecolors='none')
    max_iterations = 2
    for i in range(max_iterations):
        beta = maximization(X, y, weights)
        weights = expectation(X, y, beta)
        # print() calls (not Python 2 print statements) so the script runs on Python 3
        print('beta %s' % i)
        print(beta)
        # plot each class's fitted line for this iteration, colored by iteration
        x = np.linspace(np.min(X), np.max(X))
        for j in range(beta.shape[0]):
            pp.plot(x, beta[j] * x, c=cm.jet(float(i) / max_iterations),
                    label='it %s' % i, lw=2)
    pp.legend()
    pp.savefig('wls-em-2d-example.png')


if __name__ == '__main__':
    plot_2d_example()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment