# Authors: Alexandre Gramfort <alexandre.gramfort@telecom-paristech.fr>
#
# License: BSD (3-clause)
from math import log

import numpy as np
from scipy import linalg
import matplotlib.pyplot as pl


def ard(y, X, noise_cov, maxit=500, tol=1e-20, update_mode=1, gammas=None):
    """ARD / Sparse Bayesian Learning / Gamma-MAP.

    Parameters
    ----------
    y : ndarray, shape (n_sensors,) or (n_sensors, n_times)
        The target.
    X : ndarray, shape (n_sensors, n_features)
        The data / design matrix.
    noise_cov : ndarray, shape (n_sensors, n_sensors)
        Noise covariance used to build the model covariance.
    maxit : int
        Maximum number of iterations.
    tol : float
        Tolerance on the relative decrease of the cost function.
    update_mode : int
        Update rule for the gammas: 1 (default rule), 2 (MacKay fixed
        point) or 3 (modified MacKay fixed point).
    gammas : array, shape (n_features,)
        Initial value for the gammas.

    Returns
    -------
    coef : ndarray
        The estimated coefficients.
    E : list of float
        Value of the cost function at each iteration.

    References
    ----------
    Wipf et al. Analysis of Empirical Bayesian Methods for
    Neuroelectromagnetic Source Localization. Advances in Neural
    Information Processing Systems (2007).
    """
    ndim = y.ndim
    if ndim == 1:
        y = y[:, np.newaxis]
    if gammas is None:
        gammas = np.ones(X.shape[1])
    eps = np.finfo(float).eps
    n_sensors, n_times = y.shape
    yinit = y.copy()
    yyt = np.dot(y, y.T)
    pobj = np.nan
    Xinit = X.copy()
    n_points = X.shape[1]
    n_active = n_points
    active_set = np.arange(n_points)
    E = []
    for k in range(maxit):
        gammas[np.isnan(gammas)] = 0.0
        gidx = (np.abs(gammas) > eps)
        active_set = active_set[gidx]
        gammas = gammas[gidx]
        # update only active gammas (once set to zero it stays at zero)
        if n_active > len(active_set):
            n_active = active_set.size
            X = X[:, gidx]
        Cy = noise_cov + np.dot(X * gammas[np.newaxis, :], X.T)
        # Invert Cy keeping symmetry
        U, S, _ = linalg.svd(Cy, full_matrices=False)
        S = S[np.newaxis, :]
        Cy = np.dot(U * S, U.T)
        Cyinv = np.dot(U / (S + eps), U.T)
        CyinvX = np.dot(Cyinv, X)
        A = np.dot(CyinvX.T, y)
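        # Row i of A holds x_i' Cy^{-1} y_t for each time point t; together
        # with the gammas it yields the posterior mean of the coefficients,
        # gamma_i * x_i' Cy^{-1} y_t (see the final coef computation below).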
        if update_mode == 1:
            # Default update rule for the gammas
            gammas = (gammas ** 2 * np.mean(np.abs(A) ** 2, axis=1)
                      + gammas * (1 - gammas
                                  * np.sum(X * CyinvX, axis=0)))
        elif update_mode == 2:
            # MacKay fixed point update (equivalent to VB Sato update)
            gammas *= (np.mean(np.abs(A) ** 2, axis=1)
                       / np.sum(X * CyinvX, axis=0))
        elif update_mode == 3:
            # modified MacKay fixed point update
            gammas *= np.sqrt(np.mean(np.abs(A) ** 2, axis=1)
                              / np.sum(X * CyinvX, axis=0))
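        # In scalar form the mode-2 fixed-point rule reads
        #   gamma_i <- gamma_i * mean_t (x_i' Cy^{-1} y_t)^2
        #                      / (x_i' Cy^{-1} x_i),
        # so gamma_i grows when column x_i explains more of y than the
        # current model predicts, and shrinks toward pruning otherwise.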
        pobj_old = pobj
        _, logdet_Cy = np.linalg.slogdet(Cy)
        # negative log-likelihood of the Gaussian density
        pobj = 0.5 * (n_times * (logdet_Cy + n_sensors * log(2 * np.pi))
                      + np.abs(np.trace(np.dot(yyt, Cyinv))))
        E.append(pobj)
        err = np.abs(pobj - pobj_old) / np.abs(pobj_old)
        if err < tol:
            break
        if n_active == 0:
            break
    if k < maxit - 1:
        print('\nConvergence reached!\n')
    else:
        print('\nConvergence NOT reached!\n')
    full_gammas = np.zeros(n_points)
    full_gammas[active_set] = gammas
    gammas = full_gammas
    # posterior mean: coef = diag(gammas) X' Cy^{-1} y
    Xinv = np.dot(gammas[:, np.newaxis] * Xinit.T, Cyinv)
    coef = np.dot(Xinv, yinit)
    if ndim == 1:
        coef = np.ravel(coef)
    return coef, E
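

# Demo: recover a sparse coefficient vector from noisy linear measurements
# and compare the ARD estimate against scikit-learn's Lasso and ElasticNet.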
if __name__ == '__main__':
    from sklearn.metrics import r2_score

    pl.close('all')

    ###########################################################################
    # generate some sparse data to play with
    np.random.seed(42)
    n_samples, n_features = 100, 200
    X = np.random.randn(n_samples, n_features)
    coef = 3 * np.random.randn(n_features)
    inds = np.arange(n_features)
    np.random.shuffle(inds)
    coef[inds[10:]] = 0  # sparsify coef
    y = np.dot(X, coef)

    # add noise
    sigma = 0.01
    y += sigma * np.random.normal(size=n_samples)

    # Split data in train set and test set
    X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
    X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]
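
    # The solver takes an explicit noise covariance; with iid noise of known
    # standard deviation sigma this is simply sigma ** 2 times the identity.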
    ###########################################################################
    # ARD
    cov = np.diag(sigma ** 2 * np.ones(len(y_train)))
    ard_coef_, E = ard(y_train, X_train, cov, maxit=100000, update_mode=2)

    pl.plot(E)
    pl.xlabel('Iteration')
    pl.ylabel('Cost function')
    pl.show()

    y_pred_ard = np.dot(X_test, ard_coef_)
    r2_score_ard = r2_score(y_test, y_pred_ard)
    print('ARD')
    print("r^2 on test data : %f" % r2_score_ard)
    print("||coef_ - coef|| : %f" % linalg.norm(coef - ard_coef_))
    ###########################################################################
    # Lasso
    from sklearn.linear_model import Lasso

    alpha = 0.1
    lasso = Lasso(alpha=alpha)
    y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
    r2_score_lasso = r2_score(y_test, y_pred_lasso)
    print("Lasso")
    print("r^2 on test data : %f" % r2_score_lasso)
    print("||coef_ - coef|| : %f" % linalg.norm(coef - lasso.coef_))
    ###########################################################################
    # ElasticNet
    from sklearn.linear_model import ElasticNet

    enet = ElasticNet(alpha=alpha, l1_ratio=0.7)
    y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
    r2_score_enet = r2_score(y_test, y_pred_enet)
    print("Elastic Net")
    print("r^2 on test data : %f" % r2_score_enet)
    print("||coef_ - coef|| : %f" % linalg.norm(coef - enet.coef_))

    pl.figure()
    pl.plot(enet.coef_, label='Elastic net coefficients')
    pl.plot(lasso.coef_, label='Lasso coefficients')
    pl.plot(ard_coef_, label='ARD coefficients')
    pl.plot(coef, '--', label='original coefficients')
    pl.legend(loc='best')
    pl.show()