/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* Written (W) 2012 Chiyuan Zhang
* Copyright (C) 2012 Chiyuan Zhang
*/
#include <shogun/lib/config.h>
#include <vector>
#include <limits>
#include <algorithm>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/regression/LeastAngleRegression.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/mathematics/eigen3.h>
using namespace Eigen;
using namespace shogun;
using namespace std;
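// Templated implementation of Least Angle Regression (LARS) with an optional LASSO
// modification, following Efron, Hastie, Johnstone & Tibshirani (2004). train_machine()
// below builds the full piecewise-linear regularization path and stores it in m_beta_path.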
template <class ST> static vector<ST> make_vector(int32_t size, ST val)
{
vector<ST> result(size);
fill(result.begin(), result.end(), val);
return result;
}
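// Compute a 2x2 Givens rotation G that zeroes the second component of (x0, x1),
// writing the rotated pair to (y0, y1); used by cholesky_delete() to re-triangularize R.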
template <class ST> static void plane_rot(ST x0, ST x1,
ST &y0, ST &y1, SGMatrix<ST> &G)
{
G.zero();
if (x1 == 0)
{
G(0, 0) = G(1, 1) = 1;
y0 = x0;
y1 = x1;
}
else
{
ST r = (ST) CMath::sqrt((float64_t) (x0*x0 + x1*x1));
ST sx0 = x0 / r;
ST sx1 = x1 / r;
G(0,0) = sx0;
G(1,0) = -sx1;
G(0,1) = sx1;
G(1,1) = sx0;
y0 = r;
y1 = 0;
}
}
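// Return (imax, vmax): index and value of the largest |vec[i]| among entries whose
// ignore_mask flag is not set; used to pick the next variable to activate.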
template <class ST> static void find_max_abs(const vector<ST> &vec, const vector<bool> &ignore_mask,
int32_t &imax, ST& vmax)
{
imax = -1;
vmax = -1;
for (uint32_t i=0; i < vec.size(); ++i)
{
if (ignore_mask[i])
continue;
if (CMath::abs(vec[i]) > vmax)
{
vmax = CMath::abs(vec[i]);
imax = i;
}
}
}
template <class ST> CLeastAngleRegression<ST>::CLeastAngleRegression(bool lasso) :
CLinearMachine(), m_lasso(lasso),
m_max_nonz(0), m_max_l1_norm(0)
{
m_epsilon = CMath::MACHINE_EPSILON;
SG_ADD(&m_epsilon, "epsilon", "Epsilon for early stopping", MS_AVAILABLE);
SG_ADD(&m_max_nonz, "max_nonz", "Max number of non-zero variables", MS_AVAILABLE);
SG_ADD(&m_max_l1_norm, "max_l1_norm", "Max l1-norm of estimator", MS_AVAILABLE);
}
template <class ST> CLeastAngleRegression<ST>::~CLeastAngleRegression()
{
}
template <class ST> bool CLeastAngleRegression<ST>::train_machine(CFeatures* data)
{
if (!m_labels)
SG_ERROR("No labels set\n")
if (m_labels->get_label_type() != LT_REGRESSION)
SG_ERROR("Expected RegressionLabels\n")
if (!data)
data=features;
if (!data)
SG_ERROR("No features set\n")
if (m_labels->get_num_labels() != data->get_num_vectors())
SG_ERROR("Number of training vectors does not match number of labels\n")
if (data->get_feature_class() != C_DENSE)
SG_ERROR("Expected Simple Features\n")
if (data->get_feature_type() != F_DREAL)
SG_ERROR("Expected Real Features\n")
CDenseFeatures<ST>* feats=(CDenseFeatures<ST>*) data;
int32_t n_fea = feats->get_num_features();
int32_t n_vec = feats->get_num_vectors();
bool lasso_cond = false;
bool stop_cond = false;
// init facilities
m_beta_idx.clear();
m_beta_path.clear();
m_num_active = 0;
m_active_set.clear();
m_is_active.resize(n_fea);
fill(m_is_active.begin(), m_is_active.end(), false);
SGVector<ST> y = ((CRegressionLabels*) m_labels)->get_labels();
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_y(y.vector, y.size());
// transpose(X) is more convenient to work with since we care
// about features here. After transpose, each row will be a data
// point while each column corresponds to a feature
SGMatrix<ST> X (n_vec, n_fea);
Map<Matrix<ST, Dynamic, Dynamic>> map_Xr = feats->get_eigen_map();
Map<Matrix<ST,-1,-1,0,-1,-1>> map_X(X.matrix, n_vec, n_fea);
map_X = map_Xr.transpose();
SGMatrix<ST> X_active(n_vec, n_fea);
// beta is the estimator
vector<ST> beta = make_vector<ST>(n_fea, 0);
vector<ST> Xy = make_vector<ST>(n_fea, 0);
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_Xy(&Xy[0], n_fea);
// Xy = X' * y
map_Xy=map_Xr*map_y;
// mu is the prediction
vector<ST> mu = make_vector<ST>(n_vec, 0);
// correlation
vector<ST> corr = make_vector<ST>(n_fea, 0);
// sign of correlation
vector<ST> corr_sign(n_fea);
// Cholesky factorization R'R = X'X, R is upper triangular
SGMatrix<ST> R;
ST max_corr = 1;
int32_t i_max_corr = 1;
// first entry: all coefficients are zero
m_beta_path.push_back(beta);
m_beta_idx.push_back(0);
// maximum number of active variables allowed at a time
int32_t max_active_allowed = CMath::min(n_vec-1, n_fea);
//========================================
// main loop
//========================================
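// Each pass of the loop below takes one LARS step: it activates the variable most
// correlated with the current residual (unless the previous pass dropped one under the
// LASSO rule), moves the coefficients along the equiangular direction by gamma, and
// records the resulting beta on m_beta_path.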
int32_t nloop=0;
while (m_num_active < max_active_allowed && max_corr/n_vec > m_epsilon && !stop_cond)
{
// corr = X' * (y-mu) = - X'*mu + Xy
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_corr(&corr[0], n_fea);
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_mu(&mu[0], n_vec);
map_corr = map_Xy - (map_Xr*map_mu);
// corr_sign = sign(corr)
for (uint32_t i=0; i < corr.size(); ++i)
corr_sign[i] = CMath::sign(corr[i]);
// find max absolute correlation in inactive set
find_max_abs(corr, m_is_active, i_max_corr, max_corr);
if (!lasso_cond)
{
// update Cholesky factorization matrix
if (m_num_active == 0)
{
// R isn't allocated yet
R=SGMatrix<ST>(1,1);
ST diag_k = map_X.col(i_max_corr).dot(map_X.col(i_max_corr));
R(0,0) = (ST) CMath::sqrt((float64_t) diag_k);
}
else
R=cholesky_insert(X, X_active, R, i_max_corr, m_num_active);
activate_variable(i_max_corr);
}
// Active variables
Map<Matrix<ST,-1,-1,0,-1,-1>> map_Xa(X_active.matrix, n_vec, m_num_active);
if (!lasso_cond)
map_Xa.col(m_num_active-1)=map_X.col(i_max_corr);
SGVector<ST> corr_sign_a(m_num_active);
for (int32_t i=0; i < m_num_active; ++i)
corr_sign_a[i] = corr_sign[m_active_set[i]];
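// Solve (Xa' Xa) * GA1 = corr_sign_a with two triangular solves against the
// Cholesky factor R (R' R = Xa' Xa); GA1 then gives the coefficient direction
// wA = AA * GA1 and the equiangular vector u = Xa * wA below.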
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_corr_sign_a(corr_sign_a.vector, corr_sign_a.size()); //Map<VectorXd>
Map<Matrix<ST,-1,-1,0,-1,-1>> map_R(R.matrix, R.num_rows, R.num_cols); //Map<MatrixXd>
Matrix<ST, -1, 1, 0,-1, 1> solve = map_R.transpose().template triangularView<Lower>().template solve<OnTheLeft>(map_corr_sign_a); //VectorXd
Matrix<ST, -1, 1, 0,-1, 1> GA1 = map_R.template triangularView<Upper>().template solve<OnTheLeft>(solve); //VectorXd
// AA = 1/sqrt(GA1' * corr_sign_a)
ST AA = GA1.dot(map_corr_sign_a);
AA = 1/(ST) CMath::sqrt( (float64_t) AA);
Matrix<ST, -1, 1, 0,-1, 1> wA = AA*GA1;
// equiangular direction (unit vector)
vector<ST> u = make_vector<ST>(n_vec, 0);
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_u(&u[0], n_vec);
map_u = map_Xa*wA;
ST gamma = max_corr / AA;
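// gamma is the step length along the equiangular direction: for each inactive variable,
// take the smallest positive step at which its absolute correlation would catch up with
// the active set (the "min+" rule of Efron et al., 2004) and keep the overall minimum.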
if (m_num_active < n_fea)
{
#pragma omp parallel for shared(gamma)
for (int32_t i=0; i < n_fea; ++i)
{
if (m_is_active[i])
continue;
// correlation between X[:,i] and u
ST dir_corr = map_u.dot(map_X.col(i));
ST tmp1 = (max_corr-corr[i])/(AA-dir_corr);
ST tmp2 = (max_corr+corr[i])/(AA+dir_corr);
#pragma omp critical
{
if (tmp1 > CMath::MACHINE_EPSILON && tmp1 < gamma)
gamma = tmp1;
if (tmp2 > CMath::MACHINE_EPSILON && tmp2 < gamma)
gamma = tmp2;
}
}
}
int32_t i_kick=-1;
int32_t i_change=i_max_corr;
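// LASSO modification: if an active coefficient would cross zero before the full LARS
// step completes, stop the step there, drop that variable from the active set at the
// end of this pass, and skip adding a new variable on the next pass (lasso_cond).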
if (m_lasso)
{
// lasso modification to further refine gamma
lasso_cond = false;
ST lasso_bound = numeric_limits<ST>::max();
for (int32_t i=0; i < m_num_active; ++i)
{
ST tmp = -beta[m_active_set[i]] / wA(i);
if (tmp > CMath::MACHINE_EPSILON && tmp < lasso_bound)
{
lasso_bound = tmp;
i_kick = i;
}
}
if (lasso_bound < gamma)
{
gamma = lasso_bound;
lasso_cond = true;
i_change = m_active_set[i_kick];
}
}
// update prediction: mu = mu + gamma * u
map_mu += gamma*map_u;
// update estimator
for (int32_t i=0; i < m_num_active; ++i)
beta[m_active_set[i]] += gamma * wA(i);
// early stopping on max l1-norm
if (m_max_l1_norm > 0)
{
ST l1 = SGVector<ST>::onenorm(&beta[0], n_fea);
if (l1 > m_max_l1_norm)
{
// stopping with interpolated beta
stop_cond = true;
lasso_cond = false;
ST l1_prev = SGVector<ST>::onenorm(&m_beta_path[nloop][0], n_fea);
ST s = (m_max_l1_norm-l1_prev)/(l1-l1_prev);
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_beta(&beta[0], n_fea);
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_beta_prev(&m_beta_path[nloop][0], n_fea);
map_beta = (1-s)*map_beta_prev + s*map_beta;
}
}
// if lasso cond, drop the variable from active set
if (lasso_cond)
{
beta[i_change] = 0;
R=cholesky_delete(R, i_kick);
deactivate_variable(i_kick);
// Remove column from active set
int32_t numRows = map_Xa.rows();
int32_t numCols = map_Xa.cols()-1;
if( i_kick < numCols )
map_Xa.block(0, i_kick, numRows, numCols-i_kick) =
map_Xa.block(0, i_kick+1, numRows, numCols-i_kick).eval();
}
nloop++;
m_beta_path.push_back(beta);
if (size_t(m_num_active) >= m_beta_idx.size())
m_beta_idx.push_back(nloop);
else
m_beta_idx[m_num_active] = nloop;
// early stopping with max number of non-zero variables
if (m_max_nonz > 0 && m_num_active >= m_max_nonz)
stop_cond = true;
SG_DEBUG("Added : %d , Dropped %d, Active set size %d max_corr %.17f \n", i_max_corr, i_kick, m_num_active, max_corr);
}
// assign default estimator
w.vlen = n_fea;
switch_w(m_beta_idx.size()-1);
return true;
}
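// Grow the Cholesky factor R (R' R = Xa' Xa) when column i_max_corr of X joins the
// active set: solve R' R_k = Xa' x_k for the new column of R, and set the new diagonal
// entry to sqrt(x_k' x_k - R_k' R_k).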
template <class ST> SGMatrix<ST> CLeastAngleRegression<ST>::cholesky_insert(const SGMatrix<ST>& X,
const SGMatrix<ST>& X_active, SGMatrix<ST>& R, int32_t i_max_corr, int32_t num_active)
{
Map<Matrix<ST,-1,-1,0,-1,-1>> map_X(X.matrix, X.num_rows, X.num_cols); //Map<MatrixXd>
Map<Matrix<ST,-1,-1,0,-1,-1>> map_X_active(X_active.matrix, X.num_rows, num_active); //Map<MatrixXd>
ST diag_k = map_X.col(i_max_corr).dot(map_X.col(i_max_corr));
// col_k is the k-th column of (X'X)
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_i_max(X.get_column_vector(i_max_corr), X.num_rows);//Map<VectorXd>
Matrix<ST, -1, 1, 0,-1, 1> R_k = map_X_active.transpose()*map_i_max;//VectorXd
Map<Matrix<ST,-1,-1,0,-1,-1>> map_R(R.matrix, R.num_rows, R.num_cols);//Map<MatrixXd>
// R' * R_k = (X' * X)_k = col_k, solving to get R_k
map_R.transpose().template triangularView<Lower>().template solveInPlace<OnTheLeft>(R_k);
ST R_kk = (ST) CMath::sqrt( (float64_t) (diag_k - R_k.dot(R_k)));
SGMatrix<ST> R_new(num_active+1, num_active+1);
Map<Matrix<ST,-1,-1,0,-1,-1>> map_R_new(R_new.matrix, R_new.num_rows, R_new.num_cols);
map_R_new.block(0, 0, num_active, num_active) = map_R;
memcpy(R_new.matrix+num_active*(num_active+1), R_k.data(), sizeof(ST)*(num_active));
map_R_new.row(num_active).setZero();
map_R_new(num_active, num_active) = R_kk;
return R_new;
}
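// Shrink the Cholesky factor when the i_kick-th active variable is dropped: remove the
// corresponding column of R, restore upper-triangular form with Givens rotations
// (plane_rot), and return the (m_num_active-1) x (m_num_active-1) leading block.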
template <class ST> SGMatrix<ST> CLeastAngleRegression<ST>::cholesky_delete(SGMatrix<ST>& R, int32_t i_kick)
{
if (i_kick != m_num_active-1)
{
// remove i_kick-th column
for (int32_t j=i_kick; j < m_num_active-1; ++j)
for (int32_t i=0; i < m_num_active; ++i)
R(i,j) = R(i,j+1);
SGMatrix<ST> G(2,2);
for (int32_t i=i_kick; i < m_num_active-1; ++i)
{
plane_rot(R(i,i),R(i+1,i), R(i,i), R(i+1,i), G);
if (i < m_num_active-2)
{
for (int32_t k=i+1; k < m_num_active-1; ++k)
{
// R[i:i+1, k] = G*R[i:i+1, k]
ST Rik = R(i,k), Ri1k = R(i+1,k);
R(i,k) = G(0,0)*Rik + G(0,1)*Ri1k;
R(i+1,k) = G(1,0)*Rik+G(1,1)*Ri1k;
}
}
}
}
SGMatrix<ST> nR(m_num_active-1, m_num_active-1);
for (int32_t i=0; i < m_num_active-1; ++i)
for (int32_t j=0; j < m_num_active-1; ++j)
nR(i,j) = R(i,j);
return nR;
}
//template class CLeastAngleRegression<bool>;
//template class CLeastAngleRegression<char>;
//template class CLeastAngleRegression<int8_t>;
//template class CLeastAngleRegression<uint8_t>;
//template class CLeastAngleRegression<int16_t>;
//template class CLeastAngleRegression<uint16_t>;
//template class CLeastAngleRegression<int32_t>;
//template class CLeastAngleRegression<uint32_t>;
//template class CLeastAngleRegression<int64_t>;
//template class CLeastAngleRegression<uint64_t>;
//template class CLeastAngleRegression<float32_t>;
template class CLeastAngleRegression<float64_t>;
//template class CLeastAngleRegression<floatmax_t>;
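/*
 * Illustrative usage sketch (not part of this translation unit; assumes the standard
 * Shogun 3.x C++ API that this gist targets):
 *
 *   init_shogun_with_defaults();
 *
 *   SGMatrix<float64_t> X(n_features, n_samples);   // fill with training data
 *   SGVector<float64_t> y(n_samples);               // fill with regression targets
 *
 *   CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(X);
 *   CRegressionLabels* labels = new CRegressionLabels(y);
 *
 *   // true selects the LASSO modification, false gives plain LARS
 *   CLeastAngleRegression<float64_t>* lars = new CLeastAngleRegression<float64_t>(true);
 *   lars->set_labels(labels);
 *   lars->train(feats);
 *
 *   CRegressionLabels* preds = lars->apply_regression(feats);
 *
 *   SG_UNREF(preds);
 *   SG_UNREF(lars);
 *   exit_shogun();
 */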