Last active
June 21, 2016 16:55
-
-
Save c4goldsw/fd325503b9b81a2a7f434291732e19e0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* This program is free software; you can redistribute it and/or modify | |
* it under the terms of the GNU General Public License as published by | |
* the Free Software Foundation; either version 3 of the License, or | |
* (at your option) any later version. | |
* | |
* Written (W) 2012 Chiyuan Zhang | |
* Copyright (C) 2012 Chiyuan Zhang | |
*/ | |
#include <shogun/lib/config.h> | |
#include <vector> | |
#include <limits> | |
#include <algorithm> | |
#include <shogun/features/DenseFeatures.h> | |
#include <shogun/mathematics/Math.h> | |
#include <shogun/regression/LeastAngleRegression.h> | |
#include <shogun/labels/RegressionLabels.h> | |
#include <shogun/mathematics/eigen3.h> | |
using namespace Eigen; | |
using namespace shogun; | |
using namespace std; | |
template <class ST> static vector<ST> make_vector(int32_t size, ST val) | |
{ | |
vector<ST> result(size); | |
fill(result.begin(), result.end(), val); | |
return result; | |
} | |
template <class ST> static void plane_rot(ST x0, ST x1, | |
ST &y0, ST &y1, SGMatrix<ST> &G) | |
{ | |
G.zero(); | |
if (x1 == 0) | |
{ | |
G(0, 0) = G(1, 1) = 1; | |
y0 = x0; | |
y1 = x1; | |
} | |
else | |
{ | |
ST r = (ST) CMath::sqrt((float64_t) (x0*x0+x1*x1) ) ; | |
ST sx0 = x0 / r; | |
ST sx1 = x1 / r; | |
G(0,0) = sx0; | |
G(1,0) = -sx1; | |
G(0,1) = sx1; | |
G(1,1) = sx0; | |
y0 = r; | |
y1 = 0; | |
} | |
} | |
template <class ST> static void find_max_abs(const vector<ST> &vec, const vector<bool> &ignore_mask, | |
int32_t &imax, ST& vmax) | |
{ | |
imax = -1; | |
vmax = -1; | |
for (uint32_t i=0; i < vec.size(); ++i) | |
{ | |
if (ignore_mask[i]) | |
continue; | |
if (CMath::abs(vec[i]) > vmax) | |
{ | |
vmax = CMath::abs(vec[i]); | |
imax = i; | |
} | |
} | |
} | |
template <class ST> CLeastAngleRegression<ST>::CLeastAngleRegression(bool lasso) : | |
CLinearMachine(), m_lasso(lasso), | |
m_max_nonz(0), m_max_l1_norm(0) | |
{ | |
m_epsilon = CMath::MACHINE_EPSILON; | |
SG_ADD(&m_epsilon, "epsilon", "Epsilon for early stopping", MS_AVAILABLE); | |
SG_ADD(&m_max_nonz, "max_nonz", "Max number of non-zero variables", MS_AVAILABLE); | |
SG_ADD(&m_max_l1_norm, "max_l1_norm", "Max l1-norm of estimator", MS_AVAILABLE); | |
} | |
template <class ST> CLeastAngleRegression<ST>::~CLeastAngleRegression() | |
{ | |
} | |
template <class ST> bool CLeastAngleRegression<ST>::train_machine(CFeatures* data) | |
{ | |
if (!m_labels) | |
SG_ERROR("No labels set\n") | |
if (m_labels->get_label_type() != LT_REGRESSION) | |
SG_ERROR("Expected RegressionLabels\n") | |
if (!data) | |
data=features; | |
if (!data) | |
SG_ERROR("No features set\n") | |
if (m_labels->get_num_labels() != data->get_num_vectors()) | |
SG_ERROR("Number of training vectors does not match number of labels\n") | |
if (data->get_feature_class() != C_DENSE) | |
SG_ERROR("Expected Simple Features\n") | |
if (data->get_feature_type() != F_DREAL) | |
SG_ERROR("Expected Real Features\n") | |
CDenseFeatures<ST>* feats=(CDenseFeatures<ST>*) data; | |
int32_t n_fea = feats->get_num_features(); | |
int32_t n_vec = feats->get_num_vectors(); | |
bool lasso_cond = false; | |
bool stop_cond = false; | |
// init facilities | |
m_beta_idx.clear(); | |
m_beta_path.clear(); | |
m_num_active = 0; | |
m_active_set.clear(); | |
m_is_active.resize(n_fea); | |
fill(m_is_active.begin(), m_is_active.end(), false); | |
SGVector<ST> y = ((CRegressionLabels*) m_labels)->get_labels(); | |
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_y(y.vector, y.size()); | |
// transpose(X) is more convenient to work with since we care | |
// about features here. After transpose, each row will be a data | |
// point while each column corresponds to a feature | |
SGMatrix<ST> X (n_vec, n_fea); | |
Map<Matrix<ST, Dynamic, Dynamic>> map_Xr = feats->get_eigen_map(); | |
Map<Matrix<ST,-1,-1,0,-1,-1>> map_X(X.matrix, n_vec, n_fea); | |
map_X = map_Xr.transpose(); | |
SGMatrix<ST> X_active(n_vec, n_fea); | |
// beta is the estimator | |
vector<ST> beta = make_vector<ST>(n_fea, 0); | |
vector<ST> Xy = make_vector<ST>(n_fea, 0); | |
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_Xy(&Xy[0], n_fea); | |
// Xy = X' * y | |
map_Xy=map_Xr*map_y; | |
// mu is the prediction | |
vector<ST> mu = make_vector<ST>(n_vec, 0); | |
// correlation | |
vector<ST> corr = make_vector<ST>(n_fea, 0); | |
// sign of correlation | |
vector<ST> corr_sign(n_fea); | |
// Cholesky factorization R'R = X'X, R is upper triangular | |
SGMatrix<ST> R; | |
ST max_corr = 1; | |
int32_t i_max_corr = 1; | |
// first entry: all coefficients are zero | |
m_beta_path.push_back(beta); | |
m_beta_idx.push_back(0); | |
//maximum allowed active variables at a time | |
int32_t max_active_allowed = CMath::min(n_vec-1, n_fea); | |
//======================================== | |
// main loop | |
//======================================== | |
int32_t nloop=0; | |
while (m_num_active < max_active_allowed && max_corr/n_vec > m_epsilon && !stop_cond) | |
{ | |
// corr = X' * (y-mu) = - X'*mu + Xy | |
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_corr(&corr[0], n_fea); | |
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_mu(&mu[0], n_vec); | |
map_corr = map_Xy - (map_Xr*map_mu); | |
// corr_sign = sign(corr) | |
for (uint32_t i=0; i < corr.size(); ++i) | |
corr_sign[i] = CMath::sign(corr[i]); | |
// find max absolute correlation in inactive set | |
find_max_abs(corr, m_is_active, i_max_corr, max_corr); | |
if (!lasso_cond) | |
{ | |
// update Cholesky factorization matrix | |
if (m_num_active == 0) | |
{ | |
// R isn't allocated yet | |
R=SGMatrix<ST>(1,1); | |
ST diag_k = map_X.col(i_max_corr).dot(map_X.col(i_max_corr)); | |
R(0,0) = (ST) CMath::sqrt((float64_t) diag_k); | |
} | |
else | |
R=cholesky_insert(X, X_active, R, i_max_corr, m_num_active); | |
activate_variable(i_max_corr); | |
} | |
// Active variables | |
Map<Matrix<ST,-1,-1,0,-1,-1>> map_Xa(X_active.matrix, n_vec, m_num_active); | |
if (!lasso_cond) | |
map_Xa.col(m_num_active-1)=map_X.col(i_max_corr); | |
SGVector<ST> corr_sign_a(m_num_active); | |
for (int32_t i=0; i < m_num_active; ++i) | |
corr_sign_a[i] = corr_sign[m_active_set[i]]; | |
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_corr_sign_a(corr_sign_a.vector, corr_sign_a.size()); //Map<VectorXd> | |
Map<Matrix<ST,-1,-1,0,-1,-1>> map_R(R.matrix, R.num_rows, R.num_cols); //Map<MatrixXd> | |
Matrix<ST, -1, 1, 0,-1, 1> solve = map_R.transpose().template triangularView<Lower>().template solve<OnTheLeft>(map_corr_sign_a); //VectorXd | |
Matrix<ST, -1, 1, 0,-1, 1> GA1 = map_R.template triangularView<Upper>().template solve<OnTheLeft>(solve); //VectorXd | |
// AA = 1/sqrt(GA1' * corr_sign_a) | |
ST AA = GA1.dot(map_corr_sign_a); | |
AA = 1/(ST) CMath::sqrt( (float64_t) AA); | |
Matrix<ST, -1, 1, 0,-1, 1> wA = AA*GA1; | |
// equiangular direction (unit vector) | |
vector<ST> u = make_vector<ST>(n_vec, 0); | |
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_u(&u[0], n_vec); | |
map_u = map_Xa*wA; | |
ST gamma = max_corr / AA; | |
if (m_num_active < n_fea) | |
{ | |
#pragma omp parallel for shared(gamma) | |
for (int32_t i=0; i < n_fea; ++i) | |
{ | |
if (m_is_active[i]) | |
continue; | |
// correlation between X[:,i] and u | |
ST dir_corr = map_u.dot(map_X.col(i)); | |
ST tmp1 = (max_corr-corr[i])/(AA-dir_corr); | |
ST tmp2 = (max_corr+corr[i])/(AA+dir_corr); | |
#pragma omp critical | |
{ | |
if (tmp1 > CMath::MACHINE_EPSILON && tmp1 < gamma) | |
gamma = tmp1; | |
if (tmp2 > CMath::MACHINE_EPSILON && tmp2 < gamma) | |
gamma = tmp2; | |
} | |
} | |
} | |
int32_t i_kick=-1; | |
int32_t i_change=i_max_corr; | |
if (m_lasso) | |
{ | |
// lasso modification to further refine gamma | |
lasso_cond = false; | |
ST lasso_bound = numeric_limits<ST>::max(); | |
for (int32_t i=0; i < m_num_active; ++i) | |
{ | |
ST tmp = -beta[m_active_set[i]] / wA(i); | |
if (tmp > CMath::MACHINE_EPSILON && tmp < lasso_bound) | |
{ | |
lasso_bound = tmp; | |
i_kick = i; | |
} | |
} | |
if (lasso_bound < gamma) | |
{ | |
gamma = lasso_bound; | |
lasso_cond = true; | |
i_change = m_active_set[i_kick]; | |
} | |
} | |
// update prediction: mu = mu + gamma * u | |
map_mu += gamma*map_u; | |
// update estimator | |
for (int32_t i=0; i < m_num_active; ++i) | |
beta[m_active_set[i]] += gamma * wA(i); | |
// early stopping on max l1-norm | |
if (m_max_l1_norm > 0) | |
{ | |
ST l1 = SGVector<ST>::onenorm(&beta[0], n_fea); | |
if (l1 > m_max_l1_norm) | |
{ | |
// stopping with interpolated beta | |
stop_cond = true; | |
lasso_cond = false; | |
ST l1_prev = SGVector<ST>::onenorm(&m_beta_path[nloop][0], n_fea); | |
ST s = (m_max_l1_norm-l1_prev)/(l1-l1_prev); | |
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_beta(&beta[0], n_fea); | |
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_beta_prev(&m_beta_path[nloop][0], n_fea); | |
map_beta = (1-s)*map_beta_prev + s*map_beta; | |
} | |
} | |
// if lasso cond, drop the variable from active set | |
if (lasso_cond) | |
{ | |
beta[i_change] = 0; | |
R=cholesky_delete(R, i_kick); | |
deactivate_variable(i_kick); | |
// Remove column from active set | |
int32_t numRows = map_Xa.rows(); | |
int32_t numCols = map_Xa.cols()-1; | |
if( i_kick < numCols ) | |
map_Xa.block(0, i_kick, numRows, numCols-i_kick) = | |
map_Xa.block(0, i_kick+1, numRows, numCols-i_kick).eval(); | |
} | |
nloop++; | |
m_beta_path.push_back(beta); | |
if (size_t(m_num_active) >= m_beta_idx.size()) | |
m_beta_idx.push_back(nloop); | |
else | |
m_beta_idx[m_num_active] = nloop; | |
// early stopping with max number of non-zero variables | |
if (m_max_nonz > 0 && m_num_active >= m_max_nonz) | |
stop_cond = true; | |
SG_DEBUG("Added : %d , Dropped %d, Active set size %d max_corr %.17f \n", i_max_corr, i_kick, m_num_active, max_corr); | |
} | |
// assign default estimator | |
w.vlen = n_fea; | |
switch_w(m_beta_idx.size()-1); | |
return true; | |
} | |
template <class ST> SGMatrix<ST> CLeastAngleRegression<ST>::cholesky_insert(const SGMatrix<ST>& X, | |
const SGMatrix<ST>& X_active, SGMatrix<ST>& R, int32_t i_max_corr, int32_t num_active) | |
{ | |
Map<Matrix<ST,-1,-1,0,-1,-1>> map_X(X.matrix, X.num_rows, X.num_cols); //Map<VectorXd> | |
Map<Matrix<ST,-1,-1,0,-1,-1>> map_X_active(X_active.matrix, X.num_rows, num_active); //Map<MatrixXd> | |
ST diag_k = map_X.col(i_max_corr).dot(map_X.col(i_max_corr)); | |
// col_k is the k-th column of (X'X) | |
Map<Matrix<ST, -1, 1, 0,-1, 1>> map_i_max(X.get_column_vector(i_max_corr), X.num_rows);//Map<VectorXd> | |
Matrix<ST, -1, 1, 0,-1, 1> R_k = map_X_active.transpose()*map_i_max;//VectorXd | |
Map<Matrix<ST,-1,-1,0,-1,-1>> map_R(R.matrix, R.num_rows, R.num_cols);//Map<MatrixXd> | |
// R' * R_k = (X' * X)_k = col_k, solving to get R_k | |
map_R.transpose().template triangularView<Lower>().template solveInPlace<OnTheLeft>(R_k); | |
ST R_kk = (ST) CMath::sqrt( (float64_t) (diag_k - R_k.dot(R_k))); | |
SGMatrix<ST> R_new(num_active+1, num_active+1); | |
Map<Matrix<ST,-1,-1,0,-1,-1>> map_R_new(R_new.matrix, R_new.num_rows, R_new.num_cols); | |
map_R_new.block(0, 0, num_active, num_active) = map_R; | |
memcpy(R_new.matrix+num_active*(num_active+1), R_k.data(), sizeof(ST)*(num_active)); | |
map_R_new.row(num_active).setZero(); | |
map_R_new(num_active, num_active) = R_kk; | |
return R_new; | |
} | |
template <class ST> SGMatrix<ST> CLeastAngleRegression<ST>::cholesky_delete(SGMatrix<ST>& R, int32_t i_kick) | |
{ | |
if (i_kick != m_num_active-1) | |
{ | |
// remove i_kick-th column | |
for (int32_t j=i_kick; j < m_num_active-1; ++j) | |
for (int32_t i=0; i < m_num_active; ++i) | |
R(i,j) = R(i,j+1); | |
SGMatrix<ST> G(2,2); | |
for (int32_t i=i_kick; i < m_num_active-1; ++i) | |
{ | |
plane_rot(R(i,i),R(i+1,i), R(i,i), R(i+1,i), G); | |
if (i < m_num_active-2) | |
{ | |
for (int32_t k=i+1; k < m_num_active-1; ++k) | |
{ | |
// R[i:i+1, k] = G*R[i:i+1, k] | |
ST Rik = R(i,k), Ri1k = R(i+1,k); | |
R(i,k) = G(0,0)*Rik + G(0,1)*Ri1k; | |
R(i+1,k) = G(1,0)*Rik+G(1,1)*Ri1k; | |
} | |
} | |
} | |
} | |
SGMatrix<ST> nR(m_num_active-1, m_num_active-1); | |
for (int32_t i=0; i < m_num_active-1; ++i) | |
for (int32_t j=0; j < m_num_active-1; ++j) | |
nR(i,j) = R(i,j); | |
return nR; | |
} | |
//template class CLeastAngleRegression<bool>; | |
//template class CLeastAngleRegression<char>; | |
//template class CLeastAngleRegression<int8_t>; | |
//template class CLeastAngleRegression<uint8_t>; | |
//template class CLeastAngleRegression<int16_t>; | |
//template class CLeastAngleRegression<uint16_t>; | |
//template class CLeastAngleRegression<int32_t>; | |
//template class CLeastAngleRegression<uint32_t>; | |
//template class CLeastAngleRegression<int64_t>; | |
//template class CLeastAngleRegression<uint64_t>; | |
//template class CLeastAngleRegression<float32_t>; | |
template class CLeastAngleRegression<float64_t>; | |
//template class CLeastAngleRegression<floatmax_t>; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment