Skip to content

Instantly share code, notes, and snippets.

@tklein23
Created July 3, 2014 21:26
Show Gist options
  • Save tklein23/00d36caec496a4110e39 to your computer and use it in GitHub Desktop.
Save tklein23/00d36caec496a4110e39 to your computer and use it in GitHub Desktop.
CLR (calibrated label ranking) for multilabel classification
/**
 * Inference for a single example of the calibrated multilabel model.
 *
 * The weight vector w is read as m_num_classes consecutive blocks of
 * m_feats_dim entries. Block 0 yields the calibration score, which is
 * subtracted from the score of every other class; class 0 itself always has
 * score 0. A class is predicted when its (optionally loss-augmented) score is
 * >= 0.
 *
 * @param w        weight vector; its length must equal get_dim()
 * @param feat_idx index of the example in m_features (and, when training,
 *                 in m_labels)
 * @param training if true, the true labels of feat_idx are used to shift the
 *                 per-class scores by +/-1 (loss-augmented inference) and
 *                 delta/score of the result are filled in
 *
 * @return a new CResultSet (already SG_REF'ed) whose argmax is a
 *         CSparseMultilabel holding the predicted class indices. When
 *         training, delta and score are set as well.
 */
CResultSet* CMultilabelClrSOModel::argmax(
		SGVector< float64_t > w,
		int32_t feat_idx,
		bool const training)
{
	REQUIRE(m_features != NULL, "m_features should never be null\n");
	CDotFeatures* df = (CDotFeatures*) m_features;
	REQUIRE(m_feats_dim == df->get_dim_feature_space(),
			"given features have dimension %d, but we need %d",
			df->get_dim_feature_space(), m_feats_dim);
	REQUIRE(w.vector != NULL, "w must be not null\n");
	REQUIRE(m_labels != NULL, "m_labels should never be null");
	REQUIRE(m_num_classes > 0, "The model needs to be trained before "
			"using it for prediction\n");

	// Predicted class indices are stored as int16_t below; refuse class
	// counts whose indices would not fit instead of silently wrapping.
	const int32_t max_class_index = 32767;
	REQUIRE(m_num_classes - 1 <= max_class_index,
			"m_num_classes == %d exceeds the %d classes supported by the "
			"sparse label representation\n",
			m_num_classes, max_class_index + 1);

	int32_t dim = get_dim();
	REQUIRE(dim == w.vlen, "get_dim() == %d != %d == w.vlen\n", dim, w.vlen);

	// Per-class margin offset: +1 for true labels, -1 for all others while
	// training; all zeros for plain prediction. Slot 0 is always 0.
	SGVector< float64_t > plus_minus_one(m_num_classes);
	CSparseMultilabel* ytrue = NULL;
	if (training)
	{
		auto ytrue_vec = ((CMultilabelSOLabels*) m_labels)->get_label_vector(feat_idx);
		plus_minus_one.set_const(-1);
		// Visit every true label, including the first entry. If that entry is
		// class 0 this is harmless because slot 0 is reset right below.
		for (int32_t i = 0; i < ytrue_vec.vlen; i++)
		{
			const int32_t label = ytrue_vec[i];
			REQUIRE(label >= 0 && label < m_num_classes,
					"label %d of example %d is outside [0, %d)\n",
					label, feat_idx, m_num_classes);
			plus_minus_one[label] = +1;
		}
		plus_minus_one[0] = 0.0;

		ytrue = new CSparseMultilabel(ytrue_vec);
		// Balance the SG_UNREF at the end of this function.
		SG_REF(ytrue);
	}
	else
	{
		plus_minus_one.zero();
	}

	// class_product[0] = 0
	// class_product[c] = <w_c; x_i> - <w_calibrated; x_i>   for c >= 1
	SGVector< float64_t > class_product(m_num_classes);
	const float64_t score_calibrated = df->dense_dot(feat_idx, w.vector, m_feats_dim);
	class_product[0] = 0;
	for (int32_t c = 1; c < m_num_classes; ++c)
	{
		class_product[c] = df->dense_dot(feat_idx, w.vector + c*m_feats_dim, m_feats_dim);
		class_product[c] -= score_calibrated;
	}

	// First pass: decide which classes are predicted and accumulate their
	// (non-augmented) scores.
	float64_t ypred_score = 0;
	int32_t num_pos = 0;
	SGVector< int32_t > ypred_dense(m_num_classes);
	ypred_dense.set_const(-1);
	for (int32_t c = 0; c < m_num_classes; ++c)
	{
		// TODO: assuming that loss() == 1
		const float64_t score_i = class_product[c] - plus_minus_one[c];
		if (score_i >= 0.0)
		{
			num_pos++;
			ypred_dense[c] = 1;
			ypred_score += class_product[c];
		}
	}

	// Second pass: compact the dense +/-1 marks into a list of class indices.
	SGVector< int16_t > ypred_sparse(num_pos);
	int32_t pos_i = 0;
	for (int32_t c = 0; c < m_num_classes; ++c)
	{
		if (ypred_dense[c] > 0)
		{
			ypred_sparse[pos_i] = static_cast<int16_t>(c);
			pos_i++;
		}
	}
	ASSERT(ypred_sparse.vlen == pos_i);

	// Build the CResultSet object to return
	CResultSet* ret = new CResultSet();
	SG_REF(ret);
	CSparseMultilabel* ypred = new CSparseMultilabel(ypred_sparse);
	SG_REF(ypred);
	ret->argmax = ypred;

	// NOTE(review): ret->score and ret->delta are only assigned when training;
	// otherwise they keep whatever CResultSet's constructor sets. Confirm that
	// prediction-time callers do not rely on score.
	if (training)
	{
		REQUIRE(ytrue != NULL, "ytrue[%d] should never be null", feat_idx);
		auto ytrue_vec = ytrue->get_data();
		float64_t ytrue_score = 0;
		for (int32_t i = 0; i < ytrue_vec.vlen; i++)
			ytrue_score += class_product[ytrue_vec[i]];

		ret->delta = delta_loss(ytrue, ypred);
		ret->score = ret->delta + ypred_score - ytrue_score;
		SG_UNREF(ytrue);
	}

	return ret;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment