/*
* Copyright(C) 2014 Abinash Panda
* Written(W) 2014 Abinash Panda
*/
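/*
 * Benchmark: build a hashed sparse joint feature vector Psi(x, y).
 * Every non-zero feature index of the input sparse vector is combined with
 * each label by using the label as the MurmurHash3 seed, then mapped into a
 * hash_dim-dimensional space with a sign bit (the usual hashing trick for
 * multiclass / structured joint features).
 */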
#include <shogun/base/init.h>
#include <shogun/lib/Hash.h>
#include <shogun/lib/SGSparseVector.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/mathematics/Random.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/Time.h>
using namespace shogun;
SGSparseVector<float64_t> get_sparse_joint_feature_vector(
    SGSparseVector<float64_t> vec,
    SGVector<int32_t> labels, int32_t hash_dim)
{
    /* One hashed copy of the input vector per label. */
    SGSparseVector<float64_t> h_vec(vec.num_feat_entries * labels.vlen);
    index_t k = 0;

    for (int32_t i = 0; i < labels.vlen; i++)
    {
        /* The label acts as the hash seed, so each (feature index, label)
         * pair gets its own bucket. */
        uint32_t seed = (uint32_t)labels[i];

        for (int32_t j = 0; j < vec.num_feat_entries; j++)
        {
            uint32_t hash = CHash::MurmurHash3(
                (uint8_t*)&vec.features[j].feat_index,
                sizeof(index_t), seed);

            /* Upper bits select the bucket, the lowest bit the sign. */
            h_vec.features[k].feat_index = (hash >> 1) % hash_dim;
            h_vec.features[k++].entry =
                (hash % 2 == 1 ? -1.0 : 1.0) * vec.features[j].entry;
        }
    }

    /* Sort by feature index and merge duplicate buckets. */
    h_vec.sort_features(true);

    return h_vec;
}
int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    int32_t num_features = 1 << 12;   /* non-zeros in the input vector */
    int32_t dim_features = 1 << 28;   /* original feature space dimension */
    int32_t hash_dim = 1 << 18;       /* hashed feature space dimension */

    /* Random sparse vector with num_features non-zero entries of value 1.0. */
    CRandom* prng = new CRandom(12345);
    SGSparseVector<float64_t> sparse(num_features);
    for (int32_t i = 0; i < num_features; i++)
    {
        sparse.features[i].feat_index = prng->random(0, dim_features);
        sparse.features[i].entry = 1.0;
    }
    sparse.sort_features(true);

    /* 1000 labels: 123, 124, ..., 1122. */
    SGVector<int32_t> labels(1000);
    SGVector<int32_t>::range_fill_vector(labels.vector, labels.vlen, 123);

    CTime* timer = new CTime();
    timer->start();
    SGSparseVector<float64_t> psi = get_sparse_joint_feature_vector(
        sparse, labels, hash_dim);
    timer->stop();
    float64_t iter_time = timer->cur_time_diff();

    SG_SPRINT("[%.1f sec] sparse joint feature vector implementation\n",
        iter_time);
    SG_SPRINT("Number of features in sparse vector = %d\n",
        sparse.num_feat_entries);
    SG_SPRINT("Number of features in joint vector = %d\n",
        psi.num_feat_entries);

    SG_UNREF(timer);
    SG_UNREF(prng);
    exit_shogun();
    return 0;
}
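For context, a minimal usage sketch (not part of the gist) of how such a hashed joint vector is typically consumed: scoring it against a dense weight vector of length hash_dim. The function name score_joint_vector and the weight vector w are hypothetical names introduced here for illustration.

// Assumed sketch: dot product of the hashed joint vector with a dense
// weight vector w of length hash_dim, as one would do in a multiclass /
// structured scoring step.
float64_t score_joint_vector(const SGSparseVector<float64_t>& psi,
                             const SGVector<float64_t>& w)
{
    float64_t score = 0.0;
    for (int32_t i = 0; i < psi.num_feat_entries; i++)
        score += w[psi.features[i].feat_index] * psi.features[i].entry;
    return score;
}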
Output reported by @abinashpanda:
[0.9 sec] sparse joint feature vector implementation
Number of features in sparse vector = 4096
Number of features in joint vector  = 235383
