Last active
August 29, 2015 14:02
-
-
Save abinashpanda/4da4c698168c92716599 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright(C) 2014 Abinash Panda | |
* Written(W) 2014 Abinash Panda | |
*/ | |
#include <shogun/base/init.h> | |
#include <shogun/lib/Hash.h> | |
#include <shogun/lib/SGSparseVector.h> | |
#include <shogun/features/SparseFeatures.h> | |
#include <shogun/mathematics/Random.h> | |
#include <shogun/io/SGIO.h> | |
#include <shogun/lib/Time.h> | |
using namespace shogun; | |
/**
 * Builds a hashed joint feature vector from a sparse vector and a label set.
 *
 * For every label, each entry of vec is re-indexed by hashing its feature
 * index with MurmurHash3, using the label value as the hash seed. The lowest
 * bit of the hash selects the sign of the entry and the remaining bits,
 * reduced modulo hash_dim, give the new feature index.
 *
 * @param vec      sparse input vector whose entries are hashed
 * @param labels   labels; each one contributes a full hashed copy of vec
 * @param hash_dim modulus applied to the hashed index (must be non-zero,
 *                 since it is used as a divisor)
 * @return sparse vector allocated with vec.num_feat_entries * labels.vlen
 *         entries, after sort_features(true) has been applied to it
 */
SGSparseVector<float64_t> get_sparse_joint_feature_vector(
	SGSparseVector<float64_t> vec,
	SGVector<int32_t> labels, int32_t hash_dim)
{
	SGSparseVector<float64_t> h_vec(vec.num_feat_entries * labels.vlen);

	// Output cursor: advances once per (label, input entry) pair.
	index_t out_pos = 0;

	for (int32_t label_idx = 0; label_idx < labels.vlen; label_idx++)
	{
		// The label acts as the hash seed, so each label maps the same
		// input index to a different hashed index.
		uint32_t label_seed = (uint32_t)labels[label_idx];

		for (int32_t entry_idx = 0; entry_idx < vec.num_feat_entries;
			entry_idx++, out_pos++)
		{
			uint32_t hashed = CHash::MurmurHash3(
				(uint8_t *)&vec.features[entry_idx].feat_index,
				sizeof(index_t), label_seed);

			// Lowest hash bit decides the sign of the stored value.
			float64_t sign = (hashed % 2 == 1 ? -1.0 : 1.0);

			// Remaining bits, folded into [0, hash_dim), form the index.
			h_vec.features[out_pos].feat_index = (hashed >> 1) % hash_dim;
			h_vec.features[out_pos].entry =
				sign * vec.features[entry_idx].entry;
		}
	}

	// NOTE(review): the `true` argument presumably keeps the feature pointer
	// stable while sorting — confirm against the SGSparseVector documentation.
	h_vec.sort_features(true);
	return h_vec;
}
int main(int argc, char ** argv) | |
{ | |
init_shogun_with_defaults(); | |
int32_t num_features = 1 << 12; | |
int32_t dim_features = 1 << 28; | |
uint32_t hash_seed = 23; | |
int32_t hash_dim = 1 << 18; | |
CRandom * prng = new CRandom(12345); | |
SGSparseVector<float64_t> sparse = SGSparseVector<float64_t>(num_features); | |
for (int32_t i = 0; i < num_features; i++) | |
{ | |
sparse.features[i].feat_index = prng->random(0, dim_features); | |
sparse.features[i].entry = 1.0; | |
} | |
sparse.sort_features(true); | |
SGVector<int32_t> labels(1000); | |
SGVector<int32_t>::range_fill_vector(labels.vector, labels.vlen, 123); | |
CTime * timer = new CTime(); | |
timer->start(); | |
SGSparseVector<float64_t> psi = get_sparse_joint_feature_vector( | |
sparse, labels, hash_dim); | |
timer->stop(); | |
float64_t iter_time = timer->cur_time_diff(); | |
SG_SPRINT("[%.1f sec] sparse joint feature vector implementation \n", | |
iter_time); | |
SG_SPRINT("Number of features in sparse vector = %d\n", | |
sparse.num_feat_entries); | |
SG_SPRINT("Number of featurs in joint vector = %d\n", | |
psi.num_feat_entries) | |
SG_UNREF(timer); | |
exit_shogun(); | |
return 0; | |
} | |
Author
abinashpanda
commented
Jun 15, 2014
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment