Skip to content

Instantly share code, notes, and snippets.

@abinashpanda
Created May 10, 2014 10:08
Show Gist options
  • Save abinashpanda/94f422a8f7562109a024 to your computer and use it in GitHub Desktop.
Save abinashpanda/94f422a8f7562109a024 to your computer and use it in GitHub Desktop.
#include <shogun/base/init.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>
#include <shogun/structure/MulticlassSOLabels.h>
#include <shogun/structure/MulticlassModel.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/structure/StochasticSOSVM.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/lib/Time.h>
#include <shogun/features/HashedSparseFeatures.h>
#define DIMS 100
#define HASH_DIMS 90
#define NUM_SAMPLES 1000
using namespace shogun;
/**
 * Fill a dense feature matrix with Gaussian noise and assign binary labels.
 *
 * Every matrix entry is drawn with CMath::normal_random() using its own
 * mean (CMath::random(-100, 100)) and standard deviation
 * (CMath::random(1, 5)). The first half of the label vector is set to 0 and
 * the remainder to 1; the labels do not depend on the feature values.
 *
 * The extents are taken from the arguments themselves rather than from the
 * DIMS / NUM_SAMPLES macros, so containers of any size are filled completely
 * and never written out of bounds.
 *
 * @param labs  label vector to fill (labs.vlen entries)
 * @param feats feature matrix to fill; main() allocates it with one row per
 *              dimension and one column per sample
 */
void gen_rand_data(SGVector<float64_t>& labs, SGMatrix<float64_t>& feats)
{
	const index_t num_dims = feats.num_rows;
	const index_t num_samples = feats.num_cols;

	for (index_t i=0; i<num_samples; i++)
	{
		for (index_t j=0; j<num_dims; j++)
		{
			// Draw order (mean, then std, then the sample) matches the
			// original so the random stream is consumed identically.
			const float64_t mean = CMath::random(-100, 100);
			const float64_t std = CMath::random(1, 5);

			// (row, column) access instead of hand-computed linear offsets,
			// which silently depended on the macro sizes and storage order.
			feats(j, i) = CMath::normal_random(mean, std);
		}
	}

	const index_t num_labels = labs.vlen;
	for (index_t i=0; i<num_labels; i++)
		labs[i] = (i < num_labels/2) ? 0 : 1;
}
int main(int argc, char** argv)
{
init_shogun_with_defaults();
SGVector<float64_t> labs(NUM_SAMPLES);
SGMatrix<float64_t> feats(DIMS, NUM_SAMPLES);
gen_rand_data(labs, feats);
//SGVector<float64_t>::display_vector(labs);
//SGMatrix<float64_t>::display_matrix(feats);
CSparseFeatures<float64_t>* features = new CSparseFeatures<float64_t>(feats);
SG_REF(features);
CHashedSparseFeatures<float64_t>* hashed_features = new CHashedSparseFeatures<float64_t>(feats, HASH_DIMS);
SG_REF(hashed_features);
CMulticlassSOLabels* labels = new CMulticlassSOLabels(labs);
SG_REF(labels);
CMulticlassModel* model = new CMulticlassModel(features, labels);
SG_REF(model);
CMulticlassModel* hashed_model = new CMulticlassModel(hashed_features, labels);
SG_REF(hashed_model);
CDualLibQPBMSOSVM* bundle = new CDualLibQPBMSOSVM(model, labels, 100);
SG_REF(bundle);
bundle->set_verbose(true);
//bundle->train();
CStochasticSOSVM* sosvm = new CStochasticSOSVM(model, labels);
SG_REF(sosvm);
CStochasticSOSVM* hashed_sosvm = new CStochasticSOSVM(hashed_model, labels);
SG_REF(hashed_sosvm);
CTime start;
sosvm->train();
float64_t time1 = start.cur_time_diff(false);
hashed_sosvm->train();
float64_t time2 = start.cur_time_diff(false);
SG_SPRINT("Training Time for SO-SGD = %f\n", time1);
SG_SPRINT("Training Time for SO-SGD with hashed features = %f\n", time2-time1);
CStructuredLabels* out = CLabelsFactory::to_structured(sosvm->apply());
CStructuredLabels* hashed_out = CLabelsFactory::to_structured(hashed_sosvm->apply());
int32_t count = 0, hashed_count = 0;
CRealNumber* real;
for (int i=0; i<out->get_num_labels(); i++)
{
real = CRealNumber::obtain_from_generic(out->get_label(i));
if (labs[i] == real->value)
{
//SG_SPRINT("actual = %f | predicted = %f | correct\n",
// labs[i], real->value);
}
else
{
//SG_SPRINT("actual = %f | predicted = %f | incorrect\n",
// labs[i], real->value);
count++;
}
real = CRealNumber::obtain_from_generic(hashed_out->get_label(i));
if (labs[i] == real->value)
{
//SG_SPRINT("actual = %f | predicted = %f | correct\n",
// labs[i], real->value);
}
else
{
//SG_SPRINT("actual = %f | predicted = %f | incorrect\n",
// labs[i], real->value);
hashed_count++;
}
}
SG_SPRINT("Total number of incorrect predictions = %d\n", count);
SG_SPRINT("Total number of incorrect predictions with hashed features = %d\n", hashed_count);
SG_UNREF(features);
SG_UNREF(hashed_features);
SG_UNREF(labels);
SG_UNREF(model);
SG_UNREF(hashed_model);
SG_UNREF(bundle);
SG_UNREF(sosvm);
SG_UNREF(hashed_sosvm);
SG_UNREF(out);
SG_UNREF(hashed_out);
SG_UNREF(real);
exit_shogun();
return 0;
}
@tklein23
Copy link

Regarding line 56:
Please note that the shogun classes don't allow setting a seed for hashing. That's why I'm asking you to write a method that can hash-transform individual feature vectors.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment