Skip to content

Instantly share code, notes, and snippets.

@tklein23
Created June 10, 2014 23:26
Show Gist options
  • Save tklein23/44f7b03be81743bd17ca to your computer and use it in GitHub Desktop.
Save tklein23/44f7b03be81743bd17ca to your computer and use it in GitHub Desktop.
/*
* Written (W) 2013 Thoralf Klein <thoralf.klein@zib.de>
* Copyright (C) 2013 Zuse-Institute-Berlin (ZIB)
* Copyright (C) 2013 Thoralf Klein
*/
#include <shogun/lib/Hash.h>
#include <shogun/lib/SGSparseVector.h>
#include <shogun/lib/SGSparseMatrix.h>
using namespace shogun;
/*
 * Smoke test for CHash::MurmurHash3: prints the hash of two fixed 4-byte
 * inputs, both hashed with the seed 0xDEADBEAF, so the output can be
 * compared by eye against known values.
 */
void
test_hash ()
{
  uint32_t hash_seed = 0xDEADBEAF;

  // First input: the four bytes of a 32-bit integer, read exactly as they
  // are laid out in memory.
  uint32_t word = 0xE5E77E5E;
  uint8_t *word_bytes = (uint8_t *) (&word);
  printf ("hash-test %0x\n", CHash::MurmurHash3 (word_bytes, 4, hash_seed));

  // Second input: the explicit byte sequence 0, 1, 2, 3.
  uint8_t bytes[4] = { 0, 1, 2, 3 };
  uint8_t *first_byte = &bytes[0];
  printf ("hash(3)=%0x\n", CHash::MurmurHash3 (first_byte, 4, hash_seed));
}
/*
 * Hash the feature indices of a sparse vector into a fixed index range.
 *
 * For every entry of src, the 32-bit MurmurHash3 of its feature index
 * (keyed by seed) decides what is written to dst:
 *   - the lowest hash bit selects the sign: if it is set the value is
 *     negated, otherwise the value is kept unchanged;
 *   - the remaining bits, reduced modulo hash_size, become the new index,
 *     so hashed indices are in range [0;hash_size-1].
 *
 * Entries are written in the same order as they appear in src. Different
 * source indices can end up on the same hashed index; this function neither
 * sorts nor merges such entries, that is left to the caller.
 *
 * dst        replaced by a newly built vector with src.num_feat_entries
 *            entries. It is replaced as a whole (by assignment) instead of
 *            having its members overwritten in place, which leaves the
 *            release of any buffer it previously held to SGSparseVector's
 *            assignment and also makes dst == src safe.
 * src        vector whose entries are hashed; its entries are only read.
 * seed       seed passed to MurmurHash3.
 * hash_size  number of hashed dimensions; must be greater than zero.
 */
void
hash_sparse_vector (SGSparseVector < float64_t > &dst,
                    SGSparseVector < float64_t > &src,
                    uint32_t seed, uint32_t hash_size)
{
  // A zero hash_size would make the modulo below divide by zero.
  ASSERT (hash_size > 0);

  // Build the result in a local vector so that src is never touched, even
  // when the caller passes the same object for dst and src.
  SGSparseVector < float64_t > hashed (src.num_feat_entries);

  for (int32_t j = 0; j < src.num_feat_entries; j++)
    {
      // Hash the raw bytes of the index; the length follows the field's
      // type instead of being hard-coded.
      uint32_t mm3 = CHash::MurmurHash3 ((uint8_t *) &src.features[j].feat_index,
                                         static_cast < int32_t > (sizeof (src.features[j].feat_index)),
                                         seed);

      // Start from a copy of the whole source entry, then rewrite index and
      // value; this replaces the former bulk memcpy.
      hashed.features[j] = src.features[j];

      // The lowest bit is reserved for the sign, so drop it before taking
      // the modulo; the shifted value also fits in 31 bits.
      hashed.features[j].feat_index = (mm3 >> 1) % hash_size;
      hashed.features[j].entry = (mm3 % 2 == 1 ? -1.0 : 1.0) * src.features[j].entry;
    }

  dst = hashed;
}
/*
 * Build a hashed copy of a sparse matrix.
 *
 * Every vector of feats is passed through hash_sparse_vector with the given
 * seed and hash_size, sorted with sort_features(), and stored at the same
 * position in a new matrix constructed with (hash_size, feats.num_vectors).
 * A one-line summary of the result is printed to stdout before returning.
 *
 * feats      matrix whose vectors are hashed.
 * seed       seed forwarded to hash_sparse_vector.
 * hash_size  number of hashed dimensions; asserted to be greater than zero.
 *
 * Returns the newly built hashed matrix.
 */
SGSparseMatrix < float64_t >
hash_sparse_matrix (SGSparseMatrix < float64_t > feats, uint32_t seed,
                    uint32_t hash_size)
{
  ASSERT (hash_size > 0);

  SGSparseMatrix < float64_t > hashed (hash_size, feats.num_vectors);

  int32_t row = 0;
  while (row < feats.num_vectors)
    {
      // Hash into a fresh vector, sort it, then store it in the result.
      SGSparseVector < float64_t > hashed_row;
      hash_sparse_vector (hashed_row, feats.sparse_matrix[row], seed, hash_size);
      hashed_row.sort_features ();
      hashed.sparse_matrix[row] = hashed_row;
      row++;
    }

  printf ("hashed matrix[%u/%0x]: examples %d, dimensions %d\n", hash_size,
          seed, hashed.num_vectors, hashed.num_features);
  return hashed;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment