Skip to content

Instantly share code, notes, and snippets.

@abinashpanda
Created June 9, 2014 14:53
Show Gist options
  • Save abinashpanda/7a201f3fabb74e40dbf2 to your computer and use it in GitHub Desktop.
Save abinashpanda/7a201f3fabb74e40dbf2 to your computer and use it in GitHub Desktop.
#include <cstdio>

#include <shogun/base/init.h>
#include <shogun/io/LineReader.h>
#include <shogun/io/Parser.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/DelimiterTokenizer.h>
#include <shogun/lib/SGSparseVector.h>
#include <shogun/lib/SGVector.h>
using namespace shogun;
void get_sparse_matrix(
FILE * file,
SGSparseVector<float64_t> * &mat_feat,
int32_t &num_feat,
int32_t &num_vec,
SGVector<int32_t> * &multilabel,
int32_t &num_classes)
{
char m_delimiter_feat = ':';
char m_delimiter_label = ',';
CDelimiterTokenizer * m_whitespace_tokenizer = new CDelimiterTokenizer(
true);
m_whitespace_tokenizer->delimiters[' '] = 1;
SG_REF(m_whitespace_tokenizer);
CDelimiterTokenizer * m_delimiter_feat_tokenizer = new CDelimiterTokenizer(
true);
m_delimiter_feat_tokenizer->delimiters[m_delimiter_feat] = 1;
SG_REF(m_delimiter_feat_tokenizer);
CDelimiterTokenizer * m_delimiter_label_tokenizer = new CDelimiterTokenizer(
true);
m_delimiter_label_tokenizer->delimiters[m_delimiter_label] = 1;
SG_REF(m_delimiter_label_tokenizer);
CDelimiterTokenizer * m_line_tokenizer = new CDelimiterTokenizer(
true);
m_line_tokenizer->delimiters['\n'] = 1;
SG_REF(m_line_tokenizer);
CParser * m_parser = new CParser();
SG_REF(m_parser);
CLineReader * m_line_reader = new CLineReader(
file,
m_line_tokenizer);
SG_REF(m_line_reader);
SG_SPRINT(">>> Counting number of lines in the file...\n");
num_vec = 0;
while (m_line_reader->has_next())
{
m_line_reader->skip_line();
num_vec++;
}
m_line_reader->reset();
SG_SPRINT("<<< Counting lines completed.\n");
mat_feat = SG_MALLOC(SGSparseVector<float64_t>, num_vec);
multilabel = SG_MALLOC(SGVector<int32_t>, num_vec);
SG_UNREF(m_whitespace_tokenizer); SG_UNREF(m_delimiter_feat_tokenizer);
SG_UNREF(m_delimiter_label_tokenizer); SG_UNREF(m_line_tokenizer);
SG_UNREF(m_parser); SG_UNREF(m_line_reader);
}
int main(int argc, char ** argv)
{
init_shogun_with_defaults();
sg_io->set_loglevel(MSG_DEBUG);
FILE * file = fopen("sample.svm", "r");
int32_t num_feat, num_vec, num_classes;
SGSparseVector<float64_t> *
mat_feat;
SGVector<int32_t> * multilabel;
get_sparse_matrix(
file,
mat_feat,
num_feat,
num_vec,
multilabel,
num_classes);
SG_SPRINT("Number of vectors/samples are = %d\n", num_vec);
exit_shogun();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment