Skip to content

Instantly share code, notes, and snippets.

@tnarihi
Last active August 29, 2015 14:11
Show Gist options
  • Save tnarihi/e6e4ebe1f235dc6c6690 to your computer and use it in GitHub Desktop.
Save tnarihi/e6e4ebe1f235dc6c6690 to your computer and use it in GitHub Desktop.
An example of parsing LIBSVM input format with Boost library
/*
* No Copyright
*/
#include <boost/algorithm/string.hpp>
#include <boost/foreach.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/lexical_cast.hpp>
#include <iostream> // NOLINT(readability/streams)
#include <fstream> // NOLINT(readability/streams)
#include <string>
#include <vector>
#include <utility>
using boost::shared_ptr;
using boost::lexical_cast;
using std::pair;
using std::vector;
using std::string;
using std::cout;
using std::cerr;
using std::endl;
typedef vector<float> blob_type;
typedef pair<string, shared_ptr<blob_type> > datum_type;
typedef vector< shared_ptr<datum_type> > data_type;
// Parses one LIBSVM-style line ("<label> <index>:<value> <index>:<value> ...")
// into a (label, dense feature vector) pair.
//
// line:     the text of a single record; surrounding whitespace is ignored and
//           fields may be separated by any run of spaces or tabs.
// channels: length of the dense feature vector; features that are not
//           mentioned on the line stay at 0.
//
// Returns a newly allocated datum whose first member is the label token and
// whose second member is the dense vector.
//
// "or die": on a malformed "<index>:<value>" field, a non-numeric index or
// value, or an index outside [0, channels), a message is written to stderr and
// the process exits with status 1.
//
// NOTE(review): the index is used directly as a zero-based position. LIBSVM
// files commonly number features starting from 1 -- confirm which convention
// the input follows.
shared_ptr<datum_type> get_datum_form_line_or_die(string line, int channels) {
  // `line` is our own copy, so it is safe to normalise it in place.
  boost::trim(line);

  // Compress adjacent separators so "1  3:0.5" does not yield empty fields.
  vector<string> cells;
  boost::split(cells, line, boost::is_any_of(" \t"), boost::token_compress_on);
  const string label = cells.empty() ? string() : cells[0];

  shared_ptr<blob_type> blob(new blob_type(channels));

  // Parse sparse format features; cell 0 is the label, so start at 1.
  vector<string> indval;
  for (vector<string>::size_type i = 1; i < cells.size(); ++i) {
    const string& cell = cells[i];
    boost::split(indval, cell, boost::is_any_of(":"));
    if (indval.size() != 2) {
      cerr << "Malformed feature (expected <index>:<value>): " << cell << endl;
      exit(1);
    }

    unsigned int ind = 0;
    float val = 0.0f;
    try {
      ind = lexical_cast<unsigned int>(indval[0]);
      val = lexical_cast<float>(indval[1]);
    } catch (const boost::bad_lexical_cast&) {
      // Keep the "or die" contract instead of leaking an exception.
      cerr << "Non-numeric index or value in feature: " << cell << endl;
      exit(1);
    }

    // Guard the write: an index past the end would corrupt memory.
    if (ind >= blob->size()) {
      cerr << "Feature index out of range (channels=" << channels << "): "
           << cell << endl;
      exit(1);
    }
    (*blob)[ind] = val;
  }
  return shared_ptr<datum_type>(new datum_type(label, blob));
}
// Writes every datum to stdout, one per line, in the form
// "<label> -- v0 v1 ... vN " (each value is followed by a single space).
void display_data(data_type data) {
  for (data_type::const_iterator it = data.begin(); it != data.end(); ++it) {
    const datum_type& datum = **it;
    cout << datum.first << " -- ";

    const blob_type& values = *datum.second;
    for (blob_type::size_type k = 0; k < values.size(); ++k) {
      cout << values[k] << " ";
    }
    cout << endl;
  }
}
int main() {
int channels = 123;
std::ifstream infile("input.txt");
data_type data;
// read datum for each line
string line;
while (std::getline(infile, line)) {
// trim spaces
boost::trim(line);
// skip empty lines
if (line.empty()) {
cout << "empty line" << endl;
continue;
}
data.push_back(get_datum_form_line_or_die(line, channels));
}
display_data(data);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment