Last active
August 29, 2015 14:11
-
-
Save tnarihi/e6e4ebe1f235dc6c6690 to your computer and use it in GitHub Desktop.
An example of parsing the LIBSVM input format with the Boost library
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* No Copyright | |
*/ | |
#include <boost/algorithm/string.hpp> | |
#include <boost/foreach.hpp> | |
#include <boost/shared_ptr.hpp> | |
#include <boost/lexical_cast.hpp> | |
#include <iostream> // NOLINT(readability/streams) | |
#include <fstream> // NOLINT(readability/streams) | |
#include <string> | |
#include <vector> | |
#include <utility> | |
using boost::shared_ptr; | |
using boost::lexical_cast; | |
using std::pair; | |
using std::vector; | |
using std::string; | |
using std::cout; | |
using std::cerr; | |
using std::endl; | |
typedef vector<float> blob_type; | |
typedef pair<string, shared_ptr<blob_type> > datum_type; | |
typedef vector< shared_ptr<datum_type> > data_type; | |
// Parses one line of LIBSVM-style text ("<label> <index>:<value> ...") into a
// (label, dense feature vector) pair.
//
// line:     one record; fields are separated by spaces and/or tabs.
// channels: length of the dense vector; entries not listed in `line` keep
//           their value-initialized 0.
//
// Returns a newly allocated datum whose first member is the label text and
// whose second member is the dense vector.
//
// "or die": on malformed input (negative `channels`, a feature that is not of
// the form "index:value", a non-numeric index or value, or an index outside
// [0, channels)) a message is written to stderr and the process exits with
// status 1.
//
// NOTE(review): the parsed index is used directly as the vector position
// (no 1-based -> 0-based shift). LIBSVM files are usually 1-based, so slot 0
// stays unused and `channels` must be max_index + 1 - confirm intent.
shared_ptr<datum_type> get_datum_form_line_or_die(string line, int channels) {
  if (channels < 0) {
    cerr << "Invalid number of channels: " << channels << endl;
    exit(1);
  }

  // Split into whitespace-separated fields. token_compress_on collapses runs
  // of separators so that "a  b" does not produce an empty field in between.
  vector<string> cells;
  boost::split(cells, line, boost::is_any_of(" \t"), boost::token_compress_on);

  // boost::split always yields at least one (possibly empty) token.
  const string label = cells[0];
  shared_ptr<blob_type> blob(new blob_type(channels));

  // Parse the sparse "index:value" features that follow the label.
  vector<string> indval;
  for (vector<string>::size_type i = 1; i < cells.size(); ++i) {
    const string& cell = cells[i];
    if (cell.empty()) {
      // Only possible for a trailing separator; nothing to parse.
      continue;
    }
    boost::split(indval, cell, boost::is_any_of(":"));
    if (indval.size() != 2) {
      cerr << "Malformed feature (expected index:value): " << cell << endl;
      exit(1);
    }
    unsigned int ind = 0;
    float val = 0.0f;
    try {
      ind = lexical_cast<unsigned int>(indval[0]);
      val = lexical_cast<float>(indval[1]);
    } catch (const boost::bad_lexical_cast&) {
      cerr << "Non-numeric index or value in feature: " << cell << endl;
      exit(1);
    }
    // Guard the write: an index past the end would corrupt memory. This also
    // catches negative text such as "-1", which lexical_cast<unsigned int>
    // accepts and wraps to a huge value.
    if (ind >= blob->size()) {
      cerr << "Feature index " << indval[0] << " out of range [0, "
           << channels << "): " << cell << endl;
      exit(1);
    }
    (*blob)[ind] = val;
  }
  return shared_ptr<datum_type>(new datum_type(label, blob));
}
void display_data(data_type data) { | |
BOOST_FOREACH(shared_ptr<datum_type> d, data) { | |
cout << d->first << " -- "; | |
BOOST_FOREACH(float v, *(d->second)) { | |
cout << v << " "; | |
} | |
cout << endl; | |
} | |
} | |
int main() { | |
int channels = 123; | |
std::ifstream infile("input.txt"); | |
data_type data; | |
// read datum for each line | |
string line; | |
while (std::getline(infile, line)) { | |
// trim spaces | |
boost::trim(line); | |
// skip empty lines | |
if (line.empty()) { | |
cout << "empty line" << endl; | |
continue; | |
} | |
data.push_back(get_datum_form_line_or_die(line, channels)); | |
} | |
display_data(data); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment