Skip to content

Instantly share code, notes, and snippets.

@stephenjbarr
Created March 31, 2012 17:22
Show Gist options
  • Save stephenjbarr/2266900 to your computer and use it in GitHub Desktop.
Save stephenjbarr/2266900 to your computer and use it in GitHub Desktop.
Using Boost.Tokenizer to parse CSV files into Eigen matrices
#include <iostream>
#include <iomanip>
#include <fstream>
#include "mkl.h"
#include "math.h"
#include <vector>
#include <cmath>
#include <string>
#include <cstdlib>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
#define EIGEN_USE_MKL_BLAS
#define EIGEN_USE_MKL_LAPACKE
#define NDEBUG
#ifndef PI
#define PI 3.141592653589793
#endif
#include <Eigen/Dense>
#include <boost/tokenizer.hpp>
using namespace boost;
using namespace Eigen;
using namespace std;
// Dynamic-size matrix of doubles with row-major storage
// (both dimensions are Eigen::Dynamic, storage option is Eigen::RowMajor).
typedef Eigen::Matrix<double,Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> MatrixRMXd;
////////////////////////////////////////
// PARSE THE OPTIONS
// Parse the command-line arguments.
//
// Recognised option (getopt_long_only syntax, so "-input" and "--input" are
// both accepted, as is the "--input=<file>" form):
//   --input <file>   path of the CSV file to read; copied into `fname`.
//
// argc, argv : the argument vector as received by main().
// fname      : output parameter; assigned only when --input is present,
//              otherwise left untouched.
//
// Anything else that looks like an option is rejected by getopt (which
// prints its own diagnostic) and then skipped, so parsing continues.
void parseTheOptions(int argc, char *argv[],
                     std::string& fname) {
    std::cout << "This is the argument parsing function" << std::endl;

    // Table of long options; the all-zero entry terminates it.
    static const struct option long_options[] = {
        {"input", required_argument, NULL, 'u'},
        {NULL, 0, NULL, 0}
    };

    for (;;) {
        int option_index = 0;
        // The short-option string is empty on purpose: the program has no
        // short options, and listing unused ones would make getopt swallow
        // them (and their arguments) without anyone noticing.
        const int c = getopt_long_only(argc, argv, "",
                                       long_options, &option_index);
        if (c == -1)
            break;  // no more options

        switch (c) {
        case 'u':
            fname = optarg;
            break;
        default:
            // Unknown option or missing argument: getopt has already
            // reported it, nothing more to do here.
            break;
        }
    }
} // end opt parse
////////////////////////////////////////
// double round(double number)
// {
// return number < 0.0 ? ceil(number - 0.5) : floor(number + 0.5);
// }
////////////////////////////////////////
// Parse a headerless CSV file into an Eigen integer matrix.
//
// Every line is split with boost::escaped_list_separator<char> using its
// default settings. The number of fields on the FIRST line defines the
// expected column count; any line with a different field count is reported
// on stdout and left out of the result. Each kept field is converted with
// strtod and rounded to the nearest integer (a field that does not start
// with a number therefore becomes 0).
//
// infilename : path of the CSV file to read.
// returns    : a (good rows) x (columns) matrix on success;
//              a 0x0 matrix when the file contains no lines;
//              a 1x1 zero matrix when the file cannot be opened (the 1x1
//              shape is kept for compatibility with existing callers; a
//              diagnostic is written to stderr).
MatrixXi parseCSVfile_int(std::string infilename) {
    typedef boost::tokenizer< boost::escaped_list_separator<char> > Tokenizer;
    typedef std::vector<std::string> Row;
    typedef MatrixXi::Index Index;

    std::ifstream in(infilename.c_str());
    if (!in.is_open()) {
        std::cerr << "Could not open input file '" << infilename << "'"
                  << std::endl;
        // Zero-filled so the caller never reads indeterminate values.
        return MatrixXi::Zero(1, 1);
    }

    // Read the whole file, one tokenised row per line. The stream is closed
    // by its destructor.
    std::vector<Row> matrows;
    std::string line;
    while (std::getline(in, line)) {
        Tokenizer tok(line);
        matrows.push_back(Row(tok.begin(), tok.end()));
    }

    // An empty file has no first row to take the column count from.
    if (matrows.empty()) {
        return MatrixXi(0, 0);
    }

    // Count the rows whose width matches the first row.
    const std::size_t Nrows = matrows.size();
    const std::size_t Ncols = matrows[0].size();
    std::size_t Ngoodrows = 0;
    for (std::size_t i = 0; i < Nrows; ++i) {
        if (matrows[i].size() == Ncols) {
            ++Ngoodrows;
        }
    }

    // Transform the vector of rows into an Eigen integer matrix.
    MatrixXi xmat(static_cast<Index>(Ngoodrows), static_cast<Index>(Ncols));
    std::cout << "INPUT MATRIX: " << Nrows << "x" << Ncols << std::endl;

    Index rc = 0;  // next output row; lags behind i once a row is skipped
    for (std::size_t i = 0; i < Nrows; ++i) {
        const Row& row = matrows[i];
        if (row.size() != Ncols) {
            std::cout << "Row " << i << " has bad column count" << std::endl;
            continue;
        }
        for (std::size_t j = 0; j < Ncols; ++j) {
            xmat(rc, static_cast<Index>(j)) =
                static_cast<int>(round(std::strtod(row[j].c_str(), NULL)));
        }
        ++rc;
    }
    return xmat;
}
int main(int argc, char **argv)
{
string fname;
parseTheOptions(argc, argv, fname);
MatrixXi parsed = parseCSVfile_int(fname);
cout << "PARSED: " << endl;
cout << parsed << endl << endl;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment