Skip to content

Instantly share code, notes, and snippets.

@mad
Created April 30, 2011 07:18
Show Gist options
  • Save mad/949508 to your computer and use it in GitHub Desktop.
Save mad/949508 to your computer and use it in GitHub Desktop.
#include <iostream> // cout, endl
#include <fstream> // fstream
#include <vector>
#include <string>
#include <algorithm> // copy
#include <iterator> // ostream_iterator
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// #include <boost/tokenizer.hpp>
#include <boost/algorithm/string.hpp>
using namespace std;
//using namespace boost;
void boost_tokenizer(string in, string out);
void boost_split(string in, string out);
inline void fast_split(char **fields, char* str);
// Program entry point.
//
// Expects two command-line arguments: argv[1] is the path of the
// tab-separated input file and argv[2] is the path of the output file.
// Both paths are handed to boost_split().
//
// Returns 1 (after printing a usage line to stderr) when fewer than two
// arguments were supplied, otherwise 0 once boost_split() has returned.
// boost_split() returns void, so a 0 exit code does not by itself prove
// that the files could be opened.
int main(int argc, char *argv[])
{
    // Constructing a std::string from a missing argument (a null pointer)
    // is undefined behaviour, so validate the argument count first.
    if (argc < 3)
    {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "split")
                  << " <input-file> <output-file>" << std::endl;
        return 1;
    }

    const std::string in(argv[1]);
    const std::string out(argv[2]);
    boost_split(in, out);
    return 0;
}
#if 0
// Reads in_file line by line, tokenizes every line with a Boost tokenizer
// driven by escaped_list_separator<char>("\\", "\t", "\"") and writes each
// token followed by ';' to out_file, ending every row with endl.
//
// Returns without processing anything when the input file cannot be opened
// (the output stream has already been constructed at that point).
//
// NOTE: this definition sits inside an `#if 0` region, and the
// boost/tokenizer.hpp include and `using namespace boost` it relies on are
// commented out at the top of the file.
void boost_tokenizer(string in_file, string out_file)
{
    typedef tokenizer< escaped_list_separator<char> > Tokenizer;

    ifstream input(in_file.c_str());
    ofstream output(out_file.c_str());
    if (!input.is_open())
        return;

    escaped_list_separator<char> field_rules("\\", "\t", "\"");
    vector< string > row;
    string record;
    while (getline(input, record))
    {
        // Materialise the whole row first so nothing is written for a
        // record whose tokenization fails part-way through.
        Tokenizer tokens(record, field_rules);
        row.assign(tokens.begin(), tokens.end());

        // Every field, including the last one, is followed by ';'.
        for (vector< string >::const_iterator field = row.begin(); field != row.end(); ++field)
            output << *field << ";";
        output << endl;
    }
}
#endif
#define LINE_SIZE 256
#define NUM_FIELDS 8
void boost_split(string in_file, string out_file)
{
ifstream in(in_file.c_str());
ofstream out(out_file.c_str());
char str[LINE_SIZE];
if (!in.is_open()) return;
std::vector< string > vec(8, "");
string line;
char **fields;
// fields = (char **) malloc(10 * sizeof(char**));
while (in.getline(str, LINE_SIZE))
{
boost::split(vec, str, boost::is_any_of("\t"));
// fast_split(fields, str);
// vec.push_back(string("wqe"));
out << vec[0] << ";" << vec[1] << ";" << vec[2] << ";" << vec[3] << ";" << vec[4] << ";" << vec[5] << ";" << vec[6] << ";" << vec[7] << "\n";
// vector now contains strings from one row, output to cout here
// copy(vec.begin(), vec.end(), ostream_iterator<string>(out, ";"));
}
}
// Walks over the tab-separated tokens of `str` using strtok().
//
// The tokens are currently discarded: nothing is written to `fields`, the
// code that would have stored them is disabled. The only observable effect
// is strtok()'s in-place modification of `str` (the delimiter that ends each
// token is overwritten with '\0').
//
// XXX: not thread safe -- strtok() keeps its scanning position in internal
// static state.
inline void fast_split(char **fields, char* str)
{
    (void)fields; // intentionally unused until token storage is enabled

    for (char *token = strtok(str, "\t");
         token != NULL;
         token = strtok(NULL, "\t"))
    {
        // Token storage is disabled; each token is simply skipped.
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment