Skip to content

Instantly share code, notes, and snippets.

@tcbrindle
Forked from jbarczak/Tokenizer_Comparison.cpp
Last active May 12, 2016 21:25
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save tcbrindle/56276f1a727e97c93203 to your computer and use it in GitHub Desktop.
Save tcbrindle/56276f1a727e97c93203 to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <istream>
#include <iterator>
#include <sstream>
#include <string>
#include <vector>
#include <boost/tokenizer.hpp>
using namespace std;
// Minimal stopwatch on top of std::chrono::high_resolution_clock.
// The measurement starts when the object is constructed.
struct timer {
    using clock = std::chrono::high_resolution_clock;

    // Restarts the measurement from the current instant.
    void reset() { start_ = clock::now(); }

    // Returns the time since construction (or the last reset()), converted
    // to DurationType; nanoseconds unless the caller asks for another unit.
    template <class DurationType = std::chrono::nanoseconds>
    DurationType elapsed() const {
        const auto since_start = clock::now() - start_;
        return std::chrono::duration_cast<DurationType>(since_start);
    }

private:
    // Instant the current measurement began.
    std::chrono::time_point<clock> start_{clock::now()};
};
// 4 statements
void DoBoost( std::ofstream& cout, std::string& text )
{
boost::char_separator<char> sep(" \n\t\r\f");
boost::tokenizer<boost::char_separator<char>> tokens(text, sep);
for (const auto& t : tokens) {
cout << t ;
}
}
// 6 statements
// Tokenizes `str` on whitespace with stream iterators, first collecting
// every token into a vector and only then writing the tokens to `cout`
// back-to-back. The intermediate vector is intentional: it is the
// approach this variant exists to measure.
// Note: the parameter named `cout` is the output file, not std::cout.
void DoIterator(std::ofstream& cout, std::string& str )
{
    // Wrap the text in a stream so operator>> does the splitting.
    std::stringstream source{str};
    // A default-constructed istream_iterator is the end-of-stream marker.
    std::istream_iterator<std::string> first_token{source};
    std::istream_iterator<std::string> no_more_tokens{};
    // Materialise all whitespace-separated tokens.
    std::vector<std::string> tokens(first_token, no_more_tokens);
    // Stream the stored tokens out, one after another.
    std::ostream_iterator<std::string> sink{cout};
    std::copy(tokens.begin(), tokens.end(), sink);
}
// 4 statements
// Tokenizes `str` on whitespace with stream iterators and writes each
// token to `cout` as soon as it has been read, without storing the
// tokens anywhere. Tokens are written back-to-back.
// Note: the parameter named `cout` is the output file, not std::cout.
void DoIteratorCorrectly(std::ofstream& cout, std::string& str )
{
    // Wrap the text in a stream so operator>> does the splitting.
    std::stringstream source{str};
    // A default-constructed istream_iterator is the end-of-stream marker.
    std::istream_iterator<std::string> token{source};
    const std::istream_iterator<std::string> no_more_tokens{};
    // Emit each token in turn; advancing the iterator reads the next one.
    for (; token != no_more_tokens; ++token) {
        cout << *token;
    }
}
// 5 statements
// Tokenizes `str` with strtok() on space, newline, tab, carriage return
// and form feed, and writes every token to `cout` back-to-back.
// `str` itself is not modified. As with any C-string API, only the text
// up to the first embedded NUL (if any) is examined.
// Note: the parameter named `cout` is the output file, not std::cout.
void DoStrtok(std::ofstream& cout, std::string& str)
{
    // strtok() overwrites delimiters with NULs, so it needs a private,
    // writable, NUL-terminated copy. A std::string owns that copy: a failed
    // allocation throws instead of handing a null pointer to strcpy(), and
    // the buffer is released on every exit path.
    std::string scratch( str );
    char* token = std::strtok( &scratch[0], " \n\t\r\f" );
    while( token != nullptr )
    {
        cout << token;
        // A null first argument continues scanning the same buffer.
        token = std::strtok( nullptr, " \n\t\r\f" );
    }
}
// 7 statements
// Returns true when `tst` is one of the token separators: space, newline,
// tab, carriage return or form feed. The NUL character is never reported
// as a separator.
static bool IsDelim( char tst )
{
    // Walk the separator list up to (not including) its terminating NUL.
    for( const char* candidate = " \n\t\r\f"; *candidate != '\0'; ++candidate )
    {
        if( *candidate == tst )
            return true;
    }
    return false;
}
// Same test as IsDelim(), expressed with std::find instead of a hand-written
// loop. Kept for comparison: the original author measured this version at
// roughly 25% slower than the manual loop.
static bool IsDelim_STDFind( char tst )
{
    const char* const separators = " \n\t\r\f";
    // Five separator characters; the terminating NUL is deliberately excluded.
    const char* const separators_end = separators + 5;
    const char* const hit = std::find( separators, separators_end, tst );
    return hit != separators_end;
}
// 15 statements
// Hand-rolled tokenizer: splits `str` on the characters accepted by
// IsDelim() and writes every token to `cout` back-to-back. Tokens are
// terminated in place inside a private copy of the text, so `str` itself is
// not modified. Scanning stops at the first NUL character.
// Note: the parameter named `cout` is the output file, not std::cout.
void DoJoshsWay( std::ofstream& cout, std::string& str)
{
    // Private, writable, NUL-terminated copy. std::string owns it, so a
    // failed allocation throws (instead of a null pointer being written
    // through) and the buffer is released on every exit path.
    std::string scratch( str );
    char* p = &scratch[0];

    // skip leading delimiters
    while( *p && IsDelim(*p) )
        ++p;

    while( *p )
    {
        // note start of token
        char* pTok = p;
        do // skip non-delimiters
        {
            ++p;
        } while( *p && !IsDelim(*p) );

        // The token ended either at a delimiter or at the end of the text.
        // Remember which, because the distinction is lost once the
        // delimiter has been overwritten.
        const bool atEnd = ( *p == 0 );

        // clobber trailing delimiter with null (a no-op at end of text)
        *p = 0;
        cout << pTok; // send the token

        // At the end of the text there is nothing left to skip; stepping
        // past the terminator would read (and possibly write) outside the
        // buffer.
        if( atEnd )
            break;

        do // skip the null just written, and any subsequent delimiters
        {
            ++p;
        } while( *p && IsDelim(*p) );
    }
}
// 5 statements
// Splits the range [first, last) at every element that also occurs in the
// delimiter range [d_first, d_last), and calls binary_op(token_begin,
// token_end) once per piece, in order.
//
// - Adjacent delimiters and a leading delimiter produce empty pieces
//   (token_begin == token_end); callers that do not want them must filter.
// - A trailing delimiter does NOT produce a final empty piece, and an empty
//   input range produces no calls at all.
// - TextIt must allow multi-pass traversal (forward iterator or stronger):
//   each piece is scanned by find_first_of and then handed to the callback.
//
// The standard algorithms are qualified explicitly so the template does not
// depend on a `using namespace std;` in the including file or on
// argument-dependent lookup, and therefore also works for raw pointers.
template <class TextIt, class DelimIt, class TokenFn>
void for_each_token(TextIt first, TextIt last,
                    DelimIt d_first, DelimIt d_last,
                    TokenFn binary_op)
{
    while (first != last) {
        // End of the current piece: the next delimiter, or end of input.
        const auto pos = std::find_first_of(first, last, d_first, d_last);
        binary_op(first, pos);
        if (pos == last) break;
        // Resume just past the delimiter that ended this piece.
        first = std::next(pos);
    }
}
// 2 statements
// Tokenizes `str` with for_each_token() on space, newline, tab, carriage
// return and form feed, and writes every non-empty token to `cout`
// back-to-back.
// `str` is taken by value, so each call works on its own copy of the text.
// Note: the parameter named `cout` is the output file, not std::cout.
void DoTristansWay(std::ofstream& cout, std::string str)
{
    constexpr char delims[] = " \n\t\r\f";
    // std::end() of a char array points past the terminating '\0'; stop one
    // short so NUL is not treated as a sixth delimiter.
    for_each_token(str.cbegin(), str.cend(),
                   std::begin(delims), std::end(delims) - 1,
                   [&cout] (auto first, auto second) {
                       // Write the characters straight from the source text;
                       // no temporary std::string is built per token.
                       // `first` is only dereferenced for non-empty tokens.
                       if (first != second) cout.write(&*first, second - first);
                   });
}
int main()
{
std::ifstream t("crytek_sponza.obj");
std::stringstream str;
str << t.rdbuf();
std::string text = str.str();
FILE* fp = fopen("times.csv", "w");
fprintf(fp,"boost,iterators,iterators_right,strtok,josh,tristan\n");
for( size_t i=0; i<5; i++ )
{
std::chrono::nanoseconds tm;
{
timer t;
std::ofstream out_boost("out_boost.txt");
DoBoost( out_boost, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_iter("out_iter.txt");
DoIterator( out_iter, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_split("out_iter_right.txt");
DoIteratorCorrectly( out_split, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_strtok("out_strtok.txt");
DoStrtok( out_strtok, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_josh("out_josh.txt");
DoJoshsWay( out_josh, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
{
timer t;
std::ofstream out_tristan("out_tristan.txt");
DoTristansWay( out_tristan, text );
tm = t.elapsed();
}
fprintf(fp,"%lld,", tm.count() );
fprintf(fp,"\n");
}
return 0;
}
@derekxgl
Copy link

fprintf's format should be "%ld" instead of "%lld".

@brevzin
Copy link

brevzin commented May 12, 2016

You should change cout << string(first, second) to cout.write(&*first, second - first). No need to allocate a new string every time just to print it! It's another 1.4x speedup on top of your improvements...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment