public
Created

  • Download Gist
benchmark_expand_algorithms.cpp
C++
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
#include <boost/regex.hpp>
#include <boost/range.hpp>
#include <cstring>
#include <cstdio>
#include <chrono>
 
using namespace boost;
using namespace boost::range;
 
static std::string expand(const std::string& in)
{
static const regex re(R"([a-z](?:-[a-z])+|[0-9](?:-[0-9])+)");
 
std::string out;
out.reserve(in.size() + 12); // heuristic
 
auto tail = in.begin();
for (auto match : make_iterator_range(sregex_iterator(in.begin(), in.end(), re), sregex_iterator()))
{
out.append(tail, match[0].first);
 
// char range bounds: the cost of accepting unordered ranges...
char a=127, b=0;
for (auto x=match[0].first; x<match[0].second; x+=2)
{ a = std::min(*x,a); b = std::max(*x,b); }
 
for (char c=a; c<=b; out.push_back(c++));
tail = match.suffix().first;
}
out.append(tail, in.end());
 
return out;
}
 
int alpha_range(char c) { return (c>='a') && (c<='z'); }
int digit_range(char c) { return (c>='0') && (c<='9'); }
 
std::string c_style(const std::string& s)
{
std::string buf;
buf.reserve(s.size() + 12); // heuristic
 
const char* in = s.c_str();
auto out = std::back_inserter(buf);
 
// parser state
int (*predicate)(char) = 0; // either: NULL (free state), alpha_range (in alphabetic range), digit_range (in digit range)
char lower=0,upper=0; // tracks lower and upper bound of character ranges in the range parsing states
 
while (*in)
{
if (!predicate)
{
// free parsing state
if (alpha_range(*in) && (in[1] == '-') && alpha_range(in[2]))
{
lower = upper = *in++;
predicate = &alpha_range;
}
else if (digit_range(*in) && (in[1] == '-') && digit_range(in[2]))
{
lower = upper = *in++;
predicate = &digit_range;
}
else *out++ = *in;
} else
{
// in a range
if (*in < lower) lower = *in;
if (*in > upper) upper = *in;
 
if (in[1] == '-' && predicate(in[2]))
in++; // more coming
else
{
// end of range mode, dump expansion
char c;
for (c=lower; c<=upper; *out++ = c++);
predicate = 0;
}
}
in++;
}
 
*out = 0; // null-terminate buf
return buf;
}
 
// benchmark helpers
static const std::string original_post = "a-z or 0-9 or a-b-c or a-z0-9";
static const std::string cpp_requirements = "This is some e-z test in 5-7 steps; this works: a-b-c. This works too: b-k-c-e. Likewise 8-4-6";
static const std::string torture_tests = "-x-s a-9 9- a-k-9 9-a-c-7-3";
 
template <typename _T>
double time_since(std::chrono::time_point<_T>& time)
{
typedef std::chrono::duration<long, std::ratio<1, 1000>> millisecs;
 
long tm = std::chrono::duration_cast<millisecs>(std::chrono::system_clock::now() - time).count();
time = std::chrono::system_clock::now();
return tm/1000.0;
}
 
void do_bench(std::string (*implementation)(const std::string&))
{
implementation(original_post); // from the original post
implementation(cpp_requirements); // from my C++ answer
implementation(torture_tests); // assorted torture tests
}
 
int main()
{
const size_t iterations = (1ul<<20);
std::cout << "Running benchmarks (" << iterations << " iterations)" << std::endl;
 
auto timestamp = std::chrono::system_clock::now();
 
for(size_t i=0; i< iterations; i++) do_bench(expand);
std::cout << "C++ Boost Style: " << time_since(timestamp) << "s" << std::endl;
 
for(size_t i=0; i< iterations; i++) do_bench(c_style);
std::cout << "Pure C style : " << time_since(timestamp) << "s" << std::endl;
 
std::cout << "\nboost c++: " << expand(original_post) << std::endl;
std::cout << "c_style : " << c_style(original_post) << std::endl;
 
std::cout << "\nboost c++: " << expand(cpp_requirements) << std::endl;
std::cout << "c_style : " << c_style(cpp_requirements) << std::endl;
 
std::cout << "\nboost c++: " << expand(torture_tests) << std::endl;
std::cout << "c_style : " << c_style(torture_tests) << std::endl;
}

The benchmark was designed to compare

Note that I've modified the C version to use std::string for output so we aren't comparing parameter passing performance but merely the algorithm itself. In other words, I tried to create as little differences in test context as possible. (Due to the iterator pattern I used in the pure C version, there very little changes were necessary)

After creating the C version I predicted that the C version would likely outperform the C++ version. It turns out I was pretty close to the mark:

g++ -L ~/custom/boost_1_47_0/stage/lib/ -I ~/custom/boost_1_47_0/ -O4 -g --std=c++0x test.cpp -o test -lboost_regex
Running benchmarks (1048576 iterations)
C++ Boost Style: 17.041s
Pure C style   : 4.546s

boost c++: abcdefghijklmnopqrstuvwxyz or 0123456789 or abc or abcdefghijklmnopqrstuvwxyz0123456789
c_style  : abcdefghijklmnopqrstuvwxyz or 0123456789 or abc or abcdefghijklmnopqrstuvwxyz0123456789

boost c++: This is some efghijklmnopqrstuvwxyz test in 567 steps; this works: abc. This works too: bcdefghijk. Likewise 45678
c_style  : This is some efghijklmnopqrstuvwxyz test in 567 steps; this works: abc. This works too: bcdefghijk. Likewise 45678

boost c++: -stuvwx a-9 9- abcdefghijk-9 9-abc-34567
c_style  : -stuvwx a-9 9- abcdefghijk-9 9-abc-34567

Update I optimized the strchr calls away... Performance is now 6.2x faster for the C version than for the C++ version. We have a winner!

Running benchmarks (1048576 iterations)
C++ Boost Style: 17.061s
Pure C style   : 2.741s

boost c++: abcdefghijklmnopqrstuvwxyz or 0123456789 or abc or abcdefghijklmnopqrstuvwxyz0123456789
c_style  : abcdefghijklmnopqrstuvwxyz or 0123456789 or abc or abcdefghijklmnopqrstuvwxyz0123456789

boost c++: This is some efghijklmnopqrstuvwxyz test in 567 steps; this works: abc. This works too: bcdefghijk. Likewise 45678
c_style  : This is some efghijklmnopqrstuvwxyz test in 567 steps; this works: abc. This works too: bcdefghijk. Likewise 45678

boost c++: -stuvwx a-9 9- abcdefghijk-9 9-abc-34567
c_style  : -stuvwx a-9 9- abcdefghijk-9 9-abc-34567

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.