Created
November 3, 2011 00:14
-
-
Save sehe/1335390 to your computer and use it in GitHub Desktop.
Benchmark for http://stackoverflow.com/questions/7985661/method-for-expand-a-z-to-abc-xyz-form/7988413#7988413
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <boost/regex.hpp> | |
#include <boost/range.hpp> | |
#include <cstring> | |
#include <cstdio> | |
#include <chrono> | |
using namespace boost; | |
using namespace boost::range; | |
static std::string expand(const std::string& in) | |
{ | |
static const regex re(R"([a-z](?:-[a-z])+|[0-9](?:-[0-9])+)"); | |
std::string out; | |
out.reserve(in.size() + 12); // heuristic | |
auto tail = in.begin(); | |
for (auto match : make_iterator_range(sregex_iterator(in.begin(), in.end(), re), sregex_iterator())) | |
{ | |
out.append(tail, match[0].first); | |
// char range bounds: the cost of accepting unordered ranges... | |
char a=127, b=0; | |
for (auto x=match[0].first; x<match[0].second; x+=2) | |
{ a = std::min(*x,a); b = std::max(*x,b); } | |
for (char c=a; c<=b; out.push_back(c++)); | |
tail = match.suffix().first; | |
} | |
out.append(tail, in.end()); | |
return out; | |
} | |
int alpha_range(char c) { return (c>='a') && (c<='z'); } | |
int digit_range(char c) { return (c>='0') && (c<='9'); } | |
std::string c_style(const std::string& s) | |
{ | |
std::string buf; | |
buf.reserve(s.size() + 12); // heuristic | |
const char* in = s.c_str(); | |
auto out = std::back_inserter(buf); | |
// parser state | |
int (*predicate)(char) = 0; // either: NULL (free state), alpha_range (in alphabetic range), digit_range (in digit range) | |
char lower=0,upper=0; // tracks lower and upper bound of character ranges in the range parsing states | |
while (*in) | |
{ | |
if (!predicate) | |
{ | |
// free parsing state | |
if (alpha_range(*in) && (in[1] == '-') && alpha_range(in[2])) | |
{ | |
lower = upper = *in++; | |
predicate = &alpha_range; | |
} | |
else if (digit_range(*in) && (in[1] == '-') && digit_range(in[2])) | |
{ | |
lower = upper = *in++; | |
predicate = &digit_range; | |
} | |
else *out++ = *in; | |
} else | |
{ | |
// in a range | |
if (*in < lower) lower = *in; | |
if (*in > upper) upper = *in; | |
if (in[1] == '-' && predicate(in[2])) | |
in++; // more coming | |
else | |
{ | |
// end of range mode, dump expansion | |
char c; | |
for (c=lower; c<=upper; *out++ = c++); | |
predicate = 0; | |
} | |
} | |
in++; | |
} | |
*out = 0; // null-terminate buf | |
return buf; | |
} | |
// benchmark helpers | |
static const std::string original_post = "a-z or 0-9 or a-b-c or a-z0-9"; | |
static const std::string cpp_requirements = "This is some e-z test in 5-7 steps; this works: a-b-c. This works too: b-k-c-e. Likewise 8-4-6"; | |
static const std::string torture_tests = "-x-s a-9 9- a-k-9 9-a-c-7-3"; | |
template <typename _T> | |
double time_since(std::chrono::time_point<_T>& time) | |
{ | |
typedef std::chrono::duration<long, std::ratio<1, 1000>> millisecs; | |
long tm = std::chrono::duration_cast<millisecs>(std::chrono::system_clock::now() - time).count(); | |
time = std::chrono::system_clock::now(); | |
return tm/1000.0; | |
} | |
void do_bench(std::string (*implementation)(const std::string&)) | |
{ | |
implementation(original_post); // from the original post | |
implementation(cpp_requirements); // from my C++ answer | |
implementation(torture_tests); // assorted torture tests | |
} | |
int main() | |
{ | |
const size_t iterations = (1ul<<20); | |
std::cout << "Running benchmarks (" << iterations << " iterations)" << std::endl; | |
auto timestamp = std::chrono::system_clock::now(); | |
for(size_t i=0; i< iterations; i++) do_bench(expand); | |
std::cout << "C++ Boost Style: " << time_since(timestamp) << "s" << std::endl; | |
for(size_t i=0; i< iterations; i++) do_bench(c_style); | |
std::cout << "Pure C style : " << time_since(timestamp) << "s" << std::endl; | |
std::cout << "\nboost c++: " << expand(original_post) << std::endl; | |
std::cout << "c_style : " << c_style(original_post) << std::endl; | |
std::cout << "\nboost c++: " << expand(cpp_requirements) << std::endl; | |
std::cout << "c_style : " << c_style(cpp_requirements) << std::endl; | |
std::cout << "\nboost c++: " << expand(torture_tests) << std::endl; | |
std::cout << "c_style : " << c_style(torture_tests) << std::endl; | |
} |
Update I optimized the strchr
calls away... Performance is now 6.2x faster for the C version than for the C++ version. We have a winner!
Running benchmarks (1048576 iterations)
C++ Boost Style: 17.061s
Pure C style : 2.741s
boost c++: abcdefghijklmnopqrstuvwxyz or 0123456789 or abc or abcdefghijklmnopqrstuvwxyz0123456789
c_style : abcdefghijklmnopqrstuvwxyz or 0123456789 or abc or abcdefghijklmnopqrstuvwxyz0123456789
boost c++: This is some efghijklmnopqrstuvwxyz test in 567 steps; this works: abc. This works too: bcdefghijk. Likewise 45678
c_style : This is some efghijklmnopqrstuvwxyz test in 567 steps; this works: abc. This works too: bcdefghijk. Likewise 45678
boost c++: -stuvwx a-9 9- abcdefghijk-9 9-abc-34567
c_style : -stuvwx a-9 9- abcdefghijk-9 9-abc-34567
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The benchmark was designed to compare
Note that I've modified the C version to use std::string for output so we aren't comparing parameter passing performance but merely the algorithm itself. In other words, I tried to create as little differences in test context as possible. (Due to the iterator pattern I used in the pure C version, there very little changes were necessary)
After creating the C version I predicted that the C version would likely outperform the C++ version. It turns out I was pretty close to the mark: