Created
November 3, 2011 00:14
-
-
Save sehe/1335390 to your computer and use it in GitHub Desktop.
Benchmark for http://stackoverflow.com/questions/7985661/method-for-expand-a-z-to-abc-xyz-form/7988413#7988413
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <boost/regex.hpp> | |
#include <boost/range.hpp> | |
#include <cstring> | |
#include <cstdio> | |
#include <chrono> | |
using namespace boost; | |
using namespace boost::range; | |
static std::string expand(const std::string& in) | |
{ | |
static const regex re(R"([a-z](?:-[a-z])+|[0-9](?:-[0-9])+)"); | |
std::string out; | |
out.reserve(in.size() + 12); // heuristic | |
auto tail = in.begin(); | |
for (auto match : make_iterator_range(sregex_iterator(in.begin(), in.end(), re), sregex_iterator())) | |
{ | |
out.append(tail, match[0].first); | |
// char range bounds: the cost of accepting unordered ranges... | |
char a=127, b=0; | |
for (auto x=match[0].first; x<match[0].second; x+=2) | |
{ a = std::min(*x,a); b = std::max(*x,b); } | |
for (char c=a; c<=b; out.push_back(c++)); | |
tail = match.suffix().first; | |
} | |
out.append(tail, in.end()); | |
return out; | |
} | |
int alpha_range(char c) { return (c>='a') && (c<='z'); } | |
int digit_range(char c) { return (c>='0') && (c<='9'); } | |
std::string c_style(const std::string& s) | |
{ | |
std::string buf; | |
buf.reserve(s.size() + 12); // heuristic | |
const char* in = s.c_str(); | |
auto out = std::back_inserter(buf); | |
// parser state | |
int (*predicate)(char) = 0; // either: NULL (free state), alpha_range (in alphabetic range), digit_range (in digit range) | |
char lower=0,upper=0; // tracks lower and upper bound of character ranges in the range parsing states | |
while (*in) | |
{ | |
if (!predicate) | |
{ | |
// free parsing state | |
if (alpha_range(*in) && (in[1] == '-') && alpha_range(in[2])) | |
{ | |
lower = upper = *in++; | |
predicate = &alpha_range; | |
} | |
else if (digit_range(*in) && (in[1] == '-') && digit_range(in[2])) | |
{ | |
lower = upper = *in++; | |
predicate = &digit_range; | |
} | |
else *out++ = *in; | |
} else | |
{ | |
// in a range | |
if (*in < lower) lower = *in; | |
if (*in > upper) upper = *in; | |
if (in[1] == '-' && predicate(in[2])) | |
in++; // more coming | |
else | |
{ | |
// end of range mode, dump expansion | |
char c; | |
for (c=lower; c<=upper; *out++ = c++); | |
predicate = 0; | |
} | |
} | |
in++; | |
} | |
*out = 0; // null-terminate buf | |
return buf; | |
} | |
// benchmark helpers | |
static const std::string original_post = "a-z or 0-9 or a-b-c or a-z0-9"; | |
static const std::string cpp_requirements = "This is some e-z test in 5-7 steps; this works: a-b-c. This works too: b-k-c-e. Likewise 8-4-6"; | |
static const std::string torture_tests = "-x-s a-9 9- a-k-9 9-a-c-7-3"; | |
template <typename _T> | |
double time_since(std::chrono::time_point<_T>& time) | |
{ | |
typedef std::chrono::duration<long, std::ratio<1, 1000>> millisecs; | |
long tm = std::chrono::duration_cast<millisecs>(std::chrono::system_clock::now() - time).count(); | |
time = std::chrono::system_clock::now(); | |
return tm/1000.0; | |
} | |
void do_bench(std::string (*implementation)(const std::string&)) | |
{ | |
implementation(original_post); // from the original post | |
implementation(cpp_requirements); // from my C++ answer | |
implementation(torture_tests); // assorted torture tests | |
} | |
int main() | |
{ | |
const size_t iterations = (1ul<<20); | |
std::cout << "Running benchmarks (" << iterations << " iterations)" << std::endl; | |
auto timestamp = std::chrono::system_clock::now(); | |
for(size_t i=0; i< iterations; i++) do_bench(expand); | |
std::cout << "C++ Boost Style: " << time_since(timestamp) << "s" << std::endl; | |
for(size_t i=0; i< iterations; i++) do_bench(c_style); | |
std::cout << "Pure C style : " << time_since(timestamp) << "s" << std::endl; | |
std::cout << "\nboost c++: " << expand(original_post) << std::endl; | |
std::cout << "c_style : " << c_style(original_post) << std::endl; | |
std::cout << "\nboost c++: " << expand(cpp_requirements) << std::endl; | |
std::cout << "c_style : " << c_style(cpp_requirements) << std::endl; | |
std::cout << "\nboost c++: " << expand(torture_tests) << std::endl; | |
std::cout << "c_style : " << c_style(torture_tests) << std::endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Update I optimized the
strchr
calls away... Performance is now 6.2x faster for the C version than for the C++ version. We have a winner!