sehe/benchmark_expand_algorithms.cpp

## benchmark_expand_algorithms.cpp
#include <boost/regex.hpp>
#include <boost/range.hpp>
#include <cstring>
#include <cstdio>
#include <chrono>

using namespace boost;
using namespace boost::range;

static std::string expand(const std::string& in)
{
    static const regex re(R"([a-z](?:-[a-z])+|[0-9](?:-[0-9])+)");

    std::string out;
    out.reserve(in.size() + 12); // heuristic

    auto tail = in.begin();
    for (auto match : make_iterator_range(sregex_iterator(in.begin(), in.end(), re), sregex_iterator()))
    {
        out.append(tail, match[0].first);

        // char range bounds: the cost of accepting unordered ranges...
        char a=127, b=0;
        for (auto x=match[0].first; x<match[0].second; x+=2)
        { a = std::min(*x,a); b = std::max(*x,b); }

        for (char c=a; c<=b; out.push_back(c++));
        tail = match.suffix().first;
    }
    out.append(tail, in.end());

    return out;
}

int alpha_range(char c) { return (c>='a') && (c<='z'); }
int digit_range(char c) { return (c>='0') && (c<='9'); }

std::string c_style(const std::string& s)
{
    std::string buf;
    buf.reserve(s.size() + 12); // heuristic

    const char* in  = s.c_str();
    auto out = std::back_inserter(buf);

    // parser state
    int (*predicate)(char) = 0; // either: NULL (free state), alpha_range (in alphabetic range), digit_range (in digit range)
    char lower=0,upper=0;       // tracks lower and upper bound of character ranges in the range parsing states

    while (*in)
    {
        if (!predicate)
        {
            // free parsing state
            if (alpha_range(*in) && (in[1] == '-') && alpha_range(in[2]))
            {
                lower = upper = *in++;
                predicate = &alpha_range;
            }
            else if (digit_range(*in) && (in[1] == '-') && digit_range(in[2]))
            {
                lower = upper = *in++;
                predicate = &digit_range;
            }
            else *out++ = *in;
        } else
        {
            // in a range
            if (*in < lower) lower = *in;
            if (*in > upper) upper = *in;

            if (in[1] == '-' && predicate(in[2]))
                in++; // more coming
            else
            {
                // end of range mode, dump expansion
                char c;
                for (c=lower; c<=upper; *out++ = c++);
                predicate = 0;
            }
        }
        in++;
    }

    *out = 0; // null-terminate buf
    return buf;
}

// benchmark helpers
static const std::string original_post    = "a-z or 0-9 or a-b-c or a-z0-9";
static const std::string cpp_requirements = "This is some e-z test in 5-7 steps; this works: a-b-c. This works too: b-k-c-e. Likewise 8-4-6";
static const std::string torture_tests    = "-x-s a-9 9- a-k-9 9-a-c-7-3";

template <typename _T>
    double time_since(std::chrono::time_point<_T>& time)
{
    typedef std::chrono::duration<long, std::ratio<1, 1000>> millisecs;

    long tm = std::chrono::duration_cast<millisecs>(std::chrono::system_clock::now() - time).count();
    time = std::chrono::system_clock::now();
    return tm/1000.0;
}

void do_bench(std::string (*implementation)(const std::string&))
{
    implementation(original_post);    // from the original post
    implementation(cpp_requirements); // from my C++ answer
    implementation(torture_tests);    // assorted torture tests
}

int main()
{
    const size_t iterations = (1ul<<20);
    std::cout << "Running benchmarks (" << iterations << " iterations)" << std::endl;

    auto timestamp = std::chrono::system_clock::now();

    for(size_t i=0; i< iterations; i++) do_bench(expand);
    std::cout << "C++ Boost Style: " << time_since(timestamp) << "s" << std::endl;

    for(size_t i=0; i< iterations; i++) do_bench(c_style);
    std::cout << "Pure C style   : " << time_since(timestamp) << "s" << std::endl;

    std::cout << "\nboost c++: " << expand(original_post)  << std::endl;
    std::cout << "c_style  : "   << c_style(original_post) << std::endl;

    std::cout << "\nboost c++: " << expand(cpp_requirements)  << std::endl;
    std::cout << "c_style  : "   << c_style(cpp_requirements) << std::endl;

    std::cout << "\nboost c++: " << expand(torture_tests)  << std::endl;
    std::cout << "c_style  : "   << c_style(torture_tests) << std::endl;
}
	#include <boost/regex.hpp>
	#include <boost/range.hpp>
	#include <cstring>
	#include <cstdio>
	#include <chrono>

	using namespace boost;
	using namespace boost::range;

	static std::string expand(const std::string& in)
	{
	static const regex re(R"([a-z](?:-[a-z])+\|[0-9](?:-[0-9])+)");

	std::string out;
	out.reserve(in.size() + 12); // heuristic

	auto tail = in.begin();
	for (auto match : make_iterator_range(sregex_iterator(in.begin(), in.end(), re), sregex_iterator()))
	{
	out.append(tail, match[0].first);

	// char range bounds: the cost of accepting unordered ranges...
	char a=127, b=0;
	for (auto x=match[0].first; x<match[0].second; x+=2)
	{ a = std::min(x,a); b = std::max(x,b); }

	for (char c=a; c<=b; out.push_back(c++));
	tail = match.suffix().first;
	}
	out.append(tail, in.end());

	return out;
	}

	int alpha_range(char c) { return (c>='a') && (c<='z'); }
	int digit_range(char c) { return (c>='0') && (c<='9'); }

	std::string c_style(const std::string& s)
	{
	std::string buf;
	buf.reserve(s.size() + 12); // heuristic

	const char* in = s.c_str();
	auto out = std::back_inserter(buf);

	// parser state
	int (*predicate)(char) = 0; // either: NULL (free state), alpha_range (in alphabetic range), digit_range (in digit range)
	char lower=0,upper=0; // tracks lower and upper bound of character ranges in the range parsing states

	while (*in)
	{
	if (!predicate)
	{
	// free parsing state
	if (alpha_range(*in) && (in[1] == '-') && alpha_range(in[2]))
	{
	lower = upper = *in++;
	predicate = &alpha_range;
	}
	else if (digit_range(*in) && (in[1] == '-') && digit_range(in[2]))
	{
	lower = upper = *in++;
	predicate = &digit_range;
	}
	else out++ = in;
	} else
	{
	// in a range
	if (in < lower) lower = in;
	if (in > upper) upper = in;

	if (in[1] == '-' && predicate(in[2]))
	in++; // more coming
	else
	{
	// end of range mode, dump expansion
	char c;
	for (c=lower; c<=upper; *out++ = c++);
	predicate = 0;
	}
	}
	in++;
	}

	*out = 0; // null-terminate buf
	return buf;
	}

	// benchmark helpers
	static const std::string original_post = "a-z or 0-9 or a-b-c or a-z0-9";
	static const std::string cpp_requirements = "This is some e-z test in 5-7 steps; this works: a-b-c. This works too: b-k-c-e. Likewise 8-4-6";
	static const std::string torture_tests = "-x-s a-9 9- a-k-9 9-a-c-7-3";

	template <typename _T>
	double time_since(std::chrono::time_point<_T>& time)
	{
	typedef std::chrono::duration<long, std::ratio<1, 1000>> millisecs;

	long tm = std::chrono::duration_cast<millisecs>(std::chrono::system_clock::now() - time).count();
	time = std::chrono::system_clock::now();
	return tm/1000.0;
	}

	void do_bench(std::string (*implementation)(const std::string&))
	{
	implementation(original_post); // from the original post
	implementation(cpp_requirements); // from my C++ answer
	implementation(torture_tests); // assorted torture tests
	}

	int main()
	{
	const size_t iterations = (1ul<<20);
	std::cout << "Running benchmarks (" << iterations << " iterations)" << std::endl;

	auto timestamp = std::chrono::system_clock::now();

	for(size_t i=0; i< iterations; i++) do_bench(expand);
	std::cout << "C++ Boost Style: " << time_since(timestamp) << "s" << std::endl;

	for(size_t i=0; i< iterations; i++) do_bench(c_style);
	std::cout << "Pure C style : " << time_since(timestamp) << "s" << std::endl;

	std::cout << "\nboost c++: " << expand(original_post) << std::endl;
	std::cout << "c_style : " << c_style(original_post) << std::endl;

	std::cout << "\nboost c++: " << expand(cpp_requirements) << std::endl;
	std::cout << "c_style : " << c_style(cpp_requirements) << std::endl;

	std::cout << "\nboost c++: " << expand(torture_tests) << std::endl;
	std::cout << "c_style : " << c_style(torture_tests) << std::endl;
	}