Skip to content

Instantly share code, notes, and snippets.

@eskil
Created July 10, 2012 20:27
Show Gist options
  • Save eskil/3086037 to your computer and use it in GitHub Desktop.
Save eskil/3086037 to your computer and use it in GitHub Desktop.
C++ version of chunkserv's TimeWindowedStats, using boost.python.
/*
C++11 version of chunkserv.util.TimeWindowedStats
A dumb test that feeds a ton of entries into both the python and C++ version shows that:
Python
- insert_events time 0.237569570541
- compute_stats time 19.1441180706
C++
- insert_events time 0.234213590622
- compute_stats time 10.5404567719
So roughly 2x faster at doing the compute_stats. This is with -g and no optimisation. With -O3:
C++
- insert_events time 0.074560880661
- compute_stats time 0.906313896179
*/
#include "chunkserv.hh"
#include <stdio.h>
#include <cmath>
#include <sys/time.h>
#include <boost/format.hpp>
#include <boost/python.hpp>
#include <boost/python/suite/indexing/vector_indexing_suite.hpp>
#include <boost/python/suite/indexing/map_indexing_suite.hpp>
#include <boost/assert.hpp>
namespace {
using namespace boost::python;
// Converts a std::pair into a Python 2-tuple. Registered with
// to_python_converter in the module init so that EntryVector elements
// surface to Python as plain (value, timestamp) tuples.
template<class FirstT, class SecondT>
struct PairToTupleConverter {
    static PyObject* convert(const std::pair<FirstT, SecondT>& p) {
        tuple t = make_tuple(p.first, p.second);
        // incref: to_python_converter expects a new reference.
        return incref(t.ptr());
    }
};
}
// Python bindings
// Module init for the `chunkserv_cxx` extension: registers all exposed types.
// NOTE(review): registration order matters — the indexing-suite wrappers and
// the pair-to-tuple converter must all be registered before Python code
// touches the exposed containers.
BOOST_PYTHON_MODULE(chunkserv_cxx) {
using namespace boost::python;
// For the unit-tests, we need to expose some of the internal types. This is because the unit-tests were
// written in python for the python version of the class. So they inspect some of the guts, and there
// We need to expose all the internal types.
// Expose windows_list_t to python
class_<yelp::chunkserv::TimeWindowedStats::windows_list_t> ("WindowsVector")
.def(vector_indexing_suite<yelp::chunkserv::TimeWindowedStats::windows_list_t>())
;
// Expose entry_list_t to python. Passes true to have boost.python magically look for type
// converters. In this case the std::pairs that it contains, which is defined using the
// to_python_converter later.
class_<yelp::chunkserv::TimeWindowedStats::entry_list_t> ("EntryVector")
.def(vector_indexing_suite<yelp::chunkserv::TimeWindowedStats::entry_list_t, true>())
;
// And also export time_windows_t. The `true` flag again enables converter
// lookup for the mapped entry_list_t values.
class_<yelp::chunkserv::TimeWindowedStats::time_windows_t> ("TimeWindowsMap")
.def(map_indexing_suite<yelp::chunkserv::TimeWindowedStats::time_windows_t, true>())
;
// magic that converts entry_t to a corresponding python tuple. Used by EntryVector
to_python_converter<yelp::chunkserv::TimeWindowedStats::entry_t, PairToTupleConverter<yelp::chunkserv::TimeWindowedStats::entry_t::first_type, yelp::chunkserv::TimeWindowedStats::entry_t::second_type> >();
scope().attr("__doc__") = "Hey I'm a docstring for this module";
// The main class: only the public surface the python tests exercise.
class_<yelp::chunkserv::TimeWindowedStats>("TimeWindowedStats", "A class which holds a window of statistics, and computes stats on it.")
.def("insert_event", &yelp::chunkserv::TimeWindowedStats::insert_event, "docstrings go here..")
.def("compute_stats", &yelp::chunkserv::TimeWindowedStats::compute_stats, "docstrings go here..")
// Read-only views of the internals, inspected by the python unit-tests.
.def_readonly("WINDOWS", &yelp::chunkserv::TimeWindowedStats::windows, "docstrings go here..")
.def_readonly("time_windows", &yelp::chunkserv::TimeWindowedStats::time_windows, "docstrings go here..")
;
}
namespace {
using namespace yelp::chunkserv;

// Current wall-clock time as fractional seconds since the epoch, mirroring
// Python's time.time().
inline double time_time() {
    struct timeval tv;
    ::gettimeofday(&tv, 0);
    return tv.tv_sec + tv.tv_usec/1000000.0;
}

// Sample standard deviation of the values (.first) in `window`. Uses the
// count - 1 divisor (Bessel's correction); returns 0.0 for fewer than two
// samples, where the deviation is undefined.
double get_std_dev(double count, double mean, const TimeWindowedStats::entry_list_t &window) {
    if (count <= 1.0) {
        return 0.0;
    }
    auto variances = 0.0;
#ifdef CXX11_NO_FOR_RANGE
    for (auto val_it = window.cbegin(); val_it != window.cend(); ++val_it) {
        const auto &val = *val_it;
#else
    for (const auto &val: window) {
#endif
        // Square by multiplication rather than std::pow(x, 2): pow is a
        // general transcendental call and far slower for a fixed exponent.
        const auto delta = val.first - mean;
        variances += delta * delta;
    }
    auto variance = variances / double(count - 1);
    return sqrt(variance);
}

// Linearly-interpolated percentile of `window`, which must be sorted
// ascending by value. Returns NaN for an empty window — callers must test
// with std::isnan (NaN never compares equal, not even to itself).
double get_percentile(const TimeWindowedStats::entry_list_t &window, int percentile) {
    if (window.empty()) {
        return std::numeric_limits<double>::quiet_NaN();
    }
    auto count = window.size() - 1;
    auto index = percentile / 100.0 * count;
    if (index <= 0.0) {
        return window.front().first;
    }
    if (index >= count) {
        return window.back().first;
    }
    auto lo_index = static_cast<unsigned int> (std::floor(index));
    auto hi_index = static_cast<unsigned int> (std::ceil(index));
    auto weight = index - lo_index;
    // BUG FIX: the original also tested `lo_index < 0`, which is always false
    // for an unsigned value (and unreachable regardless, since index > 0.0 is
    // guaranteed above); the dead check has been removed. The hi_index guard
    // is kept as a defensive bound.
    if (hi_index > count) {
        return window.back().first;
    }
    auto lo = window[lo_index].first;
    auto hi = window[hi_index].first;
    if (lo == hi) {
        return lo;
    }
    return lo * (1 - weight) + hi * weight;
}
}
namespace yelp {
namespace chunkserv {
// Seeds the default reporting percentiles and window spans (in seconds) and
// records the construction time.
TimeWindowedStats::TimeWindowedStats() : init_time (time_time()) {
    // NOTE(review): this guard is named after the for-range feature, but what
    // it actually gates is initialiser-list support; elsewhere the file uses
    // CXX11_NO_INITIALISER_LISTS for that — confirm the intended macro.
#ifdef CXX11_NO_FOR_RANGE
    percentiles.push_back(50);
    percentiles.push_back(75);
    percentiles.push_back(95);
    percentiles.push_back(99);
    windows.push_back(60);
    windows.push_back(300);
#else
    percentiles.assign({50, 75, 95, 99});
    windows.assign({60, 300});
#endif
}
// Drops events older than each window's span and keeps the cached running
// sums in step, so compute_stats only ever sees in-window data.
void TimeWindowedStats::cull_old_events(timestamp_t now) {
    for (auto wit = windows.cbegin(); wit != windows.cend(); ++wit) {
        auto &entries = time_windows[*wit];
        const double cutoff = now - *wit;
        // Events are appended in arrival order, so the expired ones form a
        // prefix. Walk it once, accumulating the value total so the cached
        // sum can be adjusted in O(1) afterwards.
        value_t expired_total = 0.0;
        auto keep = entries.begin();
        while (keep != entries.end() && keep->second < cutoff) {
            expired_total += keep->first;
            ++keep;
        }
        time_windows_sums[*wit] -= expired_total;
        entries.erase(entries.begin(), keep);
        // Swap with a trimmed copy to release excess capacity (the classic
        // shrink-to-fit idiom).
        entry_list_t(entries).swap(entries);
    }
}
// Records one (value, timestamp) event in every configured window and bumps
// that window's running sum. Arrival order keeps each entry list sorted by
// time, which cull_old_events relies on.
// NOTE(review): the guard below gates a range-based for loop yet is named
// after initialiser lists — confirm the intended macro.
void TimeWindowedStats::insert_event(timestamp_t ts, value_t val) {
#ifdef CXX11_NO_INITIALISER_LISTS
    for (auto window_size_it = windows.cbegin(); window_size_it != windows.cend(); ++window_size_it) {
        const auto &window_size = *window_size_it;
#else
    for (const auto &window_size: windows) {
#endif
        // (A split new/old-list scheme was tried to avoid full resorts in
        // compute_stats; it measured ~8% slower, so the flat list stays.)
        auto &entries = time_windows[window_size];
        entries.push_back(entry_t(val, ts));
        time_windows_sums[window_size] += val;
    }
}
// Builds the stats report as a Python dict:
//   {"timestamp": now, "stats": [per-window dicts], "data_point_count": N}
// Each per-window dict carries a "name" ("<span>s_window"), a stat-type tag,
// and a "stats" dict of mean / std_dev / rate_per_sec / percentiles.
boost::python::dict TimeWindowedStats::compute_stats() {
    using namespace boost::python;
    auto stats = list();
    auto total_data_points = 0U;
    auto now = time_time();
    // Trim expired events first so every window reflects [now - span, now].
    cull_old_events(now);
#ifdef CXX11_NO_INITIALISER_LISTS
    for (auto window_size_it = windows.cbegin(); window_size_it != windows.cend(); ++window_size_it) {
        const auto &window_size = *window_size_it;
#else
    for (const auto &window_size: windows) {
#endif
        // Sort a copy by value for the percentile math; the stored list must
        // stay time-ordered for cull_old_events.
        auto window = entry_list_t(time_windows[window_size]);
        std::sort(window.begin(), window.end());
        auto count = window.size();
        total_data_points += count;
        // The sum is maintained incrementally, so the mean is O(1) here.
        auto total = time_windows_sums[window_size];
        auto mean = (count > 0.0) ? (double(total) / double(count)) : 0;
        auto std_dev = get_std_dev(count, mean, window);
        dict win_stats = dict();
        win_stats["mean"] = mean;
        win_stats["std_dev"] = std_dev;
        win_stats["rate_per_sec"] = double(count) / double(window_size);
#ifdef CXX11_NO_INITIALISER_LISTS
        for (auto percentile_it = percentiles.cbegin(); percentile_it != percentiles.cend(); ++percentile_it) {
            const auto &percentile = *percentile_it;
#else
        for (const auto &percentile: percentiles) {
#endif
            char name[128];
            auto sz = snprintf(name, sizeof(name), "%dth", percentile);
            BOOST_ASSERT(sz < static_cast<int>(sizeof(name)));
            (void)sz; // unused when NDEBUG compiles the assert away
            auto pct = get_percentile(window, percentile);
            // BUG FIX: the original compared `pct == quiet_NaN()`, which is
            // always false (NaN never compares equal to anything), so the "U"
            // branch was unreachable. std::isnan is the correct test.
            if (std::isnan(pct)) {
                // No valid percentile: 'U' is the magic RRD value for undefined.
                win_stats[name] = "U";
            } else {
                // Reuse pct instead of recomputing the percentile.
                win_stats[name] = pct;
            }
        }
        char stat_name[128];
        int sz = snprintf(stat_name, sizeof(stat_name), "%ds_window", window_size);
        BOOST_ASSERT(sz < static_cast<int>(sizeof(stat_name)));
        (void)sz; // unused when NDEBUG compiles the assert away
        dict stat = dict();
        stat["name"] = stat_name;
        stat["stat_type"] = 1; // STAT_TYPE_PERCENTILE :-(
        stat["stats"] = win_stats;
        stats.append(stat);
    }
    auto result = dict();
    result["timestamp"] = now;
    result["stats"] = stats;
    result["data_point_count"] = total_data_points;
    return result;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment