flaviut/benchmark.cpp

## benchmark.cpp
#include <iostream>
#include <iomanip>
#include <pico/divider.h>
#include "fpm/fixed.hpp"
// from https://github.com/JoelFilho/EMB
#include "emb.hpp"
#include "smartdevice/Debug.hpp"

#pragma GCC optimize("O3")

// DECL_BENCH(suffix, conversion, count) stamps out four benchmark function
// templates for one numeric type:
//   benchmark_conv_<suffix>  - conversion(i)
//   benchmark_div_<suffix>   - 1 / conversion(i)
//   benchmark_mul_<suffix>   - conversion(i) * operand
//   benchmark_add_<suffix>   - conversion(i) + operand
// `conversion` is a type name (used as a functional cast) or a factory
// function applied to the int loop counter; `count` is the number of
// inner-loop operations performed per step of the State range. Every result
// is passed to emb::dontOptimize.
//
// The multiply/add operand is conversion(5454.13249), kept in a function-local
// static named `operand` so it stays distinct from the State parameter `s`.
//
// NOTE(review): the division loop starts at i == 0, so its first divisor is
// conversion(0); confirm that is acceptable for every instantiated type.
// NOTE(review): 5454.13249 does not fit an 8-bit integer; confirm the operand
// used by the int8_t instantiation is what was intended.
#define DECL_BENCH(suffix, conversion, count) \
template<typename State> \
void benchmark_conv_ ## suffix(State &s) { \
    for (auto _: s) { \
        for (int i = 0; i < (count); i++) \
            emb::dontOptimize(conversion(i)); \
    } \
} \
template<typename State> \
void benchmark_div_ ## suffix(State &s) { \
    for (auto _: s) { \
        for (int i = 0; i < (count); i++) \
            emb::dontOptimize(1/conversion(i)); \
    } \
} \
template<typename State> \
void benchmark_mul_ ## suffix(State &s) { \
    for (auto _: s) { \
        static auto operand = conversion(5454.13249); \
        for (int i = 0; i < (count); i++) \
            emb::dontOptimize(conversion(i)*operand); \
    } \
} \
template<typename State> \
void benchmark_add_ ## suffix(State &s) { \
    for (auto _: s) { \
        static auto operand = conversion(5454.13249); \
        for (int i = 0; i < (count); i++) \
            emb::dontOptimize(conversion(i)+operand); \
    } \
}
// RUN_BENCH(suffix) passes the four functions generated by
// DECL_BENCH(suffix, ...) (conv, div, mul, add) to EMB_MAKE_BENCHMARK together
// with the identifier `benchmarker`, which must therefore name a variable in
// scope where the macro is used. The expansion supplies its own semicolons.
#define RUN_BENCH(suffix) \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_conv_ ## suffix); \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_div_ ## suffix);  \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_mul_ ## suffix);  \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_add_ ## suffix);

// fpm fixed-point type instantiated with int16_t, int32_t and 8.
// NOTE(review): presumably 8 integer bits and 8 fraction bits, as the name
// suggests - confirm against the fpm::fixed template parameters.
using fixed_8_8 = fpm::fixed<int16_t, int32_t, 8>;

// Generate the conv/div/mul/add benchmark templates for every numeric type
// under test. The float and double variants run 1000 inner iterations; all
// other variants run 10000.
DECL_BENCH(i8, int8_t, 10000)
DECL_BENCH(i16, int16_t, 10000)
DECL_BENCH(i32, int32_t, 10000)
DECL_BENCH(i64, int64_t, 10000)
DECL_BENCH(f, float, 1000)
DECL_BENCH(d, double, 1000)
// The fixed-point variants build their values with from_raw_value, so the
// loop counter is handed over as a raw value rather than converted.
DECL_BENCH(fgg, fpm::fixed_16_16::from_raw_value, 10000)
DECL_BENCH(f88, fixed_8_8::from_raw_value, 10000)

// Benchmark body for div_s32s32: for every step of the State range, computes
// div_s32s32(1, d) for d = 0 .. 9999 and feeds each result to
// emb::dontOptimize.
// NOTE(review): div_s32s32 is presumably provided by <pico/divider.h>.
template<typename State>
void benchmark_hdiv_i32(State &s) {
    constexpr int kInnerLoops = 10000;
    for (auto _: s) {
        int divisor = 0;
        while (divisor < kInnerLoops) {
            emb::dontOptimize(div_s32s32(1, int32_t(divisor)));
            ++divisor;
        }
    }
}
// Benchmark body for div_s64s64: for every step of the State range, computes
// div_s64s64(1, d) for d = 0 .. 9999 (d is passed as a plain int) and feeds
// each result to emb::dontOptimize.
// NOTE(review): div_s64s64 is presumably provided by <pico/divider.h>.
template<typename State>
void benchmark_hdiv_i64(State &s) {
    constexpr int kInnerLoops = 10000;
    for (auto _: s) {
        int divisor = 0;
        while (divisor < kInnerLoops) {
            emb::dontOptimize(div_s64s64(1, divisor));
            ++divisor;
        }
    }
}
// Benchmark body for hw_divider_s32_quotient_inlined: for every step of the
// State range, computes hw_divider_s32_quotient_inlined(1, d) for
// d = 0 .. 9999 and feeds each result to emb::dontOptimize.
template<typename State>
void benchmark_uhdiv_i32(State &s) {
    constexpr int kInnerLoops = 10000;
    for (auto _: s) {
        int divisor = 0;
        while (divisor < kInnerLoops) {
            emb::dontOptimize(hw_divider_s32_quotient_inlined(1, int32_t(divisor)));
            ++divisor;
        }
    }
}

// Result printer passed to Benchmarker::runBenchmarks. Writes one line per
// benchmark to std::cout: the name right-aligned in a 20-character field,
// then the iteration count, the mean and the standard deviation, separated
// by tabs, with "us" appended to the last two values.
struct Reporter {
    template<typename Accumulator>
    static void report(const char *name, size_t iterations, Accumulator mean, Accumulator sd) {
        std::ostream &out = std::cout;
        out << std::setw(20) << name;      // the field width applies to the name only
        out << '\t' << iterations;
        out << '\t' << mean << "us";
        out << '\t' << sd << "us\n";
    }
};

// Clock adaptor used as the template argument of emb::Benchmarker.
struct cpu_timer {
    // Returns the current value of time_us_64() as a signed 64-bit integer.
    // NOTE(review): presumably microseconds, going by the function name.
    static int64_t now() {
        return static_cast<int64_t>(time_us_64());
    }
};

// emb::Benchmarker specialised with cpu_timer as its clock type.
using Benchmarker = emb::Benchmarker<cpu_timer>;

// Entry point: initialises Debug, waits three seconds, hands every benchmark
// to the Benchmarker, runs them with Reporter as the output sink and then
// idles forever.
// NOTE(review): the 3 s delay presumably gives the host time to attach to the
// console before output starts - confirm.
[[noreturn]] int main() {
    Debug::init();
    sleep_ms(3000);
    std::cout << "Benchmarking...\n";
    Benchmarker benchmarker(1000);
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_hdiv_i32)
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_hdiv_i64)
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_uhdiv_i32)
    RUN_BENCH(i8)
    RUN_BENCH(i16)
    RUN_BENCH(i32)
    RUN_BENCH(i64)
    RUN_BENCH(f)
    RUN_BENCH(d)
    RUN_BENCH(fgg)
    RUN_BENCH(f88)
    benchmarker.runBenchmarks<Reporter>();
    // Flush explicitly: the program never exits, so nothing else will.
    std::cout << "Done :D\n" << std::flush;
    // A [[noreturn]] function must not return (doing so is undefined
    // behavior), so park here once the results are out.
    while (true) {
        sleep_ms(1000);
    }
}

## results-rp2040.csv

          
operation,type,Inner loops,mean μs,mean hw μs,cycles,speedup,Property
int conversion,int8,10000,334,335,4,0.997
division,int8,10000,3980,5108,50,0.779
multiplication,int8,10000,737,737,9,1
addition,int8,10000,734,736,9,0.997
int conversion,int16,10000,332,330,4,1.006
division,int16,10000,2676,5103,33,0.524
multiplication,int16,10000,655,654,8,1.002
addition,int16,10000,655,654,8,1.002
int conversion,int32,10000,332,331,4,1.003
division,int32,10000,2676,5102,33,0.525
multiplication,int32,10000,572,574,7,0.997
addition,int32,10000,573,576,7,0.995
int conversion,int64,10000,412,412,5,1
division,int64,10000,17877,8013,223,2.231
multiplication,int64,10000,6720,6720,84,1
addition,int64,10000,1061,1060,13,1.001
int conversion,float,1000,672,670,84,1.003
division,float,1000,4436,4434,555,1
multiplication,float,1000,1972,1972,247,1
addition,float,1000,1552,1551,194,1.001
int conversion,double,1000,664,662,83,1.003
division,double,1000,6577,4936,822,1.332
multiplication,double,1000,3541,3540,443,1
addition,double,1000,1892,1892,237,1
int conversion,fixed_16_16,10000,332,330,4,1.006
division,fixed_16_16,10000,54347,11408,679,4.764
multiplication,fixed_16_16,10000,13755,13755,172,1
addition,fixed_16_16,10000,572,574,7,0.997
int conversion,fixed_8_8,10000,330,331,4,0.997
division,fixed_8_8,10000,6300,5423,79,1.162
multiplication,fixed_8_8,10000,1544,1545,19,0.999
addition,fixed_8_8,10000,574,574,7,1
	#include <iostream>
	#include <iomanip>
	#include <pico/divider.h>
	#include "fpm/fixed.hpp"
	// from https://github.com/JoelFilho/EMB
	#include "emb.hpp"
	#include "smartdevice/Debug.hpp"

	#pragma GCC optimize("O3")

	// DECL_BENCH(suffix, conversion, count) stamps out four benchmark function
	// templates for one numeric type:
	//   benchmark_conv_<suffix>  - conversion(i)
	//   benchmark_div_<suffix>   - 1 / conversion(i)
	//   benchmark_mul_<suffix>   - conversion(i) * operand
	//   benchmark_add_<suffix>   - conversion(i) + operand
	// `conversion` is a type name (used as a functional cast) or a factory
	// function applied to the int loop counter; `count` is the number of
	// inner-loop operations performed per step of the State range. Every result
	// is passed to emb::dontOptimize.
	//
	// The multiply/add operand is conversion(5454.13249), kept in a function-local
	// static named `operand` so it stays distinct from the State parameter `s`.
	//
	// NOTE(review): the division loop starts at i == 0, so its first divisor is
	// conversion(0); confirm that is acceptable for every instantiated type.
	// NOTE(review): 5454.13249 does not fit an 8-bit integer; confirm the operand
	// used by the int8_t instantiation is what was intended.
	#define DECL_BENCH(suffix, conversion, count) \
	template<typename State> \
	void benchmark_conv_ ## suffix(State &s) { \
	    for (auto _: s) { \
	        for (int i = 0; i < (count); i++) \
	            emb::dontOptimize(conversion(i)); \
	    } \
	} \
	template<typename State> \
	void benchmark_div_ ## suffix(State &s) { \
	    for (auto _: s) { \
	        for (int i = 0; i < (count); i++) \
	            emb::dontOptimize(1/conversion(i)); \
	    } \
	} \
	template<typename State> \
	void benchmark_mul_ ## suffix(State &s) { \
	    for (auto _: s) { \
	        static auto operand = conversion(5454.13249); \
	        for (int i = 0; i < (count); i++) \
	            emb::dontOptimize(conversion(i)*operand); \
	    } \
	} \
	template<typename State> \
	void benchmark_add_ ## suffix(State &s) { \
	    for (auto _: s) { \
	        static auto operand = conversion(5454.13249); \
	        for (int i = 0; i < (count); i++) \
	            emb::dontOptimize(conversion(i)+operand); \
	    } \
	}
	// RUN_BENCH(suffix) passes the four functions generated by
	// DECL_BENCH(suffix, ...) (conv, div, mul, add) to EMB_MAKE_BENCHMARK together
	// with the identifier `benchmarker`, which must therefore name a variable in
	// scope where the macro is used. The expansion supplies its own semicolons.
	#define RUN_BENCH(suffix) \
	EMB_MAKE_BENCHMARK(benchmarker, benchmark_conv_ ## suffix); \
	EMB_MAKE_BENCHMARK(benchmarker, benchmark_div_ ## suffix); \
	EMB_MAKE_BENCHMARK(benchmarker, benchmark_mul_ ## suffix); \
	EMB_MAKE_BENCHMARK(benchmarker, benchmark_add_ ## suffix);

	// fpm fixed-point type instantiated with int16_t, int32_t and 8.
	// NOTE(review): presumably 8 integer bits and 8 fraction bits, as the name
	// suggests - confirm against the fpm::fixed template parameters.
	using fixed_8_8 = fpm::fixed<int16_t, int32_t, 8>;

	// Generate the conv/div/mul/add benchmark templates for every numeric type
	// under test. The float and double variants run 1000 inner iterations; all
	// other variants run 10000.
	DECL_BENCH(i8, int8_t, 10000)
	DECL_BENCH(i16, int16_t, 10000)
	DECL_BENCH(i32, int32_t, 10000)
	DECL_BENCH(i64, int64_t, 10000)
	DECL_BENCH(f, float, 1000)
	DECL_BENCH(d, double, 1000)
	// The fixed-point variants build their values with from_raw_value, so the
	// loop counter is handed over as a raw value rather than converted.
	DECL_BENCH(fgg, fpm::fixed_16_16::from_raw_value, 10000)
	DECL_BENCH(f88, fixed_8_8::from_raw_value, 10000)

	// Benchmark body for div_s32s32: for every step of the State range, computes
	// div_s32s32(1, d) for d = 0 .. 9999 and feeds each result to
	// emb::dontOptimize.
	// NOTE(review): div_s32s32 is presumably provided by <pico/divider.h>.
	template<typename State>
	void benchmark_hdiv_i32(State &s) {
	    constexpr int kInnerLoops = 10000;
	    for (auto _: s) {
	        int divisor = 0;
	        while (divisor < kInnerLoops) {
	            emb::dontOptimize(div_s32s32(1, int32_t(divisor)));
	            ++divisor;
	        }
	    }
	}
	// Benchmark body for div_s64s64: for every step of the State range, computes
	// div_s64s64(1, d) for d = 0 .. 9999 (d is passed as a plain int) and feeds
	// each result to emb::dontOptimize.
	// NOTE(review): div_s64s64 is presumably provided by <pico/divider.h>.
	template<typename State>
	void benchmark_hdiv_i64(State &s) {
	    constexpr int kInnerLoops = 10000;
	    for (auto _: s) {
	        int divisor = 0;
	        while (divisor < kInnerLoops) {
	            emb::dontOptimize(div_s64s64(1, divisor));
	            ++divisor;
	        }
	    }
	}
	// Benchmark body for hw_divider_s32_quotient_inlined: for every step of the
	// State range, computes hw_divider_s32_quotient_inlined(1, d) for
	// d = 0 .. 9999 and feeds each result to emb::dontOptimize.
	template<typename State>
	void benchmark_uhdiv_i32(State &s) {
	    constexpr int kInnerLoops = 10000;
	    for (auto _: s) {
	        int divisor = 0;
	        while (divisor < kInnerLoops) {
	            emb::dontOptimize(hw_divider_s32_quotient_inlined(1, int32_t(divisor)));
	            ++divisor;
	        }
	    }
	}

	// Result printer passed to Benchmarker::runBenchmarks. Writes one line per
	// benchmark to std::cout: the name right-aligned in a 20-character field,
	// then the iteration count, the mean and the standard deviation, separated
	// by tabs, with "us" appended to the last two values.
	struct Reporter {
	    template<typename Accumulator>
	    static void report(const char *name, size_t iterations, Accumulator mean, Accumulator sd) {
	        std::ostream &out = std::cout;
	        out << std::setw(20) << name;      // the field width applies to the name only
	        out << '\t' << iterations;
	        out << '\t' << mean << "us";
	        out << '\t' << sd << "us\n";
	    }
	};

	// Clock adaptor used as the template argument of emb::Benchmarker.
	struct cpu_timer {
	    // Returns the current value of time_us_64() as a signed 64-bit integer.
	    // NOTE(review): presumably microseconds, going by the function name.
	    static int64_t now() {
	        return static_cast<int64_t>(time_us_64());
	    }
	};

	// emb::Benchmarker specialised with cpu_timer as its clock type.
	using Benchmarker = emb::Benchmarker<cpu_timer>;

	// Entry point: initialises Debug, waits three seconds, hands every benchmark
	// to the Benchmarker, runs them with Reporter as the output sink and then
	// idles forever.
	// NOTE(review): the 3 s delay presumably gives the host time to attach to the
	// console before output starts - confirm.
	[[noreturn]] int main() {
	    Debug::init();
	    sleep_ms(3000);
	    std::cout << "Benchmarking...\n";
	    Benchmarker benchmarker(1000);
	    EMB_MAKE_BENCHMARK(benchmarker, benchmark_hdiv_i32)
	    EMB_MAKE_BENCHMARK(benchmarker, benchmark_hdiv_i64)
	    EMB_MAKE_BENCHMARK(benchmarker, benchmark_uhdiv_i32)
	    RUN_BENCH(i8)
	    RUN_BENCH(i16)
	    RUN_BENCH(i32)
	    RUN_BENCH(i64)
	    RUN_BENCH(f)
	    RUN_BENCH(d)
	    RUN_BENCH(fgg)
	    RUN_BENCH(f88)
	    benchmarker.runBenchmarks<Reporter>();
	    // Flush explicitly: the program never exits, so nothing else will.
	    std::cout << "Done :D\n" << std::flush;
	    // A [[noreturn]] function must not return (doing so is undefined
	    // behavior), so park here once the results are out.
	    while (true) {
	        sleep_ms(1000);
	    }
	}
operation	type	Inner loops	mean μs	mean hw μs	cycles	speedup	Property
int conversion	int8	10000	334	335	4	0.997
division	int8	10000	3980	5108	50	0.779
multiplication	int8	10000	737	737	9	1
addition	int8	10000	734	736	9	0.997
int conversion	int16	10000	332	330	4	1.006
division	int16	10000	2676	5103	33	0.524
multiplication	int16	10000	655	654	8	1.002
addition	int16	10000	655	654	8	1.002
int conversion	int32	10000	332	331	4	1.003
division	int32	10000	2676	5102	33	0.525
multiplication	int32	10000	572	574	7	0.997
addition	int32	10000	573	576	7	0.995
int conversion	int64	10000	412	412	5	1
division	int64	10000	17877	8013	223	2.231
multiplication	int64	10000	6720	6720	84	1
addition	int64	10000	1061	1060	13	1.001
int conversion	float	1000	672	670	84	1.003
division	float	1000	4436	4434	555	1
multiplication	float	1000	1972	1972	247	1
addition	float	1000	1552	1551	194	1.001
int conversion	double	1000	664	662	83	1.003
division	double	1000	6577	4936	822	1.332
multiplication	double	1000	3541	3540	443	1
addition	double	1000	1892	1892	237	1
int conversion	fixed_16_16	10000	332	330	4	1.006
division	fixed_16_16	10000	54347	11408	679	4.764
multiplication	fixed_16_16	10000	13755	13755	172	1
addition	fixed_16_16	10000	572	574	7	0.997
int conversion	fixed_8_8	10000	330	331	4	0.997
division	fixed_8_8	10000	6300	5423	79	1.162
multiplication	fixed_8_8	10000	1544	1545	19	0.999
addition	fixed_8_8	10000	574	574	7	1