Skip to content

Instantly share code, notes, and snippets.

@flaviut
Created May 15, 2024 15:59
Show Gist options
  • Save flaviut/b54f7827c2912f88fd8d50feb64c803f to your computer and use it in GitHub Desktop.
Save flaviut/b54f7827c2912f88fd8d50feb64c803f to your computer and use it in GitHub Desktop.
#include <iostream>
#include <iomanip>
#include <pico/divider.h>
#include "fpm/fixed.hpp"
// from https://github.com/JoelFilho/EMB
#include "emb.hpp"
#include "smartdevice/Debug.hpp"
#pragma GCC optimize("O3")
/*
 * DECL_BENCH(suffix, conversion, count)
 *
 * Defines four benchmark function templates for one numeric type:
 *   benchmark_conv_<suffix>  evaluates conversion(i)
 *   benchmark_div_<suffix>   evaluates 1 / conversion(i)
 *   benchmark_mul_<suffix>   evaluates conversion(i) * operand
 *   benchmark_add_<suffix>   evaluates conversion(i) + operand
 * for i = 0 .. count-1 on every pass of the `for (auto _: s)` loop, where
 * operand is conversion(5454.13249). Every result is handed to
 * emb::dontOptimize.
 *
 *   suffix      token appended to the generated function names
 *   conversion  type name or callable, applied as conversion(x)
 *   count       trip count of the inner loop
 *
 * The constant operand is called `operand` so that it does not shadow the
 * State parameter `s`.
 *
 * NOTE(review): the inner loop starts at i == 0, so the div benchmark
 * evaluates 1 / conversion(0). For built-in integer types that is a division
 * by zero, which ISO C++ leaves undefined -- confirm the target's behaviour is
 * what you want to measure.
 * NOTE(review): 5454.13249 does not fit every `conversion` target (int8_t for
 * example) -- confirm the resulting operand is acceptable for those types.
 *
 * Only block comments may be used inside the macro body: a `//` comment would
 * swallow the line continuation.
 */
#define DECL_BENCH(suffix, conversion, count) \
    template<typename State> \
    void benchmark_conv_ ## suffix(State &s) { \
        for (auto _: s) { \
            for (int i = 0; i < (count); i++) \
                emb::dontOptimize(conversion(i)); \
        } \
    } \
    template<typename State> \
    void benchmark_div_ ## suffix(State &s) { \
        for (auto _: s) { \
            for (int i = 0; i < (count); i++) \
                emb::dontOptimize(1/conversion(i)); \
        } \
    } \
    template<typename State> \
    void benchmark_mul_ ## suffix(State &s) { \
        for (auto _: s) { \
            static const auto operand = conversion(5454.13249); \
            for (int i = 0; i < (count); i++) \
                emb::dontOptimize(conversion(i)*operand); \
        } \
    } \
    template<typename State> \
    void benchmark_add_ ## suffix(State &s) { \
        for (auto _: s) { \
            static const auto operand = conversion(5454.13249); \
            for (int i = 0; i < (count); i++) \
                emb::dontOptimize(conversion(i)+operand); \
        } \
    }
/*
 * RUN_BENCH(suffix)
 *
 * Registers the four benchmarks generated by DECL_BENCH(suffix, ...) --
 * conv, div, mul and add, in that order -- through EMB_MAKE_BENCHMARK.
 * Expects a variable named `benchmarker` to be in scope at the point of use.
 *
 * No `;` follows the individual registrations: EMB_MAKE_BENCHMARK is already
 * used as a complete statement without one in main(), so an extra semicolon
 * here would only add empty statements.
 */
#define RUN_BENCH(suffix) \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_conv_ ## suffix) \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_div_ ## suffix) \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_mul_ ## suffix) \
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_add_ ## suffix)
// 16-bit fixed-point type with 8 fractional bits; int32_t is supplied as the
// second (intermediate) type argument of fpm::fixed.
using fixed_8_8 = fpm::fixed<int16_t, int32_t, 8>;

// Generate the conv/div/mul/add benchmark templates for every numeric type
// under test. float and double use a ten times smaller inner-loop count than
// the other types.
// For the two fixed-point rows the "conversion" is from_raw_value, i.e. the
// loop index is handed over as a raw value.
//         suffix  conversion                         inner loops
DECL_BENCH(i8,     int8_t,                            10000)
DECL_BENCH(i16,    int16_t,                           10000)
DECL_BENCH(i32,    int32_t,                           10000)
DECL_BENCH(i64,    int64_t,                           10000)
DECL_BENCH(f,      float,                             1000)
DECL_BENCH(d,      double,                            1000)
DECL_BENCH(fgg,    fpm::fixed_16_16::from_raw_value,  10000)
DECL_BENCH(f88,    fixed_8_8::from_raw_value,         10000)
// Benchmark: calls div_s32s32(1, i) for i = 0 .. 9999 on every pass of the
// state loop and hands each result to emb::dontOptimize.
template<typename State>
void benchmark_hdiv_i32(State &s) {
    constexpr int kInnerLoops = 10000;
    for (auto _: s) {
        for (int n = 0; n < kInnerLoops; ++n) {
            const int32_t divisor = int32_t(n);
            emb::dontOptimize(div_s32s32(1, divisor));
        }
    }
}
// Benchmark: calls div_s64s64(1, i) for i = 0 .. 9999 on every pass of the
// state loop and hands each result to emb::dontOptimize. The loop index is
// passed as a plain int, exactly as written at the call site.
template<typename State>
void benchmark_hdiv_i64(State &s) {
    constexpr int kInnerLoops = 10000;
    for (auto _: s) {
        for (int n = 0; n < kInnerLoops; ++n) {
            emb::dontOptimize(div_s64s64(1, n));
        }
    }
}
// Benchmark: calls hw_divider_s32_quotient_inlined(1, i) for i = 0 .. 9999 on
// every pass of the state loop and hands each quotient to emb::dontOptimize.
template<typename State>
void benchmark_uhdiv_i32(State &s) {
    constexpr int kInnerLoops = 10000;
    for (auto _: s) {
        for (int n = 0; n < kInnerLoops; ++n) {
            const int32_t divisor = int32_t(n);
            emb::dontOptimize(hw_divider_s32_quotient_inlined(1, divisor));
        }
    }
}
// Result sink passed to Benchmarker::runBenchmarks<Reporter>().
struct Reporter {
    // Writes one tab-separated result line to std::cout:
    //   <name, right-aligned in 20 columns> TAB <iterations> TAB <mean>us TAB <sd>us
    // The field width applies to the name only.
    template<typename Accumulator>
    static void report(const char *name, size_t iterations, Accumulator mean, Accumulator sd) {
        std::ostream &out = std::cout;
        out << std::setw(20) << name;
        out << '\t' << iterations;
        out << '\t' << mean << "us";
        out << '\t' << sd << "us\n";
    }
};
// Clock adapter handed to emb::Benchmarker: now() returns the current value
// of time_us_64() converted to int64_t.
struct cpu_timer {
    static int64_t now() {
        const auto micros = time_us_64();
        return static_cast<int64_t>(micros);
    }
};

// Benchmarker specialised on the timer above.
using Benchmarker = emb::Benchmarker<cpu_timer>;
// Entry point: initialises Debug, waits three seconds, registers every
// benchmark and runs them, reporting through Reporter.
//
// Deliberately not marked [[noreturn]]: control reaches the end of this
// function, and returning from a function declared [[noreturn]] is undefined
// behaviour.
int main() {
    Debug::init();
    // NOTE(review): presumably gives a host-side monitor time to attach before
    // any output is produced -- confirm.
    sleep_ms(3000);
    std::cout << "Benchmarking...\n";

    // NOTE(review): 1000 is the emb::Benchmarker constructor argument;
    // presumably the number of timed repetitions per benchmark -- confirm
    // against emb.hpp.
    Benchmarker benchmarker(1000);

    // Hand-written divider benchmarks first, then the macro-generated
    // conv/div/mul/add set for each numeric type.
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_hdiv_i32)
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_hdiv_i64)
    EMB_MAKE_BENCHMARK(benchmarker, benchmark_uhdiv_i32)
    RUN_BENCH(i8)
    RUN_BENCH(i16)
    RUN_BENCH(i32)
    RUN_BENCH(i64)
    RUN_BENCH(f)
    RUN_BENCH(d)
    RUN_BENCH(fgg)
    RUN_BENCH(f88)

    benchmarker.runBenchmarks<Reporter>();
    std::cout << "Done :D\n";
    return 0;
}
operation type Inner loops mean μs mean hw μs cycles speedup Property
int conversion int8 10000 334 335 4 0.997
division int8 10000 3980 5108 50 0.779
multiplication int8 10000 737 737 9 1
addition int8 10000 734 736 9 0.997
int conversion int16 10000 332 330 4 1.006
division int16 10000 2676 5103 33 0.524
multiplication int16 10000 655 654 8 1.002
addition int16 10000 655 654 8 1.002
int conversion int32 10000 332 331 4 1.003
division int32 10000 2676 5102 33 0.525
multiplication int32 10000 572 574 7 0.997
addition int32 10000 573 576 7 0.995
int conversion int64 10000 412 412 5 1
division int64 10000 17877 8013 223 2.231
multiplication int64 10000 6720 6720 84 1
addition int64 10000 1061 1060 13 1.001
int conversion float 1000 672 670 84 1.003
division float 1000 4436 4434 555 1
multiplication float 1000 1972 1972 247 1
addition float 1000 1552 1551 194 1.001
int conversion double 1000 664 662 83 1.003
division double 1000 6577 4936 822 1.332
multiplication double 1000 3541 3540 443 1
addition double 1000 1892 1892 237 1
int conversion fixed_16_16 10000 332 330 4 1.006
division fixed_16_16 10000 54347 11408 679 4.764
multiplication fixed_16_16 10000 13755 13755 172 1
addition fixed_16_16 10000 572 574 7 0.997
int conversion fixed_8_8 10000 330 331 4 0.997
division fixed_8_8 10000 6300 5423 79 1.162
multiplication fixed_8_8 10000 1544 1545 19 0.999
addition fixed_8_8 10000 574 574 7 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment