Last active
October 6, 2021 21:54
-
-
Save janisozaur/21e0c6ba82b9b871cdeef8cbdefe615a to your computer and use it in GitHub Desktop.
Benchmark of alternative implementations of SDL2's Blit1to4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Build and run: g++ benchmark.cpp -lbenchmark -O2 -g -o bench && ./bench
#include <cstdint> | |
#include <cstring> | |
#include <random> | |
#include <benchmark/benchmark.h> | |
/* SDL-style fixed-width integer names used by the code copied from SDL2.
 * Real type aliases (rather than macros) so they obey scope and show up
 * in diagnostics and debuggers. */
typedef uint8_t Uint8;
typedef uint32_t Uint32;
typedef struct SDL_Color | |
{ | |
Uint8 r; | |
Uint8 g; | |
Uint8 b; | |
Uint8 a; | |
} SDL_Color; | |
#define SDL_Colour SDL_Color | |
typedef struct SDL_Palette | |
{ | |
int ncolors; | |
SDL_Color *colors; | |
Uint32 version; | |
int refcount; | |
} SDL_Palette; | |
/** | |
* \note Everything in the pixel format structure is read-only. | |
*/ | |
typedef struct SDL_PixelFormat | |
{ | |
Uint32 format; | |
SDL_Palette *palette; | |
Uint8 BitsPerPixel; | |
Uint8 BytesPerPixel; | |
Uint8 padding[2]; | |
Uint32 Rmask; | |
Uint32 Gmask; | |
Uint32 Bmask; | |
Uint32 Amask; | |
Uint8 Rloss; | |
Uint8 Gloss; | |
Uint8 Bloss; | |
Uint8 Aloss; | |
Uint8 Rshift; | |
Uint8 Gshift; | |
Uint8 Bshift; | |
Uint8 Ashift; | |
int refcount; | |
struct SDL_PixelFormat *next; | |
} SDL_PixelFormat; | |
typedef struct | |
{ | |
Uint8 *src; | |
int src_w, src_h; | |
int src_pitch; | |
int src_skip; | |
Uint8 *dst; | |
int dst_w, dst_h; | |
int dst_pitch; | |
int dst_skip; | |
SDL_PixelFormat *src_fmt; | |
SDL_PixelFormat *dst_fmt; | |
Uint8 *table; | |
int flags; | |
Uint32 colorkey; | |
Uint8 r, g, b, a; | |
} SDL_BlitInfo; | |
static void | |
Blit1to4(SDL_BlitInfo * info) | |
{ | |
#ifndef USE_DUFFS_LOOP | |
int c; | |
#endif | |
int width, height; | |
Uint8 *src; | |
Uint32 *map, *dst; | |
int srcskip, dstskip; | |
/* Set up some basic variables */ | |
width = info->dst_w; | |
height = info->dst_h; | |
src = info->src; | |
srcskip = info->src_skip; | |
dst = (Uint32 *) info->dst; | |
dstskip = info->dst_skip / 4; | |
map = (Uint32 *) info->table; | |
while (height--) { | |
#ifdef USE_DUFFS_LOOP | |
/* *INDENT-OFF* */ | |
DUFFS_LOOP( | |
*dst++ = map[*src++]; | |
, width); | |
/* *INDENT-ON* */ | |
#else | |
for (c = width / 4; c; --c) { | |
*dst++ = map[*src++]; | |
*dst++ = map[*src++]; | |
*dst++ = map[*src++]; | |
*dst++ = map[*src++]; | |
} | |
switch (width & 3) { | |
case 3: | |
*dst++ = map[*src++]; | |
case 2: | |
*dst++ = map[*src++]; | |
case 1: | |
*dst++ = map[*src++]; | |
} | |
#endif /* USE_DUFFS_LOOP */ | |
src += srcskip; | |
dst += dstskip; | |
} | |
} | |
/* From this point on, code guarded by USE_DUFFS_LOOP takes the Duff's
 * device branch. */
#define USE_DUFFS_LOOP

/*
 * Duff's-device loops: execute `pixel_copy_increment` exactly `width`
 * times by jumping into the middle of an unrolled do/while.
 *
 * `width` is parenthesized at every use so that expression arguments such
 * as `1 << 4` or `a | b` expand with the intended precedence.
 *
 * Caveat: `width` must be > 0. With width == 0 the switch enters at
 * `case 0` and the body still runs one full unrolled pass.
 */

/* 8-times unrolled loop */
#define DUFFS_LOOP8(pixel_copy_increment, width)                        \
{ int n = ((width)+7)/8;                                                \
    switch ((width) & 7) {                                              \
    case 0: do {    pixel_copy_increment; /* fallthrough */             \
    case 7:         pixel_copy_increment; /* fallthrough */             \
    case 6:         pixel_copy_increment; /* fallthrough */             \
    case 5:         pixel_copy_increment; /* fallthrough */             \
    case 4:         pixel_copy_increment; /* fallthrough */             \
    case 3:         pixel_copy_increment; /* fallthrough */             \
    case 2:         pixel_copy_increment; /* fallthrough */             \
    case 1:         pixel_copy_increment; /* fallthrough */             \
            } while ( --n > 0 );                                        \
    }                                                                   \
}

/* 4-times unrolled loop */
#define DUFFS_LOOP4(pixel_copy_increment, width)                        \
{ int n = ((width)+3)/4;                                                \
    switch ((width) & 3) {                                              \
    case 0: do {    pixel_copy_increment; /* fallthrough */             \
    case 3:         pixel_copy_increment; /* fallthrough */             \
    case 2:         pixel_copy_increment; /* fallthrough */             \
    case 1:         pixel_copy_increment; /* fallthrough */             \
            } while (--n > 0);                                          \
    }                                                                   \
}

/* Use the 8-times version of the loop by default */
#define DUFFS_LOOP(pixel_copy_increment, width)                         \
    DUFFS_LOOP8(pixel_copy_increment, width)
static void | |
Blit1to4_duffs(SDL_BlitInfo * info) | |
{ | |
#ifndef USE_DUFFS_LOOP | |
int c; | |
#endif | |
int width, height; | |
Uint8 *src; | |
Uint32 *map, *dst; | |
int srcskip, dstskip; | |
/* Set up some basic variables */ | |
width = info->dst_w; | |
height = info->dst_h; | |
src = info->src; | |
srcskip = info->src_skip; | |
dst = (Uint32 *) info->dst; | |
dstskip = info->dst_skip / 4; | |
map = (Uint32 *) info->table; | |
while (height--) { | |
#ifdef USE_DUFFS_LOOP | |
/* *INDENT-OFF* */ | |
DUFFS_LOOP( | |
*dst++ = map[*src++]; | |
, width); | |
/* *INDENT-ON* */ | |
#else | |
for (c = width / 4; c; --c) { | |
*dst++ = map[*src++]; | |
*dst++ = map[*src++]; | |
*dst++ = map[*src++]; | |
*dst++ = map[*src++]; | |
} | |
switch (width & 3) { | |
case 3: | |
*dst++ = map[*src++]; | |
case 2: | |
*dst++ = map[*src++]; | |
case 1: | |
*dst++ = map[*src++]; | |
} | |
#endif /* USE_DUFFS_LOOP */ | |
src += srcskip; | |
dst += dstskip; | |
} | |
} | |
static void | |
Blit1to4_janis(SDL_BlitInfo * info) | |
{ | |
int c; | |
int width, height; | |
Uint8 *src; | |
Uint32 *map, *dst; | |
int srcskip, dstskip; | |
/* Set up some basic variables */ | |
width = info->dst_w; | |
height = info->dst_h; | |
src = info->src; | |
srcskip = info->src_skip; | |
dst = (Uint32 *)(info->dst); | |
dstskip = info->dst_skip / 4; | |
map = (Uint32 *) info->table; | |
while (height--) { | |
for (c = width / 4; c; --c) { | |
uint32_t foo[4]; | |
foo[0] = map[*src++]; | |
foo[1] = map[*src++]; | |
foo[2] = map[*src++]; | |
foo[3] = map[*src++]; | |
memcpy(dst, foo, 4 * sizeof(uint32_t)); | |
dst += 4; | |
} | |
switch (width & 3) { | |
case 3: | |
*dst++ = map[*src++]; | |
case 2: | |
*dst++ = map[*src++]; | |
case 1: | |
*dst++ = map[*src++]; | |
} | |
src += srcskip; | |
dst += dstskip; | |
} | |
} | |
static void | |
Blit1to4_janis_aligned(SDL_BlitInfo * info) | |
{ | |
int c; | |
int width, height; | |
Uint8 *src; | |
Uint32 *map, *dst; | |
int srcskip, dstskip; | |
/* Set up some basic variables */ | |
width = info->dst_w; | |
height = info->dst_h; | |
src = (uint8_t*)__builtin_assume_aligned(info->src, 4); | |
srcskip = info->src_skip; | |
dst = (Uint32 *)__builtin_assume_aligned(info->dst, 4); | |
dstskip = info->dst_skip / 4; | |
map = (Uint32 *) __builtin_assume_aligned(info->table, 4); | |
while (height--) { | |
for (c = width / 4; c; --c) { | |
uint32_t foo[4]; | |
foo[0] = map[*src++]; | |
foo[1] = map[*src++]; | |
foo[2] = map[*src++]; | |
foo[3] = map[*src++]; | |
memcpy(dst, foo, 4 * sizeof(uint32_t)); | |
dst += 4; | |
} | |
switch (width & 3) { | |
case 3: | |
*dst++ = map[*src++]; | |
case 2: | |
*dst++ = map[*src++]; | |
case 1: | |
*dst++ = map[*src++]; | |
} | |
src += srcskip; | |
dst += dstskip; | |
} | |
} | |
static void | |
Blit_memcpy(SDL_BlitInfo * info) | |
{ | |
int c; | |
int width, height; | |
Uint8 *src; | |
Uint32 *map, *dst; | |
int srcskip, dstskip; | |
/* Set up some basic variables */ | |
width = info->dst_w; | |
height = info->dst_h; | |
src = (uint8_t*)__builtin_assume_aligned(info->src, 4); | |
srcskip = info->src_skip; | |
dst = (Uint32 *)__builtin_assume_aligned(info->dst, 4); | |
dstskip = info->dst_skip / 4; | |
map = (Uint32 *) __builtin_assume_aligned(info->table, 4); | |
while (height--) { | |
for (c = width / 4; c; --c) { | |
uint32_t foo[4]{}; | |
memcpy(dst, foo, 4 * sizeof(uint32_t)); | |
dst += 4; | |
} | |
switch (width & 3) { | |
case 3: | |
*dst++ = map[*src++]; | |
case 2: | |
*dst++ = map[*src++]; | |
case 1: | |
*dst++ = map[*src++]; | |
} | |
src += srcskip; | |
dst += dstskip; | |
} | |
} | |
static void | |
Blit_memset(SDL_BlitInfo * info) | |
{ | |
int c; | |
int width, height; | |
Uint8 *src; | |
Uint32 *map, *dst; | |
int srcskip, dstskip; | |
/* Set up some basic variables */ | |
width = info->dst_w; | |
height = info->dst_h; | |
src = (uint8_t*)__builtin_assume_aligned(info->src, 4); | |
srcskip = info->src_skip; | |
dst = (Uint32 *)__builtin_assume_aligned(info->dst, 4); | |
dstskip = info->dst_skip / 4; | |
map = (Uint32 *) __builtin_assume_aligned(info->table, 4); | |
while (height--) { | |
memset(dst, 0, width * 4); | |
src += srcskip + width; | |
dst += dstskip + width; | |
} | |
} | |
extern Uint8 real_palette[256 * 4]; | |
Uint8 real_palette[256 * 4]; | |
/*
 * Fill the benchmark inputs with pseudo-random bytes.
 *
 * Writes 256 * 4 bytes to sbi->table and dst_w * dst_h bytes to sbi->src.
 * The generator is default-seeded, so every call produces the same byte
 * sequence. Only the low 8 bits of each 32-bit draw are kept.
 *
 * sbi->table, sbi->src, sbi->dst_w and sbi->dst_h must already be set.
 */
void init(SDL_BlitInfo *sbi)
{
    std::mt19937 gen32;
    for (int i = 0; i < 256 * 4; i++) {
        sbi->table[i] = static_cast<Uint8>(gen32());
    }
    const int pixels = sbi->dst_w * sbi->dst_h;
    for (int i = 0; i < pixels; i++) {
        sbi->src[i] = static_cast<Uint8>(gen32());
    }
}
static void BM_Blit1to4(benchmark::State& state) { | |
auto* src = new Uint8[state.range(0) * state.range(0)]; | |
auto* dst = new Uint8[state.range(0) * state.range(0) * 4]; | |
SDL_BlitInfo sbi; | |
sbi.src = src; | |
sbi.dst = dst; | |
sbi.table = real_palette; | |
sbi.dst_w = state.range(0); | |
sbi.dst_h = state.range(0); | |
sbi.src_skip = 0; | |
sbi.dst_skip = 0; | |
init(&sbi); | |
for (auto _ : state) | |
Blit1to4(&sbi); | |
state.SetBytesProcessed(int64_t(state.iterations()) * | |
int64_t(state.range(0) * state.range(0))); | |
delete[] src; | |
delete[] dst; | |
} | |
BENCHMARK(BM_Blit1to4)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(2<<10)->Arg(4<<10); | |
static void BM_Blit1to4_duffs(benchmark::State& state) { | |
auto* src = new Uint8[state.range(0) * state.range(0)]; | |
auto* dst = new Uint8[state.range(0) * state.range(0) * 4]; | |
SDL_BlitInfo sbi; | |
sbi.src = src; | |
sbi.dst = dst; | |
sbi.table = real_palette; | |
sbi.dst_w = state.range(0); | |
sbi.dst_h = state.range(0); | |
sbi.src_skip = 0; | |
sbi.dst_skip = 0; | |
init(&sbi); | |
for (auto _ : state) | |
Blit1to4_duffs(&sbi); | |
state.SetBytesProcessed(int64_t(state.iterations()) * | |
int64_t(state.range(0) * state.range(0))); | |
delete[] src; | |
delete[] dst; | |
} | |
BENCHMARK(BM_Blit1to4_duffs)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(2<<10)->Arg(4<<10); | |
static void BM_Blit1to4_janis(benchmark::State& state) { | |
auto* src = new Uint8[state.range(0) * state.range(0)]; | |
auto* dst = new Uint8[state.range(0) * state.range(0) * 4]; | |
SDL_BlitInfo sbi; | |
sbi.src = src; | |
sbi.dst = dst; | |
sbi.table = real_palette; | |
sbi.dst_w = state.range(0); | |
sbi.dst_h = state.range(0); | |
sbi.src_skip = 0; | |
sbi.dst_skip = 0; | |
init(&sbi); | |
for (auto _ : state) | |
Blit1to4_janis(&sbi); | |
state.SetBytesProcessed(int64_t(state.iterations()) * | |
int64_t(state.range(0) * state.range(0))); | |
delete[] src; | |
delete[] dst; | |
} | |
BENCHMARK(BM_Blit1to4_janis)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(2<<10)->Arg(4<<10); | |
static void BM_Blit1to4_janis_aligned(benchmark::State& state) { | |
auto* src = new Uint8[state.range(0) * state.range(0)]; | |
auto* dst = new Uint8[state.range(0) * state.range(0) * 4]; | |
SDL_BlitInfo sbi; | |
sbi.src = src; | |
sbi.dst = dst; | |
sbi.table = real_palette; | |
sbi.dst_w = state.range(0); | |
sbi.dst_h = state.range(0); | |
sbi.src_skip = 0; | |
sbi.dst_skip = 0; | |
init(&sbi); | |
for (auto _ : state) | |
Blit1to4_janis_aligned(&sbi); | |
state.SetBytesProcessed(int64_t(state.iterations()) * | |
int64_t(state.range(0) * state.range(0))); | |
delete[] src; | |
delete[] dst; | |
} | |
BENCHMARK(BM_Blit1to4_janis_aligned)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(2<<10)->Arg(4<<10); | |
static void BM_Blit1to4_janis_aligned_zero(benchmark::State& state) { | |
auto* src = new Uint8[state.range(0) * state.range(0)]; | |
auto* dst = new Uint8[state.range(0) * state.range(0) * 4]; | |
SDL_BlitInfo sbi; | |
sbi.src = src; | |
sbi.dst = dst; | |
sbi.table = real_palette; | |
sbi.dst_w = state.range(0); | |
sbi.dst_h = state.range(0); | |
sbi.src_skip = 0; | |
sbi.dst_skip = 0; | |
init(&sbi); | |
memset(sbi.table, 0, 256 * 4); | |
for (auto _ : state) | |
Blit1to4_janis_aligned(&sbi); | |
state.SetBytesProcessed(int64_t(state.iterations()) * | |
int64_t(state.range(0) * state.range(0))); | |
delete[] src; | |
delete[] dst; | |
} | |
BENCHMARK(BM_Blit1to4_janis_aligned_zero)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(2<<10)->Arg(4<<10); | |
static void BM_memset(benchmark::State& state) { | |
auto* src = new Uint8[state.range(0) * state.range(0)]; | |
auto* dst = new Uint8[state.range(0) * state.range(0) * 4]; | |
SDL_BlitInfo sbi; | |
sbi.src = src; | |
sbi.dst = dst; | |
sbi.table = real_palette; | |
sbi.dst_w = state.range(0); | |
sbi.dst_h = state.range(0); | |
sbi.src_skip = 0; | |
sbi.dst_skip = 0; | |
init(&sbi); | |
memset(sbi.table, 0, 256 * 4); | |
for (auto _ : state) | |
Blit_memset(&sbi); | |
state.SetBytesProcessed(int64_t(state.iterations()) * | |
int64_t(state.range(0) * state.range(0))); | |
delete[] src; | |
delete[] dst; | |
} | |
BENCHMARK(BM_memset)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(2<<10)->Arg(4<<10); | |
static void BM_memcpy(benchmark::State& state) { | |
auto* src = new Uint8[state.range(0) * state.range(0)]; | |
auto* dst = new Uint8[state.range(0) * state.range(0) * 4]; | |
SDL_BlitInfo sbi; | |
sbi.src = src; | |
sbi.dst = dst; | |
sbi.table = real_palette; | |
sbi.dst_w = state.range(0); | |
sbi.dst_h = state.range(0); | |
sbi.src_skip = 0; | |
sbi.dst_skip = 0; | |
init(&sbi); | |
memset(sbi.table, 0, 256 * 4); | |
for (auto _ : state) | |
Blit_memcpy(&sbi); | |
state.SetBytesProcessed(int64_t(state.iterations()) * | |
int64_t(state.range(0) * state.range(0))); | |
delete[] src; | |
delete[] dst; | |
} | |
BENCHMARK(BM_memcpy)->Arg(8)->Arg(64)->Arg(512)->Arg(1<<10)->Arg(2<<10)->Arg(4<<10); | |
/*
 * Entry point: let Google Benchmark parse its command-line flags, reject
 * anything it did not recognize, then run every benchmark registered with
 * BENCHMARK() above.
 *
 * Returns 1 when unrecognized arguments were supplied, 0 otherwise.
 */
int main(int argc, char** argv) {
    benchmark::Initialize(&argc, argv);
    if (benchmark::ReportUnrecognizedArguments(argc, argv)) {
        return 1;
    }
    benchmark::RunSpecifiedBenchmarks();
    /* NOTE(review): benchmark::Shutdown() was commented out in the original;
     * presumably the installed library predates it. Re-enable once confirmed. */
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Unable to determine clock rate from sysctl: hw.cpufrequency: No such file or directory | |
2021-09-18T13:05:20+02:00 | |
Running ./bench | |
Run on (8 X 24.1206 MHz CPU s) | |
CPU Caches: | |
L1 Data 64 KiB (x8) | |
L1 Instruction 128 KiB (x8) | |
L2 Unified 4096 KiB (x8) | |
Load Average: 1.41, 1.67, 1.79 | |
---------------------------------------------------------------------------------------------- | |
Benchmark Time CPU Iterations UserCounters... | |
---------------------------------------------------------------------------------------------- | |
BM_Blit1to4/8 16.0 ns 16.0 ns 41584241 bytes_per_second=3.72337G/s | |
BM_Blit1to4/64 947 ns 947 ns 744673 bytes_per_second=4.02659G/s | |
BM_Blit1to4/512 63372 ns 63371 ns 11145 bytes_per_second=3.85257G/s | |
BM_Blit1to4/1024 252167 ns 252163 ns 2772 bytes_per_second=3.87274G/s | |
BM_Blit1to4/2048 1003950 ns 1003946 ns 700 bytes_per_second=3.8909G/s | |
BM_Blit1to4/4096 4061885 ns 4061785 ns 172 bytes_per_second=3.84683G/s | |
BM_Blit1to4_duffs/8 23.3 ns 23.3 ns 30233532 bytes_per_second=2.5628G/s | |
BM_Blit1to4_duffs/64 1346 ns 1346 ns 517078 bytes_per_second=2.8335G/s | |
BM_Blit1to4_duffs/512 88163 ns 88162 ns 7966 bytes_per_second=2.76923G/s | |
BM_Blit1to4_duffs/1024 368285 ns 368281 ns 1899 bytes_per_second=2.65168G/s | |
BM_Blit1to4_duffs/2048 1471590 ns 1471584 ns 476 bytes_per_second=2.65445G/s | |
BM_Blit1to4_duffs/4096 5908542 ns 5908495 ns 111 bytes_per_second=2.6445G/s | |
BM_Blit1to4_janis/8 16.9 ns 16.9 ns 41453723 bytes_per_second=3.52575G/s | |
BM_Blit1to4_janis/64 957 ns 957 ns 731239 bytes_per_second=3.98593G/s | |
BM_Blit1to4_janis/512 63277 ns 63275 ns 11051 bytes_per_second=3.85843G/s | |
BM_Blit1to4_janis/1024 251139 ns 251137 ns 2786 bytes_per_second=3.88856G/s | |
BM_Blit1to4_janis/2048 1002765 ns 1002730 ns 699 bytes_per_second=3.89562G/s | |
BM_Blit1to4_janis/4096 4037985 ns 4037983 ns 173 bytes_per_second=3.86951G/s | |
BM_Blit1to4_janis_aligned/8 16.6 ns 16.6 ns 42072112 bytes_per_second=3.59699G/s | |
BM_Blit1to4_janis_aligned/64 956 ns 956 ns 732279 bytes_per_second=3.98882G/s | |
BM_Blit1to4_janis_aligned/512 63317 ns 63316 ns 11049 bytes_per_second=3.85591G/s | |
BM_Blit1to4_janis_aligned/1024 251312 ns 251309 ns 2784 bytes_per_second=3.8859G/s | |
BM_Blit1to4_janis_aligned/2048 998530 ns 998521 ns 701 bytes_per_second=3.91204G/s | |
BM_Blit1to4_janis_aligned/4096 4034439 ns 4034399 ns 173 bytes_per_second=3.87294G/s | |
BM_Blit1to4_janis_aligned_zero/8 16.6 ns 16.6 ns 42012268 bytes_per_second=3.58571G/s | |
BM_Blit1to4_janis_aligned_zero/64 957 ns 957 ns 731743 bytes_per_second=3.98668G/s | |
BM_Blit1to4_janis_aligned_zero/512 63346 ns 63345 ns 11050 bytes_per_second=3.85412G/s | |
BM_Blit1to4_janis_aligned_zero/1024 251606 ns 251604 ns 2783 bytes_per_second=3.88135G/s | |
BM_Blit1to4_janis_aligned_zero/2048 999055 ns 999030 ns 701 bytes_per_second=3.91004G/s | |
BM_Blit1to4_janis_aligned_zero/4096 4046626 ns 4046387 ns 173 bytes_per_second=3.86147G/s | |
BM_memset/8 31.0 ns 31.0 ns 22412408 bytes_per_second=1.92244G/s | |
BM_memset/64 184 ns 184 ns 3805734 bytes_per_second=20.7455G/s | |
BM_memset/512 16803 ns 16802 ns 41671 bytes_per_second=14.5301G/s | |
BM_memset/1024 74512 ns 74510 ns 9413 bytes_per_second=13.1065G/s | |
BM_memset/2048 680728 ns 680727 ns 967 bytes_per_second=5.73835G/s | |
BM_memset/4096 2963167 ns 2963136 ns 235 bytes_per_second=5.27313G/s | |
BM_memcpy/8 35.1 ns 35.1 ns 20043523 bytes_per_second=1.69974G/s | |
BM_memcpy/64 332 ns 332 ns 2111735 bytes_per_second=11.5018G/s | |
BM_memcpy/512 18137 ns 18137 ns 38616 bytes_per_second=13.461G/s | |
BM_memcpy/1024 95677 ns 95669 ns 7307 bytes_per_second=10.2077G/s | |
BM_memcpy/2048 652300 ns 652300 ns 1014 bytes_per_second=5.98843G/s | |
BM_memcpy/4096 3523019 ns 3523000 ns 198 bytes_per_second=4.43514G/s |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Run on (4 X 1500 MHz CPU s) | |
Load Average: 0.29, 1.51, 1.30 | |
***WARNING*** CPU scaling is enabled, the benchmark real time measurements may be noisy and will incur extra overhead. | |
---------------------------------------------------------------------------------------------- | |
Benchmark Time CPU Iterations UserCounters... | |
---------------------------------------------------------------------------------------------- | |
BM_Blit1to4/8 108 ns 108 ns 6450524 bytes_per_second=563.704M/s | |
BM_Blit1to4/64 5695 ns 5690 ns 120728 bytes_per_second=686.452M/s | |
BM_Blit1to4/512 384858 ns 384585 ns 1818 bytes_per_second=650.052M/s | |
BM_Blit1to4/1024 1737759 ns 1734079 ns 404 bytes_per_second=576.675M/s | |
BM_Blit1to4/2048 6973408 ns 6956302 ns 101 bytes_per_second=575.018M/s | |
BM_Blit1to4/4096 35186953 ns 35090453 ns 18 bytes_per_second=455.964M/s | |
BM_Blit1to4_duffs/8 108 ns 108 ns 6467720 bytes_per_second=564.11M/s | |
BM_Blit1to4_duffs/64 5707 ns 5703 ns 117894 bytes_per_second=684.899M/s | |
BM_Blit1to4_duffs/512 393705 ns 393433 ns 1728 bytes_per_second=635.432M/s | |
BM_Blit1to4_duffs/1024 1727725 ns 1724378 ns 405 bytes_per_second=579.919M/s | |
BM_Blit1to4_duffs/2048 6950270 ns 6931980 ns 101 bytes_per_second=577.036M/s | |
BM_Blit1to4_duffs/4096 34990708 ns 34902436 ns 18 bytes_per_second=458.421M/s | |
BM_Blit1to4_janis/8 105 ns 105 ns 6676132 bytes_per_second=582.156M/s | |
BM_Blit1to4_janis/64 5646 ns 5642 ns 118151 bytes_per_second=692.362M/s | |
BM_Blit1to4_janis/512 392460 ns 392156 ns 1735 bytes_per_second=637.501M/s | |
BM_Blit1to4_janis/1024 1729658 ns 1726145 ns 405 bytes_per_second=579.326M/s | |
BM_Blit1to4_janis/2048 6957858 ns 6940140 ns 100 bytes_per_second=576.357M/s | |
BM_Blit1to4_janis/4096 34863384 ns 34765160 ns 18 bytes_per_second=460.231M/s | |
BM_Blit1to4_janis_aligned/8 106 ns 106 ns 6633877 bytes_per_second=578.385M/s | |
BM_Blit1to4_janis_aligned/64 5657 ns 5653 ns 118096 bytes_per_second=690.989M/s | |
BM_Blit1to4_janis_aligned/512 392742 ns 392469 ns 1736 bytes_per_second=636.992M/s | |
BM_Blit1to4_janis_aligned/1024 1728761 ns 1725408 ns 405 bytes_per_second=579.573M/s | |
BM_Blit1to4_janis_aligned/2048 6959338 ns 6939785 ns 100 bytes_per_second=576.387M/s | |
BM_Blit1to4_janis_aligned/4096 34906249 ns 34815391 ns 18 bytes_per_second=459.567M/s | |
BM_Blit1to4_janis_aligned_zero/8 106 ns 106 ns 6632627 bytes_per_second=578.382M/s | |
BM_Blit1to4_janis_aligned_zero/64 5640 ns 5636 ns 119403 bytes_per_second=693.066M/s | |
BM_Blit1to4_janis_aligned_zero/512 393510 ns 393206 ns 1736 bytes_per_second=635.799M/s | |
BM_Blit1to4_janis_aligned_zero/1024 1728387 ns 1724770 ns 405 bytes_per_second=579.788M/s | |
BM_Blit1to4_janis_aligned_zero/2048 6953788 ns 6936679 ns 100 bytes_per_second=576.645M/s | |
BM_Blit1to4_janis_aligned_zero/4096 34902772 ns 34804518 ns 18 bytes_per_second=459.71M/s | |
BM_memset/8 78.9 ns 78.8 ns 8874246 bytes_per_second=774.597M/s | |
BM_memset/64 1714 ns 1713 ns 357216 bytes_per_second=2.22725G/s | |
BM_memset/512 310023 ns 309789 ns 2113 bytes_per_second=807.001M/s | |
BM_memset/1024 1387825 ns 1386778 ns 495 bytes_per_second=721.096M/s | |
BM_memset/2048 5672146 ns 5667385 ns 123 bytes_per_second=705.793M/s | |
BM_memset/4096 27291624 ns 27257563 ns 24 bytes_per_second=586.993M/s | |
BM_memcpy/8 88.9 ns 88.8 ns 7874357 bytes_per_second=687.215M/s | |
BM_memcpy/64 1767 ns 1766 ns 360358 bytes_per_second=2.16045G/s | |
BM_memcpy/512 298864 ns 298655 ns 2088 bytes_per_second=837.087M/s | |
BM_memcpy/1024 1384263 ns 1383143 ns 494 bytes_per_second=722.991M/s | |
BM_memcpy/2048 5701829 ns 5697497 ns 123 bytes_per_second=702.063M/s | |
BM_memcpy/4096 27279868 ns 27243797 ns 24 bytes_per_second=587.29M/s |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment