-
-
Save emaxerrno/db0db4996ed4584ee45ee692db1bac27 to your computer and use it in GitHub Desktop.
lemire.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2018 Alexander Gallego | |
// | |
#include <cstring> | |
#include <memory> | |
#include <thread> | |
#include <benchmark/benchmark.h> | |
#include <simple8b.h> | |
// simple must be above | |
#include <simdfastpfor.h> | |
struct varint { | |
static inline char * | |
encode(char *sptr, uint32_t v) { | |
// Operate on characters as unsigneds | |
unsigned char * ptr = reinterpret_cast<unsigned char *>(sptr); | |
static const int B = 128; | |
if (v < (1 << 7)) { | |
*(ptr++) = v; | |
} else if (v < (1 << 14)) { | |
*(ptr++) = v | B; | |
*(ptr++) = v >> 7; | |
} else if (v < (1 << 21)) { | |
*(ptr++) = v | B; | |
*(ptr++) = (v >> 7) | B; | |
*(ptr++) = v >> 14; | |
} else if (v < (1 << 28)) { | |
*(ptr++) = v | B; | |
*(ptr++) = (v >> 7) | B; | |
*(ptr++) = (v >> 14) | B; | |
*(ptr++) = v >> 21; | |
} else { | |
*(ptr++) = v | B; | |
*(ptr++) = (v >> 7) | B; | |
*(ptr++) = (v >> 14) | B; | |
*(ptr++) = (v >> 21) | B; | |
*(ptr++) = v >> 28; | |
} | |
return reinterpret_cast<char *>(ptr); | |
} | |
static inline uint32_t | |
decode(const char *src) noexcept { | |
auto *buf = reinterpret_cast<const uint8_t *>(src); | |
auto flip = [](uint32_t v) { return v & ~128; }; | |
if (buf[0] > 127) { | |
if (buf[1] > 127) { | |
if (buf[2] > 127) { | |
if (buf[3] > 127) { | |
const uint32_t r = flip(buf[0]) | (flip(buf[1]) << 7) | | |
(flip(buf[2]) << 14) | (flip(buf[3]) << 21) | | |
(buf[4] << 28); | |
buf += 5; | |
return r; | |
} else { | |
const uint32_t r = flip(buf[0]) | (flip(buf[1]) << 7) | | |
(flip(buf[2]) << 14) | (buf[3] << 21); | |
buf += 4; | |
return r; | |
} | |
} else { | |
const uint32_t r = | |
flip(buf[0]) | (flip(buf[1]) << 7) | (buf[2] << 14); | |
buf += 3; | |
return r; | |
} | |
} else { | |
const uint32_t r = flip(buf[0]) | (buf[1] << 7); | |
buf += 2; | |
return r; | |
} | |
} else | |
return *buf++; | |
} | |
}; | |
static void | |
BM_fast_pfor_simd_encode(benchmark::State &state) { | |
static thread_local FastPForLib::SIMDFastPFor<4> pfor; | |
for (auto _ : state) { | |
state.PauseTiming(); | |
std::vector<uint32_t> in(state.range(0)); | |
std::vector<uint32_t> out(state.range(0)); | |
for (auto i = 0u; i < in.size(); ++i) { in[i] = i; } | |
state.ResumeTiming(); | |
auto n = out.size(); | |
pfor.encodeArray(in.data(), in.size(), out.data(), n); | |
} | |
} | |
BENCHMARK(BM_fast_pfor_simd_encode) | |
->Args({128 << 1, 128 << 1}) | |
->Args({128 << 2, 128 << 2}) | |
->Args({128 << 3, 128 << 3}) | |
->Args({128 << 10, 128 << 10}) | |
->Args({128 << 20, 128 << 20}); | |
static void | |
BM_varint_encode(benchmark::State &state) { | |
char storage[4]; | |
for (auto _ : state) { | |
auto limit = state.range(0); | |
for (uint32_t i = 0u; i < limit; ++i) | |
benchmark::DoNotOptimize(varint::encode(storage, i)); | |
} | |
} | |
BENCHMARK(BM_varint_encode) | |
->Args({1, 1}) | |
->Args({1 << 1, 1 << 1}) | |
->Args({1 << 4, 1 << 4}) | |
->Args({1 << 8, 1 << 8}) | |
->Args({128 << 1, 128 << 1}) | |
->Args({128 << 2, 128 << 2}) | |
->Args({128 << 3, 128 << 3}) | |
->Args({128 << 10, 128 << 10}) | |
->Args({128 << 20, 128 << 20}); | |
static void | |
BM_varint_decode(benchmark::State &state) { | |
state.PauseTiming(); | |
char storage[4]; | |
varint::encode(storage, 1 << 29); | |
state.ResumeTiming(); | |
for (auto _ : state) { | |
auto limit = state.range(0); | |
for (uint32_t i = 0u; i < limit; ++i) | |
benchmark::DoNotOptimize(varint::decode(storage)); | |
} | |
} | |
BENCHMARK(BM_varint_decode) | |
->Args({1, 1}) | |
->Args({1 << 1, 1 << 1}) | |
->Args({1 << 4, 1 << 4}) | |
->Args({1 << 8, 1 << 8}) | |
->Args({1 << 16, 1 << 16}) | |
->Args({1 << 18, 1 << 18}) | |
->Args({1 << 22, 1 << 22}) | |
->Args({1 << 29, 1 << 29}); | |
static void | |
BM_varint_roundtrip(benchmark::State &state) { | |
char storage[4]; | |
for (auto _ : state) { | |
auto limit = state.range(0); | |
for (uint32_t i = 0u; i < limit; ++i) { | |
benchmark::DoNotOptimize(varint::encode(storage, i)); | |
benchmark::DoNotOptimize(varint::decode(storage)); | |
} | |
} | |
} | |
BENCHMARK(BM_varint_roundtrip) | |
->Args({1, 1}) | |
->Args({1 << 1, 1 << 1}) | |
->Args({1 << 4, 1 << 4}) | |
->Args({1 << 8, 1 << 8}) | |
->Args({1 << 16, 1 << 16}) | |
->Args({1 << 18, 1 << 18}) | |
->Args({1 << 22, 1 << 22}) | |
->Args({1 << 29, 1 << 29}); | |
BENCHMARK_MAIN(); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment