-
-
Save mmozeiko/37010d7352cffcdf535624f783a5944a to your computer and use it in GitHub Desktop.
pcg benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "benchmark\benchmark.h" | |
#include <stdint.h> | |
#include <intrin.h> | |
// 128-bit unsigned integer stored as two 64-bit limbs:
// element [0] is the low half, element [1] is the high half.
typedef uint64_t pcg64_uint128[2];

// NOTE: every macro below evaluates its arguments more than once, so pass
// plain lvalues without side effects.

// Brace initializer for a pcg64_uint128; `a` is the high half, `b` the low half.
#define PCG64_INIT(a, b) { (b), (a) }

// Non-zero when both halves of r are zero.
#define PCG64_IS_ZERO(r) ((r)[0] == 0 && (r)[1] == 0)

// Sets both halves of r to zero.
#define PCG64_ZERO(r) ((r)[0] = (r)[1] = 0)

// Copies x into r, low half first; the expression yields the copied high half.
#define PCG64_COPY(r, x) ((r)[0] = (x)[0], (r)[1] = (x)[1])

// Accessors (usable as lvalues) for the low and high 64-bit halves.
#define PCG64_LOW(r) ((r)[0])
#define PCG64_HIGH(r) ((r)[1])

// r >>= c. The high half is shifted with a plain 64-bit shift, so c must be
// in [0, 63].
#define PCG64_SHR(r, c) \
    ((r)[0] = __shiftright128((r)[0], (r)[1], (c)), (r)[1] >>= (c))

// r += x using the add-with-carry intrinsic; the carry out of the low half is
// fed into the high-half addition. The expression yields the final carry.
#define PCG64_ADD(r, x) \
    (_addcarry_u64(_addcarry_u64(0, (r)[0], (x)[0], &(r)[0]), (r)[1], (x)[1], &(r)[1]))

// r += x without intrinsics: after the low halves are added, the sum is
// smaller than the addend exactly when the addition wrapped, which gives the
// carry into the high half. r and x must be distinct objects, otherwise the
// comparison reads the already-updated value on both sides.
#define PCG64_ADD_nointrin(r, x) \
    ((r)[0] += (x)[0], (r)[1] += (x)[1] + ((r)[0] < (x)[0]))

// r *= x, keeping the low 128 bits of the product. The two cross terms are
// computed first (before r is overwritten) and only contribute to the high
// half; _umul128 supplies the full 128-bit product of the two low halves.
// Wrapped in do/while(0) so the macro behaves as a single statement and needs
// a trailing semicolon at the use site.
#define PCG64_MUL(r, x) \
    do { \
        uint64_t pcg64_mul_cross_ = (r)[0] * (x)[1] + (r)[1] * (x)[0]; \
        (r)[0] = _umul128((r)[0], (x)[0], &(r)[1]); \
        (r)[1] += pcg64_mul_cross_; \
    } while (0)
// PCG64_ROR(r, c): rotate the 64-bit value r right by c bits.
// Uses the compiler's rotate intrinsic where one is known to exist and falls
// back to a shift/or expression elsewhere, so the macro is always defined.
#if defined(__clang__)
#define PCG64_ROR(r, c) __builtin_rotateright64((r), (c))
#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
#define PCG64_ROR(r, c) _rotr64((r), (c))
#else
// The count is reduced modulo 64 on both shifts so that c == 0 never produces
// a shift by 64 (which would be undefined).
#define PCG64_ROR(r, c) \
    ((uint64_t)(((uint64_t)(r) >> ((unsigned)(c) & 63u)) | \
                ((uint64_t)(r) << ((64u - ((unsigned)(c) & 63u)) & 63u))))
#endif
// High (_H) and low (_L) 64-bit halves of the 128-bit multiplier and increment
// that pcg64_next feeds into state = state * multiplier + increment.
// The decimal spelling of each value is kept alongside for cross-referencing.
#define PCG_DEFAULT_MULTIPLIER_128_H 0x2360ED051FC65DA4ULL  // 2549297995355413924
#define PCG_DEFAULT_MULTIPLIER_128_L 0x4385DF649FCCF645ULL  // 4865540595714422341
#define PCG_DEFAULT_INCREMENT_128_H  0x5851F42D4C957F2DULL  // 6364136223846793005
#define PCG_DEFAULT_INCREMENT_128_L  0x14057B7EF767814FULL  // 1442695040888963407
typedef struct { | |
pcg64_uint128 state; | |
} pcg64; | |
// Advances rng by one step and returns a 64-bit output.
//
// The step is state = state * multiplier + increment on the 128-bit state,
// with the addition done through the PCG64_ADD (add-with-carry intrinsic)
// macro. The output is computed from the *new* state.
static inline uint64_t pcg64_next(pcg64* rng)
{
    const pcg64_uint128 multiplier =
        PCG64_INIT(PCG_DEFAULT_MULTIPLIER_128_H, PCG_DEFAULT_MULTIPLIER_128_L);
    const pcg64_uint128 increment =
        PCG64_INIT(PCG_DEFAULT_INCREMENT_128_H, PCG_DEFAULT_INCREMENT_128_L);

    // Work on a local copy and write the result back once.
    pcg64_uint128 next;
    PCG64_COPY(next, rng->state);
    PCG64_MUL(next, multiplier);
    PCG64_ADD(next, increment);
    PCG64_COPY(rng->state, next);

    // XSL-RR: xor the two halves together, then rotate right by the top
    // six bits of the high half (a count in [0, 63]).
    const uint64_t high = PCG64_HIGH(next);
    const uint64_t low = PCG64_LOW(next);
    const uint64_t folded = high ^ low;
    const int rotation = (int)(high >> 58);
    return PCG64_ROR(folded, rotation);
}
// Same generator step and output function as pcg64_next, except that the
// 128-bit addition goes through PCG64_ADD_nointrin (plain C carry detection)
// instead of the add-with-carry intrinsic. Exists so the two can be
// benchmarked against each other.
static inline uint64_t pcg64_next_noaddintrin(pcg64* rng)
{
    const pcg64_uint128 multiplier =
        PCG64_INIT(PCG_DEFAULT_MULTIPLIER_128_H, PCG_DEFAULT_MULTIPLIER_128_L);
    const pcg64_uint128 increment =
        PCG64_INIT(PCG_DEFAULT_INCREMENT_128_H, PCG_DEFAULT_INCREMENT_128_L);

    // Work on a local copy and write the result back once.
    pcg64_uint128 next;
    PCG64_COPY(next, rng->state);
    PCG64_MUL(next, multiplier);
    PCG64_ADD_nointrin(next, increment);
    PCG64_COPY(rng->state, next);

    // XSL-RR: xor the two halves together, then rotate right by the top
    // six bits of the high half (a count in [0, 63]).
    const uint64_t high = PCG64_HIGH(next);
    const uint64_t low = PCG64_LOW(next);
    const uint64_t folded = high ^ low;
    const int rotation = (int)(high >> 58);
    return PCG64_ROR(folded, rotation);
}
// Initialises rng from a 128-bit seed given as two 64-bit halves.
//
// Sequence: clear the state, step once, add the seed to the state, step
// again. The outputs of both steps are discarded; only the state matters.
static inline void pcg64_seed(pcg64* rng, uint64_t seed_high, uint64_t seed_low)
{
    pcg64_uint128 seed_value = PCG64_INIT(seed_high, seed_low);

    PCG64_ZERO(rng->state);
    (void)pcg64_next(rng);

    PCG64_ADD(rng->state, seed_value);
    (void)pcg64_next(rng);
}
// Benchmark body for pcg64_next (addition via the add-with-carry intrinsic).
// The generator is seeded once with (high = 1, low = 2) outside the timed
// loop; each iteration draws one value and passes it to DoNotOptimize so the
// call is not eliminated.
static void BenchMy(benchmark::State& state) {
    pcg64 rng;
    pcg64_seed(&rng, 1, 2);
    benchmark::DoNotOptimize(rng);

    for (auto _ : state) {
        uint64_t sample = pcg64_next(&rng);
        benchmark::DoNotOptimize(sample);
    }
}
// Benchmark body for pcg64_next_noaddintrin (addition without the
// add-with-carry intrinsic). The generator is seeded once with
// (high = 1, low = 2) outside the timed loop; each iteration draws one value
// and passes it to DoNotOptimize so the call is not eliminated.
static void BenchMyNoAddIntrin(benchmark::State& state) {
    pcg64 rng;
    pcg64_seed(&rng, 1, 2);
    benchmark::DoNotOptimize(rng);

    for (auto _ : state) {
        uint64_t sample = pcg64_next_noaddintrin(&rng);
        benchmark::DoNotOptimize(sample);
    }
}
// Register both benchmark bodies with the benchmark library so they are run
// and reported side by side.
BENCHMARK(BenchMy);
BENCHMARK(BenchMyNoAddIntrin);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment