Skip to content

Instantly share code, notes, and snippets.

@mmozeiko

mmozeiko/pcg.cpp Secret

Created February 4, 2022 19:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mmozeiko/37010d7352cffcdf535624f783a5944a to your computer and use it in GitHub Desktop.
Save mmozeiko/37010d7352cffcdf535624f783a5944a to your computer and use it in GitHub Desktop.
pcg benchmark
#include "benchmark\benchmark.h"
#include <stdint.h>
#include <intrin.h>
// 128-bit unsigned value held as two 64-bit limbs: index 0 is the low half,
// index 1 is the high half.
typedef uint64_t pcg64_uint128[2];

// Helper macros operating on pcg64_uint128 values. Every macro evaluates its
// arguments more than once, so pass only side-effect-free expressions.
// Expression-like macros are fully parenthesized and PCG64_MUL is wrapped in
// do { } while (0), so each one can be used as a single statement anywhere
// (including the body of an unbraced if/else).

// Brace initializer built from (high, low) halves; note the low limb is stored first.
#define PCG64_INIT(a, b) { (b), (a) }
// True when both limbs are zero.
#define PCG64_IS_ZERO(r) ((r)[0] == 0 && (r)[1] == 0)
// Set both limbs of r to zero.
#define PCG64_ZERO(r) ((r)[0] = (r)[1] = 0)
// Copy both limbs of x into r.
#define PCG64_COPY(r, x) ((r)[0] = (x)[0], (r)[1] = (x)[1])
// Access the low / high limb (usable as lvalues).
#define PCG64_LOW(r) (r)[0]
#define PCG64_HIGH(r) (r)[1]
// r >>= c: the low limb receives bits shifted down from the high limb.
// NOTE(review): only meaningful for 0 <= c < 64 (the high limb is shifted with a
// plain >>, which is undefined for larger counts).
#define PCG64_SHR(r, c) ((r)[0] = __shiftright128((r)[0], (r)[1], (c)), (r)[1] >>= (c))
// r += x using the add-with-carry intrinsic: the carry out of the low-limb add
// feeds the high-limb add. The expression's value is the result of the outer
// _addcarry_u64 call (the carry out of the high limb).
#define PCG64_ADD(r, x) _addcarry_u64(_addcarry_u64(0, (r)[0], (x)[0], &(r)[0]), (r)[1], (x)[1], &(r)[1])
// r += x without intrinsics: a carry out of the low limb is detected by unsigned
// wrap-around (the sum ends up smaller than the addend). r and x must be
// distinct objects, otherwise the comparison can never observe the carry.
#define PCG64_ADD_nointrin(r, x) ((r)[0] += (x)[0], (r)[1] += (x)[1] + ((r)[0] < (x)[0]))
// r *= x, truncated to 128 bits. The two cross products only affect the high
// limb, so they are summed up front (before r is overwritten); _umul128 then
// produces the full 128-bit product of the two low limbs.
#define PCG64_MUL(r, x) \
do { \
    const uint64_t pcg64_mul_cross_ = (r)[0] * (x)[1] + (r)[1] * (x)[0]; \
    (r)[0] = _umul128((r)[0], (x)[0], &(r)[1]); \
    (r)[1] += pcg64_mul_cross_; \
} while (0)
// PCG64_ROR(r, c): rotate the 64-bit value r right by c bits.
// Uses the compiler's rotate builtin/intrinsic where one is known to exist and
// otherwise falls back to a portable shift/or form, so the macro is always
// defined. The fallback masks both shift counts to 0..63, which keeps c == 0
// well-defined (no shift by 64).
#if defined(__clang__)
#define PCG64_ROR(r, c) __builtin_rotateright64((r), (c))
#elif defined(_MSC_VER) && (defined(_M_AMD64) || defined(_M_ARM64))
#define PCG64_ROR(r, c) _rotr64((r), (c))
#else
#define PCG64_ROR(r, c) \
    ((uint64_t)(((uint64_t)(r) >> ((c) & 63)) | ((uint64_t)(r) << ((64 - ((c) & 63)) & 63))))
#endif
// Constants for the 128-bit state update performed by the pcg64_next functions,
// each split into a high (_H) and low (_L) 64-bit half.
// NOTE(review): presumably the default multiplier/increment of the PCG
// reference implementation - confirm against upstream before changing.
#define PCG_DEFAULT_MULTIPLIER_128_H  2549297995355413924ULL
#define PCG_DEFAULT_MULTIPLIER_128_L  4865540595714422341ULL

#define PCG_DEFAULT_INCREMENT_128_H   6364136223846793005ULL
#define PCG_DEFAULT_INCREMENT_128_L   1442695040888963407ULL
// Generator object: holds nothing but the current 128-bit state.
typedef struct pcg64 {
    pcg64_uint128 state;  // updated on every pcg64_next* call
} pcg64;
// Advances rng by one step and returns the next 64-bit output.
//
// The state update is state = state * multiplier + increment (128-bit, using
// PCG64_MUL and the intrinsic-based PCG64_ADD). The output is derived from the
// *updated* state.
static inline uint64_t pcg64_next(pcg64* rng)
{
    const pcg64_uint128 multiplier = PCG64_INIT(PCG_DEFAULT_MULTIPLIER_128_H, PCG_DEFAULT_MULTIPLIER_128_L);
    const pcg64_uint128 increment = PCG64_INIT(PCG_DEFAULT_INCREMENT_128_H, PCG_DEFAULT_INCREMENT_128_L);

    // Work on a local copy and write the new state back once.
    pcg64_uint128 next;
    PCG64_COPY(next, rng->state);
    PCG64_MUL(next, multiplier);
    PCG64_ADD(next, increment);
    PCG64_COPY(rng->state, next);

    // XSL-RR output: xor the two halves together, then rotate right by the
    // top 6 bits of the high half.
    const uint64_t high = PCG64_HIGH(next);
    const uint64_t low = PCG64_LOW(next);
    const int rotation = static_cast<int>(high >> 58);
    return PCG64_ROR(high ^ low, rotation);
}
// Same generator step as pcg64_next, except that the 128-bit addition uses
// PCG64_ADD_nointrin (plain C carry detection) instead of the add-with-carry
// intrinsic. Returns the next 64-bit output and updates rng->state.
static inline uint64_t pcg64_next_noaddintrin(pcg64* rng)
{
    const pcg64_uint128 multiplier = PCG64_INIT(PCG_DEFAULT_MULTIPLIER_128_H, PCG_DEFAULT_MULTIPLIER_128_L);
    const pcg64_uint128 increment = PCG64_INIT(PCG_DEFAULT_INCREMENT_128_H, PCG_DEFAULT_INCREMENT_128_L);

    // Work on a local copy and write the new state back once.
    pcg64_uint128 next;
    PCG64_COPY(next, rng->state);
    PCG64_MUL(next, multiplier);
    PCG64_ADD_nointrin(next, increment);
    PCG64_COPY(rng->state, next);

    // XSL-RR output: xor the two halves together, then rotate right by the
    // top 6 bits of the high half.
    const uint64_t high = PCG64_HIGH(next);
    const uint64_t low = PCG64_LOW(next);
    const int rotation = static_cast<int>(high >> 58);
    return PCG64_ROR(high ^ low, rotation);
}
// Initializes rng from a 128-bit seed given as (seed_high, seed_low).
//
// Sequence: clear the state, advance once, add the seed to the state, then
// advance once more. The outputs of both advance steps are discarded.
static inline void pcg64_seed(pcg64* rng, uint64_t seed_high, uint64_t seed_low)
{
    const pcg64_uint128 seed_value = PCG64_INIT(seed_high, seed_low);

    PCG64_ZERO(rng->state);
    (void)pcg64_next(rng);

    PCG64_ADD(rng->state, seed_value);
    (void)pcg64_next(rng);
}
// Benchmark body: one pcg64_next call (intrinsic-based 128-bit add) per iteration.
static void BenchMy(benchmark::State& state) {
    // Fixed seed (high = 1, low = 2) so every run starts from the same state.
    pcg64 rng;
    pcg64_seed(&rng, 1, 2);
    benchmark::DoNotOptimize(rng);

    for (auto _ : state) {
        uint64_t sample = pcg64_next(&rng);
        // Keep the result observable so the call is not optimized away.
        benchmark::DoNotOptimize(sample);
    }
}
// Benchmark body: one pcg64_next_noaddintrin call (128-bit add without the
// add-with-carry intrinsic) per iteration.
static void BenchMyNoAddIntrin(benchmark::State& state) {
    // Fixed seed (high = 1, low = 2) so every run starts from the same state.
    pcg64 rng;
    pcg64_seed(&rng, 1, 2);
    benchmark::DoNotOptimize(rng);

    for (auto _ : state) {
        uint64_t sample = pcg64_next_noaddintrin(&rng);
        // Keep the result observable so the call is not optimized away.
        benchmark::DoNotOptimize(sample);
    }
}
// Register both variants with Google Benchmark: the generator step using the
// add-with-carry intrinsic, and the one using the plain (non-intrinsic) add.
BENCHMARK(BenchMy);
BENCHMARK(BenchMyNoAddIntrin);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment