Skip to content

Instantly share code, notes, and snippets.

@Strilanc
Created February 2, 2025 22:53
Show Gist options
  • Select an option

  • Save Strilanc/4f0f826e7a00c9dd6e32d5c9d5f913a1 to your computer and use it in GitHub Desktop.

Select an option

Save Strilanc/4f0f826e7a00c9dd6e32d5c9d5f913a1 to your computer and use it in GitHub Desktop.
Implements a class that can quickly approximately sample from geometric distributions whose miss probabilities are specified as an integer root of 1/2. For example, for p=0.1% you would use the 693'th root because 1 - 0.5^(1/693) ~= 0.1%.
#include <chrono>
#include <iostream>
#include <random>
#include <bit>
#include "immintrin.h"
#include <algorithm>
__m256i countl_zero_vec(__m256i x) {
// Tried doing vectorized bit twiddling instead, but it was slower.
uint64_t a = (uint64_t)_mm256_extract_epi64(x, 0);
uint64_t b = (uint64_t)_mm256_extract_epi64(x, 1);
uint64_t c = (uint64_t)_mm256_extract_epi64(x, 2);
uint64_t d = (uint64_t)_mm256_extract_epi64(x, 3);
a = std::countl_zero(a);
b = std::countl_zero(b);
c = std::countl_zero(c);
d = std::countl_zero(d);
return _mm256_set_epi64x(a, b, c, d);
}
/// Approximates floor(step * -log2(x / pow(2, 33) - 0.5)) for each even 32 bit word of x.
/// The odd 32 bit words are ignored (due using _mm256_mul_epi32).
///
/// Args:
/// x: The vector value to operate on.
/// x32[2*k+0] is used.
/// x32[2*k+1] is ignored.
/// step: The end of the range of possible results. The number of biased 0s
/// per leading unbiased 0, when converting biased bits into jumps across
/// unbiased bits. The k in p^k = 0.5 where p is the chance of each biased
/// bit being ON.
///
/// Returns:
/// A vector value r with r.u64[k] storing the output for input x.u32[2*k].
/// The outputs are guaranteed to be in the range [0, step).
/// Assuming x.u32[2*k] is a uniformly random integer, then seeing the result
/// r.u64[k] = V+1 should be approximately 1-p times less likely than seeing
/// r.u64[k] = V.
__m256i vec_approx_geometric_step(__m256i x, size_t step) {
x = _mm256_srli_epi32(x, 1);
__m256i total = _mm256_set1_epi64x(-22531715);
total = _mm256_mul_epi32(total, x);
total = _mm256_srli_epi64(total, 32);
total = _mm256_add_epi64(total, _mm256_set1_epi64x(49873802));
total = _mm256_mul_epi32(total, x);
total = _mm256_srli_epi64(total, 32);
total = _mm256_add_epi64(total, _mm256_set1_epi64x(-54912070));
total = _mm256_mul_epi32(total, x);
total = _mm256_srli_epi64(total, 32);
total = _mm256_add_epi64(total, _mm256_set1_epi64x(47448117));
total = _mm256_mul_epi32(total, x);
total = _mm256_srli_epi64(total, 32);
total = _mm256_add_epi64(total, _mm256_set1_epi64x(-48376425));
total = _mm256_mul_epi32(total, x);
total = _mm256_srli_epi64(total, 32);
total = _mm256_add_epi64(total, _mm256_set1_epi64x(16777200));
total = _mm256_mul_epi32(total, _mm256_set1_epi64x(step));
total = _mm256_srli_epi64(total, 24);
return total;
}
struct FastRng {
__m256i state;
static FastRng from_external_entropy() {
std::random_device device;
__m256i state = _mm256_set_epi64x(device(), device(), device(), device());
return FastRng{state};
}
__m256i next_uniform() {
// ranqd1, Chapter 7.1, §An Even Quicker Generator, Eq. 7.1.6 parameters
// from Knuth and H. W. Lewis (via Wikipedia)
__m256i multiplier = _mm256_set1_epi32(1664525);
__m256i offset = _mm256_set1_epi32(1013904223);
state = _mm256_mullo_epi32(state, multiplier);
state = _mm256_add_epi32(state, offset);
return state;
}
__m256i next_jump(size_t offset_per_countl_zero) {
__m256i r = next_uniform();
// Only support probabilities where leading 0s cause an integer offset.
__m256i counts = countl_zero_vec(r);
__m256i offset_from_count = _mm256_mul_epu32(counts,_mm256_set1_epi64x(offset_per_countl_zero));
// Use low bits to to pick one of the values between leading-zero steps.
// There's a 2^-32 chance of the low bits having been double-used by the leading zero count.
__m256i extra = vec_approx_geometric_step(r, offset_per_countl_zero);
return _mm256_add_epi64(extra, offset_from_count);
}
};
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment