Skip to content

Instantly share code, notes, and snippets.

@magurosan
Created May 26, 2022 14:40
Show Gist options
  • Save magurosan/23a557cd1810cdf4e91ac44371d9b019 to your computer and use it in GitHub Desktop.
Save magurosan/23a557cd1810cdf4e91ac44371d9b019 to your computer and use it in GitHub Desktop.
x8 xoshiro256 (AVX-512)
#include <immintrin.h>
#include <stdint.h>
/*
 * State for eight xoshiro256 generators that are stepped together.
 *
 * All three members overlay the same 256 bytes:
 *   state512 - four 512-bit vectors, the view the generator routines in this
 *              file load and store;
 *   state64  - the same storage as 32 64-bit words;
 *   state32  - the same storage as 64 32-bit words.
 */
typedef union PARALLEL_XOSHIRO_AVX512_STATE {
    __m512i  state512[4];
    uint64_t state64[32];
    uint32_t state32[64];
} xoshiro256_x8_avx512_state_t;
/*
 * Advance all eight generators by one step: the xoshiro256 state transition,
 * applied to each 64-bit lane independently.
 *
 * Per lane, with s0..s3 the state words before the call:
 *   s0' = s0 ^ s1 ^ s3
 *   s1' = s0 ^ s1 ^ s2
 *   s2' = s0 ^ s2 ^ (s1 << 17)
 *   s3' = rotl(s1 ^ s3, 45)
 *
 * state: generator state, updated in place; must not be NULL.
 *
 * Declared `static inline` rather than plain `inline`: in C a plain inline
 * definition does not emit an external definition, so any call the compiler
 * chooses not to inline (for example at -O0) would fail to link.
 */
static inline void next_common_uint64x8_avx512(xoshiro256_x8_avx512_state_t* state) {
    const __m512i s0 = state->state512[0];
    const __m512i s1 = state->state512[1];
    const __m512i s2 = state->state512[2];
    const __m512i s3 = state->state512[3];

    const __m512i t   = _mm512_slli_epi64(s1, 17);
    const __m512i s13 = _mm512_xor_epi64(s1, s3);

    /* imm8 0x96 selects the three-input XOR: A ^ B ^ C. */
    state->state512[1] = _mm512_ternarylogic_epi64(s1, s2, s0, 0x96);
    state->state512[2] = _mm512_ternarylogic_epi64(t, s2, s0, 0x96);
    state->state512[0] = _mm512_xor_epi64(s0, s13);
    state->state512[3] = _mm512_rol_epi64(s13, 45);
}
/*
 * xoshiro256++ output for eight generators at once.
 *
 * Returns, per 64-bit lane, rotl(s0 + s3, 23) + s0 computed from the state as
 * it is on entry, then advances the state by one step.
 */
__m512i next_uint64x8(xoshiro256_x8_avx512_state_t* state) {
    const __m512i s0 = state->state512[0];
    const __m512i s3 = state->state512[3];

    const __m512i rotated = _mm512_rol_epi64(_mm512_add_epi64(s0, s3), 23);
    const __m512i result  = _mm512_add_epi64(rotated, s0);

    next_common_uint64x8_avx512(state);
    return result;
}
/*
 * xoshiro256** output for eight generators at once.
 *
 * Returns, per 64-bit lane, rotl(s1 * 5, 7) * 9 computed from the state as it
 * is on entry, then advances the state by one step.
 *
 * The scrambler reads state word 1 (state512[1]), as in the reference
 * xoshiro256** generator; reading word 0 yields a different output sequence.
 */
__m512i next_uint64x8_ss(xoshiro256_x8_avx512_state_t* state) {
    __m512i v = state->state512[1];
    /* Multiplications by small constants are done as shift-and-add. */
    v = _mm512_add_epi64(v, _mm512_slli_epi64(v, 2)); /* v * 5 */
    v = _mm512_rol_epi64(v, 7);
    v = _mm512_add_epi64(v, _mm512_slli_epi64(v, 3)); /* v * 9 */
    next_common_uint64x8_avx512(state);
    return v;
}
/*
 * xoshiro256+ output for eight generators at once, as doubles in [0, 1).
 *
 * Per 64-bit lane the raw output is s0 + s3 from the state on entry. Its top
 * 53 bits are kept and scaled by 2^-53, so every result is an exact multiple
 * of 2^-53 in [0, 1). The state is then advanced by one step.
 *
 * The shift is required for correctness: _mm512_cvtepi64_pd converts signed
 * integers, so converting the full 64-bit sum would turn every value with the
 * top bit set into a negative number.
 */
__m512d next_float64x8(xoshiro256_x8_avx512_state_t* state)
{
    const __m512i sum = _mm512_add_epi64(state->state512[0], state->state512[3]);

    /* After the shift each lane is < 2^53: non-negative and exactly
     * representable as a double, so the signed conversion is exact. */
    const __m512i top53 = _mm512_srli_epi64(sum, 11);
    const __m512d vd = _mm512_mul_pd(_mm512_cvtepi64_pd(top53),
                                     _mm512_set1_pd(1.0 / 9007199254740992.0)); /* 2^-53 */

    /* NOTE: if using only the low 52 bits of the sum is acceptable, the
     * following may be faster. It places those bits in the mantissa of a
     * double in [1, 2) and subtracts 1.0; it would replace the computation
     * of top53 and vd above.
     *
     *   __m512i m = _mm512_ternarylogic_epi64(sum,
     *       _mm512_set1_epi64(0x000FFFFFFFFFFFFFULL),
     *       _mm512_set1_epi64(0x3FF0000000000000ULL),
     *       0xEA);  // (A & B) | C
     *   __m512d vd = _mm512_sub_pd(_mm512_castsi512_pd(m), _mm512_set1_pd(1.0));
     */
    next_common_uint64x8_avx512(state);
    return vd;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment