Skip to content

Instantly share code, notes, and snippets.

@fp64
Last active March 27, 2023 08:48
Show Gist options
  • Save fp64/126289051f0c9e1b70bcba7c3e07a82f to your computer and use it in GitHub Desktop.
Save fp64/126289051f0c9e1b70bcba7c3e07a82f to your computer and use it in GitHub Desktop.
// Public Domain under http://unlicense.org, see link for details.
// Several stateless PRNGs.
// Emphatically NOT cryptographically secure.
#ifndef U32NOISE_H
#define U32NOISE_H
#include <stdint.h>
// May be turned into uint128->uint32 by directly taking a, b.
// Might be better to use 4 distinct multiplier constants, not 2.
// Can compute a-side and b-side in parallel.
static inline uint32_t u32noise1(uint64_t n)
{
uint64_t a=n,b=~n;
uint64_t c=0xB564EF22EC7AECE5ull,d=0x2C6FE96EE78B6955ull;
a^=a>>32;b^=b>>32;
a*=c; b*=d;
a^=a>>32;b^=b>>32;
a*=d; b*=c;
n=a^b;
return (uint32_t)(n^(n>>32));
}
// Seems a decent default choice w.r.t. quality and speed.
// See https://github.com/skeeto/hash-prospector/issues/23 for XQO primitive.
// 32-bit constant (e.g. 0xF2FC5985ull) might be good enough too.
static inline uint32_t u32noise2(uint64_t n)
{
n^=~n>>32; n*=0xAF251AF3B0F025B5ull;
n^=~n>>32; n=(n|1ull)^(n*n);
return (uint32_t)(n^(n>>32));
}
// Slower, but simple enough to type in from memory.
// Tempting to move negation into the 2nd column, but seems to hurt quality.
static inline uint32_t u32noise3(uint64_t n)
{
n^=~n>>35; n=(n|1ull)^(n*n);
n^=~n>>35; n=(n|1ull)^(n*n);
n^=~n>>35; n=(n|1ull)^(n*n);
return (uint32_t)(n^(n>>32));
}
// Only requires 32x32->32 multiplication.
static inline uint32_t u32noise4(uint64_t n)
{
uint32_t a=(uint32_t)(n^(n>>32)),b=~(uint32_t)n,c,d;
c=a^(b>>17) ; d=b^(a>>15) ;
a=c*0x01C8E815u; b=d*0xA13FC965u;
c=a^(b>>15) ; d=b^(a>>19) ;
a=c*0xCF019D85u; b=d*0xAC564B05u;
c=a^(b>>19) ; d=b^(a>>17) ;
a=c*0x01ED0675u; b=d*0x8664F205u;
c=a^(b>>17) ; d=b^(a>>17) ;
return c+d;
}
// Vaguely inspired by http://burtleburtle.net/bob/rand/smallprng.html .
// Does not require multiplication at all.
static inline uint32_t u32noise5(uint64_t n)
{
uint32_t a=0xF1EA5EEDu,b=(uint32_t)n,c=(uint32_t)(n>>32),d=b,e;
#define ROTL(x,y) (((x)<<(y))|((x)>>((-(y))&31)))
e=a-ROTL(b,19);a=b^ROTL(c,19);b=c^ROTL(d,11);c=d+ROTL(e,11);d=e^ROTL(a,17);
e=a-ROTL(b,23);a=b^ROTL(c,21);b=c^ROTL(d,23);c=d+ROTL(e,19);d=e^ROTL(a,19);
e=a-ROTL(b,27);a=b^ROTL(c,17);b=c^(d>>9) ;c=d+(e<<3) ;d=e+a;
e=a-ROTL(b,11);a=b^ROTL(c,19);b=c^ROTL(d,17);c=d+ROTL(e,17);d=e^ROTL(a,21);
e=a-ROTL(b,15);a=b^ROTL(c,13);b=c^ROTL(d,15);c=d+ROTL(e,15);d=e^ROTL(a,25);
#undef ROTL
return a^c^b^d;
}
// The following 2 functions are only really good for
// uint32-uint32 noise. Signatures are for the sake of consistency.
// Fastest function here (on x64).
// PractRand detects problems at at 32GB (2^33 outputs).
static inline uint32_t u32noise6(uint64_t n)
{
n=(n^(~n<<32))*0xF7C2EBC08F67F2B5ull;
n=(n^( n>>32))*0x9E3779B97F4A7C15ull;
return (uint32_t)(n^(n>>32));
}
// Based on https://burtleburtle.net/bob/hash/integer.html .
// Does not require multiplication at all.
// PractRand detects problems at 128GB (2^35 outputs).
static inline uint32_t u32noise7(uint64_t n)
{
uint32_t a=(uint32_t)n,b=(uint32_t)(n^(n>>32));
a-=(a<< 6); b+=~(b<<15);
a^=(a>>17); b^= (b>>10);
a-=(a<< 9); b+= (b<< 3);
a^=(a<< 4); b^= (b>> 6);
a-=(a<< 3); b+=~(b<<11);
a^=(a<<10); b^= (b>>16);
a^=(a>>15); b+= (b<<20)^(b>>12);
return a+b;
}
#endif
@fp64
Copy link
Author

fp64 commented Jan 24, 2023

Note: changed the definition of ROTL slightly. More solid, and seems to generate better code on MSVC.

Might as well post the test results (bit patchwork, due to limited machine time spent).

Everything passes SmallCrush and Crush from TestU01, both straight, and bitreversed. u32noise1 .. u32noise5 pass BigCrush (again, both straight, and bitreversed), haven't tested the other two.

As noted in code, u32noise6 and u32noise7 exhibit problems in PractRand at 32GB and 128GB respectively. Other functions pass up to at least 256GB. Tested with RNG_test stdin32 -tlmin 1K -te 0 -tf 2, which is weak: you probably want -te 1.

Results for half-sized functions (u16noise.h):

Function PractRand
u16noise1 fails at 16GB (i.e. good for full cycle)
u16noise2 problems at 128MB, fails at 256MB
u16noise3 problems at 512MB, fails at 1GB
u16noise4 problems at 512MB, fails at 1GB
u16noise5 fails at 16GB (i.e. good for full cycle)
u16noise6 problems at 2MB, fails at 8MB
u16noise7 problems at 512KB, fails at 1MB

Everything other than u32noise6 passes gjrand up to and including --big (u32noise6 actually passes --big just fine, but fails --standard horribly; go figure) with handful of "probably ok" grade1 and grade2 failures. Tested by piping to stdin (so no rewind?). Update: tested up to --huge, u32noise6 and u32noise7 fail, u32noise5 is pretty suspicious. Results:

Function tiny small standard big huge
u32noise1 0.459 0.363 0.385 0.753 0.655
u32noise2 0.816 0.989 0.28 0.716 0.813
u32noise3 0.985 0.649 0.462 0.707 0.348
u32noise4 0.816 0.272 0.0713 0.777 0.458
u32noise5 0.142 0.225 0.334 0.428 3.6e-05
u32noise6 0.813 0.961 2.83e-32 0.968 8.18e-09
u32noise7 0.919 0.25 0.881 0.222 4.5e-34

Tested by outputting sequential values, starting from zero (essentially uint64_t counter=0; while(1) output(u32noiseN(counter++));). Other testing patterns may make sense, or at least a different starting position (similar to testing PRNGs with different seeds).

If you want to use u32noise6 and/or u32noise7 as uint32->uint32 functions, as the code suggests, please note, that they would be biased (since they pass the birthday test, they have to).

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment