fp64/u32noise.h

## u32noise.h
// Public Domain under http://unlicense.org, see link for details.

// Several stateless PRNGs.
// Emphatically NOT cryptographically secure.

#ifndef U32NOISE_H
#define U32NOISE_H

#include <stdint.h>

// May be turned into uint128->uint32 by directly taking a, b.
// Might be better to use 4 distinct multiplier constants, not 2.
// Can compute a-side and b-side in parallel.
static inline uint32_t u32noise1(uint64_t n)
{
    uint64_t a=n,b=~n;
    uint64_t c=0xB564EF22EC7AECE5ull,d=0x2C6FE96EE78B6955ull;
    a^=a>>32;b^=b>>32;
    a*=c;    b*=d;
    a^=a>>32;b^=b>>32;
    a*=d;    b*=c;
    n=a^b;
    return (uint32_t)(n^(n>>32));
}

// Seems a decent default choice w.r.t. quality and speed.
// See https://github.com/skeeto/hash-prospector/issues/23 for XQO primitive.
// 32-bit constant (e.g. 0xF2FC5985ull) might be good enough too.
static inline uint32_t u32noise2(uint64_t n)
{
    n^=~n>>32; n*=0xAF251AF3B0F025B5ull;
    n^=~n>>32; n=(n|1ull)^(n*n);
    return (uint32_t)(n^(n>>32));
}

// Slower, but simple enough to type in from memory.
// Tempting to move negation into the 2nd column, but seems to hurt quality.
static inline uint32_t u32noise3(uint64_t n)
{
    n^=~n>>35; n=(n|1ull)^(n*n);
    n^=~n>>35; n=(n|1ull)^(n*n);
    n^=~n>>35; n=(n|1ull)^(n*n);
    return (uint32_t)(n^(n>>32));
}

// Only requires 32x32->32 multiplication.
static inline uint32_t u32noise4(uint64_t n)
{
    uint32_t a=(uint32_t)(n^(n>>32)),b=~(uint32_t)n,c,d;
    c=a^(b>>17)    ; d=b^(a>>15)    ;
    a=c*0x01C8E815u; b=d*0xA13FC965u;
    c=a^(b>>15)    ; d=b^(a>>19)    ;
    a=c*0xCF019D85u; b=d*0xAC564B05u;
    c=a^(b>>19)    ; d=b^(a>>17)    ;
    a=c*0x01ED0675u; b=d*0x8664F205u;
    c=a^(b>>17)    ; d=b^(a>>17)    ;
    return c+d;
}

// Vaguely inspired by http://burtleburtle.net/bob/rand/smallprng.html .
// Does not require multiplication at all.
static inline uint32_t u32noise5(uint64_t n)
{
    uint32_t a=0xF1EA5EEDu,b=(uint32_t)n,c=(uint32_t)(n>>32),d=b,e;
#define ROTL(x,y) (((x)<<(y))|((x)>>((-(y))&31)))
    e=a-ROTL(b,19);a=b^ROTL(c,19);b=c^ROTL(d,11);c=d+ROTL(e,11);d=e^ROTL(a,17);
    e=a-ROTL(b,23);a=b^ROTL(c,21);b=c^ROTL(d,23);c=d+ROTL(e,19);d=e^ROTL(a,19);
    e=a-ROTL(b,27);a=b^ROTL(c,17);b=c^(d>>9)    ;c=d+(e<<3)    ;d=e+a;
    e=a-ROTL(b,11);a=b^ROTL(c,19);b=c^ROTL(d,17);c=d+ROTL(e,17);d=e^ROTL(a,21);
    e=a-ROTL(b,15);a=b^ROTL(c,13);b=c^ROTL(d,15);c=d+ROTL(e,15);d=e^ROTL(a,25);
#undef ROTL
    return a^c^b^d;
}

// The following 2 functions are only really good for
// uint32-uint32 noise. Signatures are for the sake of consistency.

// Fastest function here (on x64).
// PractRand detects problems at at 32GB (2^33 outputs).
static inline uint32_t u32noise6(uint64_t n)
{
    n=(n^(~n<<32))*0xF7C2EBC08F67F2B5ull;
    n=(n^( n>>32))*0x9E3779B97F4A7C15ull;
    return (uint32_t)(n^(n>>32));
}

// Based on https://burtleburtle.net/bob/hash/integer.html .
// Does not require multiplication at all.
// PractRand detects problems at 128GB (2^35 outputs).
static inline uint32_t u32noise7(uint64_t n)
{
    uint32_t a=(uint32_t)n,b=(uint32_t)(n^(n>>32));
    a-=(a<< 6); b+=~(b<<15);
    a^=(a>>17); b^= (b>>10);
    a-=(a<< 9); b+= (b<< 3);
    a^=(a<< 4); b^= (b>> 6);
    a-=(a<< 3); b+=~(b<<11);
    a^=(a<<10); b^= (b>>16);
    a^=(a>>15); b+= (b<<20)^(b>>12);
    return a+b;
}

#endif
	// Public Domain under http://unlicense.org, see link for details.

	// Several stateless PRNGs.
	// Emphatically NOT cryptographically secure.

	#ifndef U32NOISE_H
	#define U32NOISE_H

	#include <stdint.h>

	// May be turned into uint128->uint32 by directly taking a, b.
	// Might be better to use 4 distinct multiplier constants, not 2.
	// Can compute a-side and b-side in parallel.
	static inline uint32_t u32noise1(uint64_t n)
	{
	uint64_t a=n,b=~n;
	uint64_t c=0xB564EF22EC7AECE5ull,d=0x2C6FE96EE78B6955ull;
	a^=a>>32;b^=b>>32;
	a=c; b=d;
	a^=a>>32;b^=b>>32;
	a=d; b=c;
	n=a^b;
	return (uint32_t)(n^(n>>32));
	}

	// Seems a decent default choice w.r.t. quality and speed.
	// See https://github.com/skeeto/hash-prospector/issues/23 for XQO primitive.
	// 32-bit constant (e.g. 0xF2FC5985ull) might be good enough too.
	static inline uint32_t u32noise2(uint64_t n)
	{
	n^=~n>>32; n*=0xAF251AF3B0F025B5ull;
	n^=~n>>32; n=(n\|1ull)^(n*n);
	return (uint32_t)(n^(n>>32));
	}

	// Slower, but simple enough to type in from memory.
	// Tempting to move negation into the 2nd column, but seems to hurt quality.
	static inline uint32_t u32noise3(uint64_t n)
	{
	n^=~n>>35; n=(n\|1ull)^(n*n);
	n^=~n>>35; n=(n\|1ull)^(n*n);
	n^=~n>>35; n=(n\|1ull)^(n*n);
	return (uint32_t)(n^(n>>32));
	}

	// Only requires 32x32->32 multiplication.
	static inline uint32_t u32noise4(uint64_t n)
	{
	uint32_t a=(uint32_t)(n^(n>>32)),b=~(uint32_t)n,c,d;
	c=a^(b>>17) ; d=b^(a>>15) ;
	a=c0x01C8E815u; b=d0xA13FC965u;
	c=a^(b>>15) ; d=b^(a>>19) ;
	a=c0xCF019D85u; b=d0xAC564B05u;
	c=a^(b>>19) ; d=b^(a>>17) ;
	a=c0x01ED0675u; b=d0x8664F205u;
	c=a^(b>>17) ; d=b^(a>>17) ;
	return c+d;
	}

	// Vaguely inspired by http://burtleburtle.net/bob/rand/smallprng.html .
	// Does not require multiplication at all.
	static inline uint32_t u32noise5(uint64_t n)
	{
	uint32_t a=0xF1EA5EEDu,b=(uint32_t)n,c=(uint32_t)(n>>32),d=b,e;
	#define ROTL(x,y) (((x)<<(y))\|((x)>>((-(y))&31)))
	e=a-ROTL(b,19);a=b^ROTL(c,19);b=c^ROTL(d,11);c=d+ROTL(e,11);d=e^ROTL(a,17);
	e=a-ROTL(b,23);a=b^ROTL(c,21);b=c^ROTL(d,23);c=d+ROTL(e,19);d=e^ROTL(a,19);
	e=a-ROTL(b,27);a=b^ROTL(c,17);b=c^(d>>9) ;c=d+(e<<3) ;d=e+a;
	e=a-ROTL(b,11);a=b^ROTL(c,19);b=c^ROTL(d,17);c=d+ROTL(e,17);d=e^ROTL(a,21);
	e=a-ROTL(b,15);a=b^ROTL(c,13);b=c^ROTL(d,15);c=d+ROTL(e,15);d=e^ROTL(a,25);
	#undef ROTL
	return a^c^b^d;
	}

	// The following 2 functions are only really good for
	// uint32-uint32 noise. Signatures are for the sake of consistency.

	// Fastest function here (on x64).
	// PractRand detects problems at at 32GB (2^33 outputs).
	static inline uint32_t u32noise6(uint64_t n)
	{
	n=(n^(~n<<32))*0xF7C2EBC08F67F2B5ull;
	n=(n^( n>>32))*0x9E3779B97F4A7C15ull;
	return (uint32_t)(n^(n>>32));
	}

	// Based on https://burtleburtle.net/bob/hash/integer.html .
	// Does not require multiplication at all.
	// PractRand detects problems at 128GB (2^35 outputs).
	static inline uint32_t u32noise7(uint64_t n)
	{
	uint32_t a=(uint32_t)n,b=(uint32_t)(n^(n>>32));
	a-=(a<< 6); b+=~(b<<15);
	a^=(a>>17); b^= (b>>10);
	a-=(a<< 9); b+= (b<< 3);
	a^=(a<< 4); b^= (b>> 6);
	a-=(a<< 3); b+=~(b<<11);
	a^=(a<<10); b^= (b>>16);
	a^=(a>>15); b+= (b<<20)^(b>>12);
	return a+b;
	}

	#endif
Function	PractRand
u16noise1	fails at 16GB (i.e. good for full cycle)
u16noise2	problems at 128MB, fails at 256MB
u16noise3	problems at 512MB, fails at 1GB
u16noise4	problems at 512MB, fails at 1GB
u16noise5	fails at 16GB (i.e. good for full cycle)
u16noise6	problems at 2MB, fails at 8MB
u16noise7	problems at 512KB, fails at 1MB
Function	tiny	small	standard	big	huge
u32noise1	0.459	0.363	0.385	0.753	0.655
u32noise2	0.816	0.989	0.28	0.716	0.813
u32noise3	0.985	0.649	0.462	0.707	0.348
u32noise4	0.816	0.272	0.0713	0.777	0.458
u32noise5	0.142	0.225	0.334	0.428	3.6e-05
u32noise6	0.813	0.961	2.83e-32	0.968	8.18e-09
u32noise7	0.919	0.25	0.881	0.222	4.5e-34