Skip to content

Instantly share code, notes, and snippets.

@mrbid
Last active January 1, 2024 22:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mrbid/310bebaa9b0b5fb1bc47a3b5c7915231 to your computer and use it in GitHub Desktop.
Save mrbid/310bebaa9b0b5fb1bc47a3b5c7915231 to your computer and use it in GitHub Desktop.
Favorite random functions and benchmark.
/*
James William Fletcher (github.com/mrbid)
Feb 2023
Bench of my favorite random functions.
Benching like this never reflects real world scenarios,
but it does give you a rough idea of the costs involved.
gcc rand_bench.c -lm -mrdrnd -mrdseed -Ofast -o rand
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
// ISO C
#include <errno.h>
#include <inttypes.h>
#include <locale.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// POSIX / Linux
#include <fcntl.h>
#include <sched.h>
#include <sys/file.h>
#include <sys/time.h>
#include <unistd.h>
// compiler intrinsics
#include <x86intrin.h>
//#include <immintrin.h>
// Silence GCC's -Wunused-result diagnostic for the rest of this file.
#pragma GCC diagnostic ignored "-Wunused-result"
// Shorthand for GCC's always_inline attribute combined with the inline keyword.
#define forceinline __attribute__((always_inline)) inline
// adapted from ogre3d asm_math.h
// https://www.flipcode.com/archives/07-15-2002.shtml
// https://www.cs.cmu.edu/afs/andrew/scs/cs/oldfiles/15-494-sp09/dst/A/sw/ogre-1.6.4/OgreMain/include/asm_math.h
// https://gist.github.com/mrbid/310bebaa9b0b5fb1bc47a3b5c7915231
/*
    MMX shuffle-and-add generator, float output in [0, 1].

    seed: when non-zero it replaces the stored 64-bit state before the step;
          pass 0 to continue the current sequence.

    The state is private to this function (it is not shared with mmxf32c()
    or mmxi64()).
*/
float mmxf32(const __int64_t seed)
{
    static const float TO_UNIT = 1.f/(float)INT64_MAX;
    static __int64_t q = 74235;

    if(seed != 0)
        q = seed;

    // 0x1E reorders the four 16-bit words of the state as (2, 3, 1, 0);
    // the shuffled copy is then added to the state as two 32-bit lanes
    const __m64 state = _mm_cvtsi64_m64(q);
    const __m64 mixed = _mm_add_pi32(state, _m_pshufw(state, 0x1E));
    q = _m_to_int64(mixed);
    _m_empty(); // leave MMX state before any float work

    const float magnitude = fabsf(q);
    return magnitude * TO_UNIT;
}
/*
    MMX shuffle-and-add generator, float output in [-1, 1].

    seed: when non-zero it replaces the stored 64-bit state before the step;
          pass 0 to continue the current sequence.

    The state is private to this function (it is not shared with mmxf32()
    or mmxi64()).
*/
float mmxf32c(const __int64_t seed)
{
    static const float TO_TWO = 2.f/(float)INT64_MAX;
    static __int64_t q = 74235;

    if(seed != 0)
        q = seed;

    // 0x1E reorders the four 16-bit words of the state as (2, 3, 1, 0);
    // the shuffled copy is then added to the state as two 32-bit lanes
    const __m64 state = _mm_cvtsi64_m64(q);
    const __m64 mixed = _mm_add_pi32(state, _m_pshufw(state, 0x1E));
    q = _m_to_int64(mixed);
    _m_empty(); // leave MMX state before any float work

    // scale |q| to [0, 2] and recentre on zero
    const float scaled = fabsf(q) * TO_TWO;
    return scaled-1.f;
}
/*
    MMX shuffle-and-add generator, raw 64-bit output (the new state itself).

    seed: when non-zero it replaces the stored 64-bit state before the step;
          pass 0 to continue the current sequence.

    The state is private to this function (it is not shared with mmxf32()
    or mmxf32c()).
*/
__int64_t mmxi64(const __int64_t seed)
{
    static __int64_t q = 74235;

    if(seed != 0)
        q = seed;

    // 0x1E reorders the four 16-bit words of the state as (2, 3, 1, 0);
    // the shuffled copy is then added to the state as two 32-bit lanes
    const __m64 state = _mm_cvtsi64_m64(q);
    const __m64 mixed = _mm_add_pi32(state, _m_pshufw(state, 0x1E));
    q = _m_to_int64(mixed);
    _m_empty();

    return q;
}
/*
    Float in [-1, 1] built from one 16-bit RDSEED sample.

    _rdseed16_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdseed16c()
{
    static const float RECIP_FLOAT_UINT16_MAX = 2.f/(float)((unsigned short)-1);
    unsigned short s = 0;
    for(int attempt = 0; attempt < 10 && _rdseed16_step(&s) == 0; attempt++)
        _mm_pause(); // back off briefly before asking again
    return (((float)s) * RECIP_FLOAT_UINT16_MAX)-1.f;
}
/*
    Float in [-1, 1] built from one 16-bit RDRAND sample.

    _rdrand16_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdrand16c()
{
    static const float RECIP_FLOAT_UINT16_MAX = 2.f/(float)((unsigned short)-1);
    unsigned short s = 0;
    for(int attempt = 0; attempt < 10; attempt++)
    {
        if(_rdrand16_step(&s) != 0)
            break; // got a valid sample
    }
    return (((float)s) * RECIP_FLOAT_UINT16_MAX)-1.f;
}
/*
    Float in [-1, 1] built from one 32-bit RDSEED sample.

    _rdseed32_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdseed32c()
{
    static const float RECIP_FLOAT_UINT32_MAX = 2.f/(float)((unsigned int)-1);
    unsigned int s = 0;
    for(int attempt = 0; attempt < 10 && _rdseed32_step(&s) == 0; attempt++)
        _mm_pause(); // back off briefly before asking again
    return (((float)s) * RECIP_FLOAT_UINT32_MAX)-1.f;
}
/*
    Float in [-1, 1] built from one 32-bit RDRAND sample.

    _rdrand32_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdrand32c()
{
    static const float RECIP_FLOAT_UINT32_MAX = 2.f/(float)((unsigned int)-1);
    unsigned int s = 0;
    for(int attempt = 0; attempt < 10; attempt++)
    {
        if(_rdrand32_step(&s) != 0)
            break; // got a valid sample
    }
    return (((float)s) * RECIP_FLOAT_UINT32_MAX)-1.f;
}
/*
    Float in [-1, 1] built from one 64-bit RDSEED sample.

    _rdseed64_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdseed64c()
{
    static const float RECIP_FLOAT_UINT64_MAX = 2.f/(float)UINT64_MAX;
    unsigned long long s = 0;
    for(int attempt = 0; attempt < 10 && _rdseed64_step(&s) == 0; attempt++)
        _mm_pause(); // back off briefly before asking again
    return (((float)s) * RECIP_FLOAT_UINT64_MAX)-1.f;
}
/*
    Float in [-1, 1] built from one 64-bit RDRAND sample.

    _rdrand64_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdrand64c()
{
    static const float RECIP_FLOAT_UINT64_MAX = 2.f/(float)UINT64_MAX;
    unsigned long long s = 0;
    for(int attempt = 0; attempt < 10; attempt++)
    {
        if(_rdrand64_step(&s) != 0)
            break; // got a valid sample
    }
    return (((float)s) * RECIP_FLOAT_UINT64_MAX)-1.f;
}
/*
    Float in [0, 1] built from one 16-bit RDSEED sample.

    _rdseed16_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdseed16()
{
    static const float RECIP_FLOAT_UINT16_MAX = 1.f/(float)((unsigned short)-1);
    unsigned short s = 0;
    for(int attempt = 0; attempt < 10 && _rdseed16_step(&s) == 0; attempt++)
        _mm_pause(); // back off briefly before asking again
    return ((float)s) * RECIP_FLOAT_UINT16_MAX;
}
/*
    Float in [0, 1] built from one 16-bit RDRAND sample.

    _rdrand16_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdrand16()
{
    static const float RECIP_FLOAT_UINT16_MAX = 1.f/(float)((unsigned short)-1);
    unsigned short s = 0;
    for(int attempt = 0; attempt < 10; attempt++)
    {
        if(_rdrand16_step(&s) != 0)
            break; // got a valid sample
    }
    return ((float)s) * RECIP_FLOAT_UINT16_MAX;
}
/*
    Float in [0, 1] built from one 32-bit RDSEED sample.

    _rdseed32_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdseed32()
{
    static const float RECIP_FLOAT_UINT32_MAX = 1.f/(float)((unsigned int)-1);
    unsigned int s = 0;
    for(int attempt = 0; attempt < 10 && _rdseed32_step(&s) == 0; attempt++)
        _mm_pause(); // back off briefly before asking again
    return ((float)s) * RECIP_FLOAT_UINT32_MAX;
}
/*
    Float in [0, 1] built from one 32-bit RDRAND sample.

    _rdrand32_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdrand32()
{
    static const float RECIP_FLOAT_UINT32_MAX = 1.f/(float)((unsigned int)-1);
    unsigned int s = 0;
    for(int attempt = 0; attempt < 10; attempt++)
    {
        if(_rdrand32_step(&s) != 0)
            break; // got a valid sample
    }
    return ((float)s) * RECIP_FLOAT_UINT32_MAX;
}
/*
    Float in [0, 1] built from one 64-bit RDSEED sample.

    _rdseed64_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdseed64()
{
    static const float RECIP_FLOAT_UINT64_MAX = 1.f/(float)UINT64_MAX;
    unsigned long long s = 0;
    for(int attempt = 0; attempt < 10 && _rdseed64_step(&s) == 0; attempt++)
        _mm_pause(); // back off briefly before asking again
    return ((float)s) * RECIP_FLOAT_UINT64_MAX;
}
/*
    Float in [0, 1] built from one 64-bit RDRAND sample.

    _rdrand64_step() returns 0 when the instruction delivered no value, so the
    step is retried a bounded number of times instead of being used unchecked.
    If every attempt fails the result is built from whatever the last failed
    step stored, which is what the unchecked version always did.
*/
forceinline float rdrand64()
{
    static const float RECIP_FLOAT_UINT64_MAX = 1.f/(float)UINT64_MAX;
    unsigned long long s = 0;
    for(int attempt = 0; attempt < 10; attempt++)
    {
        if(_rdrand64_step(&s) != 0)
            break; // got a valid sample
    }
    return ((float)s) * RECIP_FLOAT_UINT64_MAX;
}
/*
    One raw 16-bit RDSEED sample.

    _rdseed16_step() returns 0 when no value was delivered; the step is retried
    a bounded number of times rather than returning an unchecked sample. After
    the last failed attempt the stored value is returned as-is.
*/
forceinline unsigned short rdseed_ushort()
{
    unsigned short s = 0;
    for(int attempt = 0; attempt < 10 && _rdseed16_step(&s) == 0; attempt++)
        _mm_pause(); // back off briefly before asking again
    return s;
}
/*
    One raw 16-bit RDRAND sample.

    _rdrand16_step() returns 0 when no value was delivered; the step is retried
    a bounded number of times rather than returning an unchecked sample. After
    the last failed attempt the stored value is returned as-is.
*/
forceinline unsigned short rdrand_ushort()
{
    unsigned short s = 0;
    for(int attempt = 0; attempt < 10; attempt++)
    {
        if(_rdrand16_step(&s) != 0)
            break; // got a valid sample
    }
    return s;
}
/*
    One raw 32-bit RDSEED sample.

    _rdseed32_step() returns 0 when no value was delivered; the step is retried
    a bounded number of times rather than returning an unchecked sample. After
    the last failed attempt the stored value is returned as-is.
*/
forceinline unsigned int rdseed_uint()
{
    unsigned int s = 0;
    for(int attempt = 0; attempt < 10 && _rdseed32_step(&s) == 0; attempt++)
        _mm_pause(); // back off briefly before asking again
    return s;
}
/*
    One raw 32-bit RDRAND sample.

    _rdrand32_step() returns 0 when no value was delivered; the step is retried
    a bounded number of times rather than returning an unchecked sample. After
    the last failed attempt the stored value is returned as-is.
*/
forceinline unsigned int rdrand_uint()
{
    unsigned int s = 0;
    for(int attempt = 0; attempt < 10; attempt++)
    {
        if(_rdrand32_step(&s) != 0)
            break; // got a valid sample
    }
    return s;
}
/*
    One raw 64-bit RDSEED sample.

    _rdseed64_step() returns 0 when no value was delivered; the step is retried
    a bounded number of times rather than returning an unchecked sample. After
    the last failed attempt the stored value is returned as-is.
*/
forceinline unsigned long long rdseed_u64()
{
    unsigned long long s = 0;
    for(int attempt = 0; attempt < 10 && _rdseed64_step(&s) == 0; attempt++)
        _mm_pause(); // back off briefly before asking again
    return s;
}
/*
    One raw 64-bit RDRAND sample.

    _rdrand64_step() returns 0 when no value was delivered; the step is retried
    a bounded number of times rather than returning an unchecked sample. After
    the last failed attempt the stored value is returned as-is.
*/
forceinline unsigned long long rdrand_u64()
{
    unsigned long long s = 0;
    for(int attempt = 0; attempt < 10; attempt++)
    {
        if(_rdrand64_step(&s) != 0)
            break; // got a valid sample
    }
    return s;
}
// State shared by srandf(), sfrand(), randf() and randfc().
int srandfq = 74235;

// Replace the shared generator state with `seed`.
forceinline void srandf(const int seed)
{
    srandfq = seed;
}

/*
    Advance the shared state and return a float in [-1, 1).

    The state is multiplied by 16807 in unsigned arithmetic, so the intended
    wrap-around is well defined (a signed multiply overflowing is undefined
    behaviour). Converting the product back to int relies on the usual
    two's-complement wrap, which GCC documents.

    The top 23 bits of the state become the mantissa of a float with a fixed
    exponent, giving a value in [2, 4) that is shifted down by 3. The bits are
    copied with memcpy rather than written through a casted pointer, which
    would violate strict aliasing.
*/
float sfrand()
{
    // https://iquilezles.org/articles/sfrand/
    // Inigo Quilez
    srandfq = (int)((unsigned int)srandfq * 16807u);
    const unsigned int bits = (((unsigned int)srandfq)>>9) | 0x40000000;
    float res;
    memcpy(&res, &bits, sizeof(res));
    return res-3.0f;
}

/*
    Advance the shared state and return a float in [0, 1]: the low 31 bits of
    the state scaled by 4.6566129e-10 (about 1/2^31).
    The multiply is done in unsigned arithmetic so the wrap-around is defined.
*/
forceinline float randf()
{
    // https://www.musicdsp.org/en/latest/Other/273-fast-float-random-numbers.html
    // moc.liamg@seir.kinimod
    srandfq = (int)((unsigned int)srandfq * 16807u);
    return (float)(srandfq & 0x7FFFFFFF) * 4.6566129e-010f;
}

/*
    Advance the shared state and return a float in [-1, 1]: the signed state
    scaled by 4.6566129e-10 (about 1/2^31).
    The multiply is done in unsigned arithmetic so the wrap-around is defined.
*/
forceinline float randfc()
{
    // https://www.musicdsp.org/en/latest/Other/273-fast-float-random-numbers.html
    // moc.liamg@seir.kinimod
    srandfq = (int)((unsigned int)srandfq * 16807u);
    return ((float)(srandfq)) * 4.6566129e-010f;
}
/*
    Float in [0, 1] built from 8 bytes read from /dev/urandom.

    The device is opened and closed on every call. If it cannot be opened, or
    the read fails or comes back short, the sample falls back to 0 (so 0.f is
    returned) instead of reading/closing an invalid descriptor or using a
    partially filled value.
*/
forceinline float urandf64()
{
    static const float RECIP_FLOAT_UINT64_MAX = 1.f/(float)UINT64_MAX;
    uint64_t s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return ((float)s) * RECIP_FLOAT_UINT64_MAX;
}
/*
    Float in [-1, 1] built from 8 bytes read from /dev/urandom.

    The device is opened and closed on every call. If it cannot be opened, or
    the read fails or comes back short, the sample falls back to 0 (so -1.f is
    returned) instead of reading/closing an invalid descriptor or using a
    partially filled value.
*/
forceinline float urandfc64()
{
    static const float RECIP_FLOAT_UINT64_MAX = 2.f/(float)UINT64_MAX;
    uint64_t s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return (((float)s) * RECIP_FLOAT_UINT64_MAX)-1.f;
}
/*
    Float in [0, 1] built from sizeof(unsigned int) bytes read from /dev/urandom.

    The device is opened and closed on every call. If it cannot be opened, or
    the read fails or comes back short, the sample falls back to 0 (so 0.f is
    returned) instead of reading/closing an invalid descriptor or using a
    partially filled value.
*/
forceinline float urandf32()
{
    static const float RECIP_FLOAT_UINT32_MAX = 1.f/(float)((unsigned int)-1);
    unsigned int s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return ((float)s) * RECIP_FLOAT_UINT32_MAX;
}
/*
    Float in [-1, 1] built from sizeof(unsigned int) bytes read from /dev/urandom.

    The device is opened and closed on every call. If it cannot be opened, or
    the read fails or comes back short, the sample falls back to 0 (so -1.f is
    returned) instead of reading/closing an invalid descriptor or using a
    partially filled value.
*/
forceinline float urandfc32()
{
    static const float RECIP_FLOAT_UINT32_MAX = 2.f/(float)((unsigned int)-1);
    unsigned int s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return (((float)s) * RECIP_FLOAT_UINT32_MAX)-1.f;
}
/*
    Float in [0, 1] built from sizeof(unsigned short) bytes read from /dev/urandom.

    The device is opened and closed on every call. If it cannot be opened, or
    the read fails or comes back short, the sample falls back to 0 (so 0.f is
    returned) instead of reading/closing an invalid descriptor or using a
    partially filled value.
*/
forceinline float urandf16()
{
    static const float RECIP_FLOAT_UINT16_MAX = 1.f/(float)((unsigned short)-1);
    unsigned short s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return ((float)s) * RECIP_FLOAT_UINT16_MAX;
}
/*
    Float in [-1, 1] built from sizeof(unsigned short) bytes read from /dev/urandom.

    The device is opened and closed on every call. If it cannot be opened, or
    the read fails or comes back short, the sample falls back to 0 (so -1.f is
    returned) instead of reading/closing an invalid descriptor or using a
    partially filled value.
*/
forceinline float urandfc16()
{
    static const float RECIP_FLOAT_UINT16_MAX = 2.f/(float)((unsigned short)-1);
    unsigned short s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return (((float)s) * RECIP_FLOAT_UINT16_MAX)-1.f;
}
/*
    Float in [0, 1] built from one byte read from /dev/urandom.

    The device is opened and closed on every call. If it cannot be opened, or
    the read does not deliver the byte, the sample falls back to 0 (so 0.f is
    returned) instead of reading/closing an invalid descriptor.
*/
forceinline float urandf8()
{
    static const float RECIP_FLOAT_UINT8_MAX = 1.f/(float)((unsigned char)-1);
    unsigned char s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed read
        close(f);
    }
    return ((float)s) * RECIP_FLOAT_UINT8_MAX;
}
/*
    Float in [-1, 1] built from one byte read from /dev/urandom.

    The device is opened and closed on every call. If it cannot be opened, or
    the read does not deliver the byte, the sample falls back to 0 (so -1.f is
    returned) instead of reading/closing an invalid descriptor.
*/
forceinline float urandfc8()
{
    static const float RECIP_FLOAT_UINT8_MAX = 2.f/(float)((unsigned char)-1);
    unsigned char s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed read
        close(f);
    }
    return (((float)s) * RECIP_FLOAT_UINT8_MAX)-1.f;
}
/*
    Signed 64-bit value read from /dev/urandom (opened and closed per call).

    Returns 0 if the device cannot be opened or the read fails or is short.
    The read result is now actually checked instead of being stored in an
    unused local.
*/
forceinline int64_t urand_i64()
{
    int64_t s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return s;
}
/*
    Signed int read from /dev/urandom (opened and closed per call).

    Returns 0 if the device cannot be opened or the read fails or is short,
    instead of reading/closing an invalid descriptor or returning a partially
    filled value.
*/
forceinline int urand_int()
{
    int s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return s;
}
/*
    Signed short read from /dev/urandom (opened and closed per call).

    Returns 0 if the device cannot be opened or the read fails or is short,
    instead of reading/closing an invalid descriptor or returning a partially
    filled value.
*/
forceinline short urand_short()
{
    short s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return s;
}
/*
    One char read from /dev/urandom (opened and closed per call).

    Returns 0 if the device cannot be opened or the read does not deliver the
    byte, instead of reading/closing an invalid descriptor.
*/
forceinline char urand_char()
{
    char s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed read
        close(f);
    }
    return s;
}
/*
    Unsigned 64-bit value read from /dev/urandom (opened and closed per call).

    Returns 0 if the device cannot be opened or the read fails or is short.
    The read result is now actually checked instead of being stored in an
    unused local.
*/
forceinline uint64_t urand_u64()
{
    uint64_t s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return s;
}
/*
    Unsigned int read from /dev/urandom (opened and closed per call).

    The return type is unsigned int: it used to be declared int, which turned
    every sample with the top bit set into a negative number and made this
    function a duplicate of urand_int().

    Returns 0 if the device cannot be opened or the read fails or is short.
*/
forceinline unsigned int urand_uint()
{
    unsigned int s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return s;
}
/*
    Unsigned short read from /dev/urandom (opened and closed per call).

    The return type is unsigned short: it used to be declared short, which
    turned every sample with the top bit set into a negative number and made
    this function a duplicate of urand_short().

    Returns 0 if the device cannot be opened or the read fails or is short.
*/
forceinline unsigned short urand_ushort()
{
    unsigned short s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed or partial read
        close(f);
    }
    return s;
}
/*
    Unsigned char read from /dev/urandom (opened and closed per call).

    The return type is unsigned char: it used to be declared char, so the
    value's sign depended on the platform's char signedness and the function
    could not be told apart from urand_char().

    Returns 0 if the device cannot be opened or the read does not deliver the
    byte.
*/
forceinline unsigned char urand_uchar()
{
    unsigned char s = 0;
    const int f = open("/dev/urandom", O_RDONLY | O_CLOEXEC);
    if(f >= 0)
    {
        if(read(f, &s, sizeof(s)) != (ssize_t)sizeof(s))
            s = 0; // discard a failed read
        close(f);
    }
    return s;
}
// Map one urandf64() sample linearly onto the interval between min and max.
forceinline float uRandFloat64(const float min, const float max)
{
    const float width = max-min;
    const float unit = urandf64();
    return unit * width + min;
}
// Map one urandf32() sample linearly onto the interval between min and max.
forceinline float uRandFloat32(const float min, const float max)
{
    const float width = max-min;
    const float unit = urandf32();
    return unit * width + min;
}
// Map one urandf16() sample linearly onto the interval between min and max.
forceinline float uRandFloat16(const float min, const float max)
{
    const float width = max-min;
    const float unit = urandf16();
    return unit * width + min;
}
// Map one urandf8() sample linearly onto the interval between min and max.
forceinline float uRandFloat8(const float min, const float max)
{
    const float width = max-min;
    const float unit = urandf8();
    return unit * width + min;
}
/*
    Unsigned integer in [min, max] (both inclusive) derived from rand().

    The width max+1-min is computed in unsigned arithmetic and wraps to 0 when
    the interval covers every unsigned value (e.g. min 0, max UINT_MAX). That
    case used to divide by zero; now the raw rand() value is returned, which
    is a member of the full range.
*/
forceinline unsigned int urnd(const unsigned int min, const unsigned int max)
{
    const unsigned int span = max+1-min;
    const unsigned int r = (unsigned int)rand();
    if(span == 0)
        return r;
    return (r%span)+min;
}
/*
    Signed integer in [min, max] (both inclusive) derived from rand().

    The width of the interval is computed in unsigned arithmetic, because
    max+1-min overflows a signed int for wide ranges and is 0 (a division by
    zero) when min == max+1. If the bounds are passed in the wrong order they
    are swapped. When the interval spans every int the raw rand() value is
    returned.
*/
forceinline int srnd(const int min, const int max)
{
    const int lo = min < max ? min : max;
    const int hi = min < max ? max : min;
    const unsigned int span = (unsigned int)hi - (unsigned int)lo + 1u;
    const unsigned int r = (unsigned int)rand();
    if(span == 0)
        return (int)r; // full int range: span wrapped to 0
    // r%span never exceeds rand()'s maximum, so the cast back to int is exact,
    // and lo plus an offset below span stays within [lo, hi]
    return lo + (int)(r%span);
}
// Float between min and max: rand() scaled to [0, 1] by 1/RAND_MAX, then
// stretched over the requested interval.
static forceinline float frnd(const float min, const float max)
{
    static float inv_rand_max = 1.f/(float)RAND_MAX;
    const float unit = ((float)rand()) * inv_rand_max;
    const float width = max-min;
    return unit * width + min;
}
/*
    Wall-clock time from gettimeofday() in microseconds since the Epoch.

    Returns 0 if gettimeofday() fails (the timeval would otherwise be read
    uninitialised). The timezone argument is passed as NULL: POSIX leaves the
    behaviour unspecified for any other value. The seconds are widened to
    uint64_t before the multiply so the result does not depend on the width
    of time_t.
*/
uint64_t microtime()
{
    struct timeval tv;
    if(gettimeofday(&tv, NULL) != 0)
        return 0;
    return (uint64_t)1000000 * (uint64_t)tv.tv_sec + (uint64_t)tv.tv_usec;
}
/*
    Two-phase stopwatch used around each benchmark loop.

    state == 0: record the start time (microseconds and TSC cycles).
    state != 0: stop the clocks and print the elapsed values labelled `name`.

    Both stop stamps are taken before anything else runs, so the cost of
    setlocale() is no longer charged to the measured code. The uint64_t values
    are printed with PRIu64 instead of %lu, and a NULL name is printed as
    "(null)" rather than handed to %s.
*/
void bench(const uint state, const char* name)
{
    static uint64_t stm, st;
    if(state == 0)
    {
        stm = microtime();
        st = __rdtsc();
        return;
    }

    const uint64_t stf = __rdtsc()-st;
    const uint64_t stmf = microtime()-stm;

    // the ' flag groups thousands according to the LC_NUMERIC locale
    setlocale(LC_NUMERIC, "");
    printf(":: %s :: %'" PRIu64 " μs, %'" PRIu64 " Cycles\n", name != NULL ? name : "(null)", stmf, stf);
}
// Number of iterations each bench_loop runs.
#define MAXITER 100000
// Loop header: the statement or block that follows is executed MAXITER times.
#define bench_loop for(uint i = 0; i < MAXITER; i++)
/*
    Benchmark driver: raises the process priority, pins it to CPU 0, then times
    MAXITER calls of every generator and prints one line per generator.

    Every result is added to `antioptim`, which is consumed at the end so the
    compiler cannot discard the calls being measured.
*/
int main(int argc, char** argv)
{
    // Ask for the highest priority and keep asking once a second until it is
    // granted (this needs privileges, hence the sudo hint). nice() can return
    // a negative value on success, so failure is detected as "-1 with errno set".
    for(;;)
    {
        errno = 0;
        if(nice(-20) != -1 || errno == 0)
            break;
        printf("Attempting to set process to nice of -20 (run with sudo)...\n");
        sleep(1);
    }

    // pin the process to CPU 0
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(0, &mask);
    if(sched_setaffinity(0, sizeof(mask), &mask) < 0)
        printf("ERROR: sched_setaffinity() failed.\n\n");

    float antioptim = 0.f;

    // /dev/urandom, scaled to a caller-supplied float range
    bench(0, NULL);
    bench_loop{antioptim += uRandFloat64(-333.333f, 333.333f);}
    bench(1, "uRandFloat64");
    bench(0, NULL);
    bench_loop{antioptim += uRandFloat32(-333.333f, 333.333f);}
    bench(1, "uRandFloat32");
    bench(0, NULL);
    bench_loop{antioptim += uRandFloat16(-333.333f, 333.333f);}
    bench(1, "uRandFloat16");
    bench(0, NULL);
    bench_loop{antioptim += uRandFloat8(-333.333f, 333.333f);}
    bench(1, "uRandFloat8 ");

    // /dev/urandom, unit floats
    bench(0, NULL);
    bench_loop{antioptim += urandf64();}
    bench(1, "urandf64 ");
    bench(0, NULL);
    bench_loop{antioptim += urandfc64();}
    bench(1, "urandfc64");

    bench(0, NULL);
    bench_loop{antioptim += urandf32();}
    bench(1, "urandf32 ");
    bench(0, NULL);
    bench_loop{antioptim += urandfc32();}
    bench(1, "urandfc32");

    bench(0, NULL);
    bench_loop{antioptim += urandf16();}
    bench(1, "urandf16 ");
    bench(0, NULL);
    bench_loop{antioptim += urandfc16();}
    bench(1, "urandfc16");

    bench(0, NULL);
    bench_loop{antioptim += urandf8();}
    bench(1, "urandf8 ");
    bench(0, NULL);
    bench_loop{antioptim += urandfc8();}
    bench(1, "urandfc8 ");

    // /dev/urandom, signed integers
    bench(0, NULL);
    bench_loop{antioptim += urand_i64();}
    bench(1, "urand_i64 ");
    bench(0, NULL);
    bench_loop{antioptim += urand_int();}
    bench(1, "urand_int ");
    bench(0, NULL);
    bench_loop{antioptim += urand_short();}
    bench(1, "urand_short ");
    bench(0, NULL);
    bench_loop{antioptim += urand_char();}
    bench(1, "urand_char ");

    // /dev/urandom, unsigned integers
    bench(0, NULL);
    bench_loop{antioptim += urand_u64();}
    bench(1, "urand_u64 ");
    bench(0, NULL);
    bench_loop{antioptim += urand_uint();}
    bench(1, "urand_uint ");
    bench(0, NULL);
    bench_loop{antioptim += urand_ushort();}
    bench(1, "urand_ushort");
    bench(0, NULL);
    bench_loop{antioptim += urand_uchar();}
    bench(1, "urand_uchar ");

    // RDRAND / RDSEED, floats
    bench(0, NULL);
    bench_loop{antioptim += rdrand64();}
    bench(1, "rdrand64 ");
    bench(0, NULL);
    bench_loop{antioptim += rdrand64c();}
    bench(1, "rdrand64c");
    bench(0, NULL);
    bench_loop{antioptim += rdrand32();}
    bench(1, "rdrand32 ");
    bench(0, NULL);
    bench_loop{antioptim += rdrand32c();}
    bench(1, "rdrand32c");
    bench(0, NULL);
    bench_loop{antioptim += rdrand16();}
    bench(1, "rdrand16 ");
    bench(0, NULL);
    bench_loop{antioptim += rdrand16c();}
    bench(1, "rdrand16c");
    bench(0, NULL);
    bench_loop{antioptim += rdseed64();}
    bench(1, "rdseed64 ");
    bench(0, NULL);
    bench_loop{antioptim += rdseed64c();}
    bench(1, "rdseed64c");
    bench(0, NULL);
    bench_loop{antioptim += rdseed32();}
    bench(1, "rdseed32 ");
    bench(0, NULL);
    bench_loop{antioptim += rdseed32c();}
    bench(1, "rdseed32c");
    bench(0, NULL);
    bench_loop{antioptim += rdseed16();}
    bench(1, "rdseed16 ");
    bench(0, NULL);
    bench_loop{antioptim += rdseed16c();}
    bench(1, "rdseed16c");

    // RDRAND / RDSEED, raw integers
    bench(0, NULL);
    bench_loop{antioptim += rdrand_ushort();}
    bench(1, "rdrand_ushort");
    bench(0, NULL);
    bench_loop{antioptim += rdrand_uint();}
    bench(1, "rdrand_uint ");
    bench(0, NULL);
    bench_loop{antioptim += rdrand_u64();}
    bench(1, "rdrand_u64 ");
    bench(0, NULL);
    bench_loop{antioptim += rdseed_ushort();}
    bench(1, "rdseed_ushort");
    bench(0, NULL);
    bench_loop{antioptim += rdseed_uint();}
    bench(1, "rdseed_uint ");
    bench(0, NULL);
    bench_loop{antioptim += rdseed_u64();}
    bench(1, "rdseed_u64 ");

    // libc rand() wrappers
    bench(0, NULL);
    bench_loop{antioptim += urnd(111, 333);}
    bench(1, "urnd ");
    bench(0, NULL);
    bench_loop{antioptim += srnd(-333, 333);}
    bench(1, "srnd ");
    bench(0, NULL);
    bench_loop{antioptim += frnd(-333.333f, 333.333f);}
    bench(1, "frnd ");

    // MMX shuffle-and-add generators
    bench(0, NULL);
    bench_loop{antioptim += mmxf32(0);}
    bench(1, "mmxf32 ");
    bench(0, NULL);
    bench_loop{antioptim += mmxf32c(0);}
    bench(1, "mmxf32c");
    bench(0, NULL);
    bench_loop{antioptim += mmxi64(0);}
    bench(1, "mmxi64 ");

    // multiply-by-16807 generators
    bench(0, NULL);
    bench_loop{antioptim += sfrand();}
    bench(1, "sfrand ");
    bench(0, NULL);
    bench_loop{antioptim += randf();}
    bench(1, "randf ");
    bench(0, NULL);
    bench_loop{antioptim += randfc();}
    bench(1, "randfc ");

    // Consume the accumulator so the compiler cannot discard the calls above.
    // Casting the (huge) float straight to char is undefined behaviour, so one
    // byte of its object representation is printed instead.
    unsigned char sink = 0;
    memcpy(&sink, &antioptim, sizeof(sink));
    printf("\n%c\n", sink);
    return 0;
}
/*
:: uRandFloat64 :: 205,903 μs, 369,920,124 Cycles
:: uRandFloat32 :: 194,706 μs, 349,801,128 Cycles
:: uRandFloat16 :: 203,920 μs, 366,354,396 Cycles
:: uRandFloat8 :: 199,900 μs, 359,132,274 Cycles
:: urandf64 :: 204,401 μs, 367,220,448 Cycles
:: urandfc64 :: 208,282 μs, 374,192,082 Cycles
:: urandf32 :: 193,990 μs, 348,517,440 Cycles
:: urandfc32 :: 198,519 μs, 356,651,928 Cycles
:: urandf16 :: 205,598 μs, 369,370,296 Cycles
:: urandfc16 :: 201,953 μs, 362,823,372 Cycles
:: urandf8 :: 198,604 μs, 356,806,368 Cycles
:: urandfc8 :: 202,944 μs, 364,606,002 Cycles
:: urand_i64 :: 201,029 μs, 361,165,158 Cycles
:: urand_int :: 209,511 μs, 376,401,168 Cycles
:: urand_short :: 197,778 μs, 355,322,016 Cycles
:: urand_char :: 202,014 μs, 362,931,660 Cycles
:: urand_u64 :: 202,757 μs, 364,268,322 Cycles
:: urand_uint :: 202,535 μs, 363,868,992 Cycles
:: urand_ushort :: 197,978 μs, 355,681,818 Cycles
:: urand_uchar :: 197,666 μs, 355,120,524 Cycles
:: rdrand64 :: 89,324 μs, 160,473,618 Cycles
:: rdrand64c :: 85,533 μs, 153,664,272 Cycles
:: rdrand32 :: 42,711 μs, 76,728,708 Cycles
:: rdrand32c :: 42,801 μs, 76,893,210 Cycles
:: rdrand16 :: 42,755 μs, 76,809,222 Cycles
:: rdrand16c :: 42,767 μs, 76,831,776 Cycles
:: rdseed64 :: 85,539 μs, 153,674,046 Cycles
:: rdseed64c :: 85,289 μs, 153,224,586 Cycles
:: rdseed32 :: 42,542 μs, 76,425,552 Cycles
:: rdseed32c :: 42,718 μs, 76,741,740 Cycles
:: rdseed16 :: 42,549 μs, 76,438,062 Cycles
:: rdseed16c :: 42,479 μs, 76,311,648 Cycles
:: rdrand_ushort :: 42,571 μs, 76,477,914 Cycles
:: rdrand_uint :: 42,596 μs, 76,522,050 Cycles
:: rdrand_u64 :: 85,347 μs, 153,329,904 Cycles
:: rdseed_ushort :: 42,736 μs, 76,773,924 Cycles
:: rdseed_uint :: 42,640 μs, 76,602,870 Cycles
:: rdseed_u64 :: 85,133 μs, 152,944,830 Cycles
:: urnd :: 1,119 μs, 2,007,882 Cycles
:: srnd :: 1,107 μs, 1,986,912 Cycles
:: frnd :: 1,080 μs, 1,937,520 Cycles
:: mmxf32 :: 249 μs, 444,510 Cycles
:: mmxf32c :: 257 μs, 458,802 Cycles
:: mmxi64 :: 245 μs, 436,986 Cycles
:: randf :: 75 μs, 131,544 Cycles
:: randfc :: 75 μs, 131,544 Cycles
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment