Skip to content

Instantly share code, notes, and snippets.

@dchansen
Last active December 18, 2021 06:12
Show Gist options
  • Save dchansen/4975e3558cb6bfb21145 to your computer and use it in GitHub Desktop.
Save dchansen/4975e3558cb6bfb21145 to your computer and use it in GitHub Desktop.
Ranshi64 speed test
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <stdbool.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <math.h>
#include <limits.h>
/* ---- Generator geometry (compile-time) -------------------------------- */

/* Number of 64-bit words in the state buffer. The index wrap below uses
 * `NB - 1` as a bit mask, which only works when NB is a power of two. */
#define NB 512
/* Width, in bits, of one state word. */
#define NBIT 64
/* Left-rotation count applied to a state word on every draw. */
#define IS 17
/* Complementary shift count (NBIT - IS). */
#define ISL (NBIT-IS)

/* ---- Typed copies of the constants above ------------------------------ */

const int64_t nb = NB;          /* buffer length, used as the seeding loop bound */
const int64_t mask = NB-1;      /* AND-mask that wraps an index into [0, NB) */
const uint64_t is = IS;         /* rotation count used by ranshi64() */
const int64_t isl = ISL;        /* not referenced by the code visible in this file */
const int64_t isr = -IS;        /* not referenced by the code visible in this file */

/* ---- Mutable generator state ------------------------------------------ */

static int64_t buffer[NB];      /* the NB state words */
static int64_t iangle;          /* index of the buffer slot used by the next draw */
static int64_t ic;              /* raw word read from the buffer by the previous draw */
static int lower=0;             /* toggle flipped by every ranshi() call */
/* Added to the index update in ranshi64() and incremented on every draw.
 * NOTE(review): this is a plain `char`, so the value it wraps to depends on
 * whether `char` is signed on the target platform - confirm this is intended. */
static char offset=1;
static uint64_t __inline ranshi64() {
uint64_t ir = buffer[iangle];
uint64_t iro = (ir << is) | ( ir >> (NBIT-is));
buffer[iangle] = iro ^ ic;
iangle = (mask & (ir+offset));
offset++;
ic = ir;
return iro;
}
/* Most recent 64-bit draw; ranshi() hands it out as two 32-bit halves. */
static uint64_t y;

/*
 * Return the next 32-bit value, consuming one 64-bit draw per two calls.
 *
 * `lower` is flipped on every call. When it becomes non-zero the low 32 bits
 * of the cached word `y` are returned; otherwise a fresh word is drawn from
 * ranshi64(), cached in `y`, and its high 32 bits are returned.
 *
 * Since `lower` starts at 0 and is flipped before being tested, the first
 * call after start-up returns the low half of whatever `y` already holds,
 * before any word has been drawn.
 */
unsigned long ranshi() {
    lower ^= 1;
    if (lower != 0) {
        return (uint32_t)y;
    }

    y = ranshi64();
    return (uint32_t)(y >> 32);
}
/*
 * Linear congruential step carried out in double precision.
 *
 * Advances *state to (*state * 2416 + 37444) reduced modulo 1771875, then
 * returns (state / modulus - 0.5) scaled by 2^32 and truncated to int32_t.
 *
 * state: in/out; the current LCG state, overwritten with the next one.
 */
int32_t lcg_ran(double* state){
    const double modulus = 1771875.0;
    const double inv_modulus = 1.0 / modulus;
    const double multiplier = 2416.0;
    const double increment = 37444.0;
    const double two_pow_32 = 4294967296.0;   /* 2^32 */

    const double advanced = *state * multiplier + increment;
    /* Integer quotient, obtained by multiplying with the reciprocal and
     * truncating; used to take the remainder without calling fmod(). */
    const int64_t quotient = (int64_t)(advanced * inv_modulus);
    *state = advanced - quotient * modulus;

    const double fraction = *state * inv_modulus;
    return (int32_t)(two_pow_32 * (fraction - 0.5));
}
void init_ran(int64_t seed){
double rtval = 4*seed+1;
for (int i = 0; i < nb; i++){
buffer[i]=lcg_ran(&rtval);
buffer[i] |= ((int64_t)lcg_ran(&rtval)) << 32;
}
iangle =lcg_ran(&rtval);
iangle &= mask;
ic=lcg_ran(&rtval);
ic |= ((int64_t)lcg_ran(&rtval)) << 32;
//Warmup
//
for (int i = 0; i < 20*NB; i++)
ranshi();
}
/*
 * User-mode CPU time consumed so far by this process, in microseconds.
 *
 * The sub-second part is truncated to whole milliseconds before being added,
 * so the result is always a multiple of 1000. The return value of
 * getrusage() is not checked.
 */
uint64_t getusertime() {
    struct rusage usage;
    getrusage( RUSAGE_SELF, &usage );

    const uint64_t seconds_part = usage.ru_utime.tv_sec * 1000000ULL;
    const uint64_t millis_part = ( usage.ru_utime.tv_usec / 1000 ) * 1000;
    return seconds_part + millis_part;
}
/*
 * Benchmark driver.
 *
 * Usage: <program> <iterations>
 *
 * Seeds the generator with a fixed seed, calls ranshi() <iterations> times
 * and prints the elapsed user CPU time, the call rate and the time per call.
 * The iteration count is parsed with base 0, so decimal, octal (leading 0)
 * and hexadecimal (leading 0x) are all accepted.
 *
 * Returns 0 on success, EXIT_FAILURE when the iteration count is missing,
 * not a number, or not positive.
 */
int main( int argc, char* argv[] ) {
    if ( argc < 2 ) {
        fprintf( stderr, "usage: %s <iterations>\n", argc > 0 ? argv[0] : "ranshi64" );
        return EXIT_FAILURE;
    }

    char* end = NULL;
    const long long int n = strtoll( argv[1], &end, 0 );
    /* Reject input with no digits at all, and counts that would make the
     * countdown loop below run away or the per-call figures meaningless. */
    if ( end == argv[1] || n <= 0 ) {
        fprintf( stderr, "error: iteration count must be a positive integer, got '%s'\n", argv[1] );
        return EXIT_FAILURE;
    }

    uint64_t t = 0;
    init_ran(22479835);
    const int64_t start = getusertime();
    /* XOR every output into t so the calls have an observable result. */
    for( long long int i = n; i-- != 0; ) t ^= ranshi();
    const int64_t elapsed = getusertime() - start;
    const double secs = elapsed / 1E6;
    printf( "%f s, %.02f queries/s, %.02f ns/query\n", secs, n / secs, 1E9 * secs / n );
    /* Consume t so the compiler cannot discard the benchmark loop as dead code. */
    if ( t == 0 ) putchar( 0 );
    return 0;
}
@nbassler
Copy link

<3 Thanks. @grzanka also suggested looking into xoshiro256++; I will check :)

@dchansen
Copy link
Author

Oh, and be sure to test the speed in the actual application. Ranshi (and the Mersenne Twister) have a fairly large state. This means they can evict other data from the CPU cache, which can make them slower in real-world usage than in microbenchmarks.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment