Last active
March 25, 2018 15:29
-
-
Save zeux/cd41829e610fd3ce33e4ca9a16a16293 to your computer and use it in GitHub Desktop.
This is a response to https://lemire.me/blog/2018/03/24/when-shuffling-large-arrays-how-much-time-can-be-attributed-to-random-number-generation/; the results of that benchmark looked off to me so I reimplemented this in C++ and re-measured it.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// On a Core i7-8650U running at 2.1 GHz, I get the following numbers: | |
// /mnt/c/work $ g++ -O2 rngshuf.cpp && ./a.out | |
// shuf 2.195379 s, gen 0.338417 s, shuf-gen 2.731224 s, fwd shuf 2.227119 s | |
// shuf 2.167099 s, gen 0.122208 s, shuf-gen 2.870520 s, fwd shuf 2.314143 s | |
// shuf 2.269147 s, gen 0.122214 s, shuf-gen 2.845593 s, fwd shuf 2.367242 s | |
// shuf 2.270375 s, gen 0.122476 s, shuf-gen 2.867398 s, fwd shuf 2.314249 s | |
// shuf 2.258742 s, gen 0.120076 s, shuf-gen 2.863602 s, fwd shuf 2.328118 s | |
// shuf 2.265792 s, gen 0.126285 s, shuf-gen 2.923946 s, fwd shuf 2.356490 s | |
// shuf 2.264414 s, gen 0.122542 s, shuf-gen 2.880007 s, fwd shuf 2.365302 s | |
#include <stdint.h> | |
#include <stdio.h> | |
#include <time.h> | |
#if defined(__linux__) | |
double timestamp() | |
{ | |
timespec ts; | |
clock_gettime(CLOCK_MONOTONIC, &ts); | |
return double(ts.tv_sec) + 1e-9 * double(ts.tv_nsec); | |
} | |
#elif defined(_WIN32) | |
struct LARGE_INTEGER { __int64 QuadPart; }; | |
extern "C" __declspec(dllimport) int __stdcall QueryPerformanceCounter(LARGE_INTEGER* lpPerformanceCount); | |
extern "C" __declspec(dllimport) int __stdcall QueryPerformanceFrequency(LARGE_INTEGER* lpFrequency); | |
double timestamp() | |
{ | |
LARGE_INTEGER freq, counter; | |
QueryPerformanceFrequency(&freq); | |
QueryPerformanceCounter(&counter); | |
return double(counter.QuadPart) / double(freq.QuadPart); | |
} | |
#else | |
double timestamp() | |
{ | |
return double(clock()) / double(CLOCKS_PER_SEC); | |
} | |
#endif | |
#define PCG32_INITIALIZER { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL } | |
typedef struct { uint64_t state; uint64_t inc; } pcg32_random_t; | |
uint32_t pcg32_random_r(pcg32_random_t* rng) | |
{ | |
uint64_t oldstate = rng->state; | |
// Advance internal state | |
rng->state = oldstate * 6364136223846793005ULL + (rng->inc|1); | |
// Calculate output function (XSH RR), uses old state for max ILP | |
uint32_t xorshifted = ((oldstate >> 18u) ^ oldstate) >> 27u; | |
uint32_t rot = oldstate >> 59u; | |
return (xorshifted >> rot) | (xorshifted << ((-rot) & 31)); | |
} | |
uint32_t random_bounded(pcg32_random_t* rng, uint32_t range) | |
{ | |
uint64_t random32bit = pcg32_random_r(rng); | |
uint64_t result = random32bit * range; | |
return result >> 32; | |
} | |
void swap(int* a, int* b) | |
{ | |
int va = *a; | |
int vb = *b; | |
*a = vb; | |
*b = va; | |
} | |
int main() | |
{ | |
int N = 100*1000*1000; | |
int* arr = new int[N]; | |
int* ind = new int[N]; | |
for (;;) | |
{ | |
for (int i = 0; i < N; ++i) | |
arr[i] = i; | |
double ts0 = timestamp(); | |
{ | |
pcg32_random_t rng = PCG32_INITIALIZER; | |
for (int i=N; i>1; i--) | |
{ | |
int p = random_bounded(&rng, i); // number in [0,i) | |
swap(arr+i-1, arr+p); // swap the values at i-1 and p | |
} | |
} | |
double te0 = timestamp(); | |
double ts1 = timestamp(); | |
{ | |
pcg32_random_t rng = PCG32_INITIALIZER; | |
for (int i=N; i>1; i--) | |
{ | |
ind[i-1] = random_bounded(&rng, i); // number in [0,i) | |
} | |
} | |
double te1 = timestamp(); | |
for (int i = 0; i < N; ++i) | |
arr[i] = i; | |
double ts2 = timestamp(); | |
{ | |
for (int i=N; i>1; i--) | |
{ | |
int p = ind[i-1]; | |
swap(arr+i-1, arr+p); // swap the values at i-1 and p | |
} | |
} | |
double te2 = timestamp(); | |
for (int i = 0; i < N; ++i) | |
arr[i] = i; | |
double ts3 = timestamp(); | |
{ | |
pcg32_random_t rng = PCG32_INITIALIZER; | |
for (int i=0; i < N-1; ++i) | |
{ | |
int p = i + random_bounded(&rng, N-i); | |
swap(arr+i, arr+p); // swap the values at i and p | |
} | |
} | |
double te3 = timestamp(); | |
printf("shuf %f s, gen %f s, shuf-gen %f s, fwd shuf %f s\n", te0 - ts0, te1 - ts1, te2 - ts2, te3 - ts3); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is not a re-implementation of the algorithms implemented in Java.