Skip to content

Instantly share code, notes, and snippets.

@wilzbach
Last active January 13, 2017 08:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wilzbach/74ffebc236d3a9c10bcae730fc1571fa to your computer and use it in GitHub Desktop.
Save wilzbach/74ffebc236d3a9c10bcae730fc1571fa to your computer and use it in GitHub Desktop.
std.random.uniform vs. bitwise
> ldc -O5 -release -boundscheck=off test.d && ./test 1 2 8 256 1024 65536
benchmark: 1 (bits: 1)
random.uniform = 742 ms, 824 μs, and 5 hnsecs
random.bitwise = 218 ms, 972 μs, and 9 hnsecs
random.uniform.fixed = 592 ms, 647 μs, and 2 hnsecs
benchmark: 2 (bits: 2)
random.uniform = 746 ms, 839 μs, and 5 hnsecs
random.bitwise = 497 ms and 189 μs
random.uniform.fixed = 594 ms, 196 μs, and 1 hnsec
benchmark: 7 (bits: 3)
random.uniform = 747 ms, 499 μs, and 6 hnsecs
random.bitwise = 712 ms, 791 μs, and 7 hnsecs
random.uniform.fixed = 595 ms, 432 μs, and 4 hnsecs
benchmark: 255 (bits: 8)
random.uniform = 746 ms, 915 μs, and 1 hnsec
random.bitwise = 1 sec, 787 ms, 508 μs, and 2 hnsecs
random.uniform.fixed = 592 ms, 198 μs, and 7 hnsecs
benchmark: 1023 (bits: 10)
random.uniform = 743 ms and 597 μs
random.bitwise = 2 secs, 208 ms, 421 μs, and 5 hnsecs
random.uniform.fixed = 593 ms, 368 μs, and 6 hnsecs
benchmark: 65535 (bits: 16)
random.uniform = 743 ms, 132 μs, and 7 hnsecs
random.bitwise = 3 secs, 483 ms, 936 μs, and 3 hnsecs
random.uniform.fixed = 592 ms, 316 μs, and 4 hnsecs
> dmd -O -release -boundscheck=off test.d && ./test 1 2 8 256 1024 65536
benchmark: 1 (bits: 1)
random.uniform = 1 sec, 246 ms, 722 μs, and 9 hnsecs
random.bitwise = 630 ms, 657 μs, and 7 hnsecs
random.uniform.fixed = 1 sec, 236 ms, 366 μs, and 6 hnsecs
benchmark: 2 (bits: 2)
random.uniform = 1 sec, 239 ms, 266 μs, and 8 hnsecs
random.bitwise = 1 sec, 241 ms, 494 μs, and 5 hnsecs
random.uniform.fixed = 1 sec, 238 ms, 637 μs, and 3 hnsecs
benchmark: 7 (bits: 3)
random.uniform = 1 sec, 281 ms, 351 μs, and 6 hnsecs
random.bitwise = 1 sec, 808 ms, 20 μs, and 9 hnsecs
random.uniform.fixed = 1 sec, 236 ms, 833 μs, and 2 hnsecs
benchmark: 254 (bits: 8)
random.uniform = 1 sec, 242 ms, 289 μs, and 6 hnsecs
random.bitwise = 4 secs, 745 ms, 717 μs, and 2 hnsecs
random.uniform.fixed = 1 sec, 247 ms, 597 μs, and 6 hnsecs
benchmark: 1023 (bits: 10)
random.uniform = 1 sec, 245 ms, 6 μs, and 6 hnsecs
random.bitwise = 6 secs, 296 ms, 748 μs, and 9 hnsecs
random.uniform.fixed = 1 sec, 237 ms, 685 μs, and 5 hnsecs
benchmark: 65535 (bits: 16)
random.uniform = 1 sec, 245 ms, 286 μs, and 7 hnsecs
random.bitwise = 9 secs, 638 ms, 437 μs, and 1 hnsec
random.uniform.fixed = 1 sec, 239 ms, 99 μs, and 5 hnsecs
import std.algorithm, std.conv, std.datetime, std.functional, std.math, std.range, std.random, std.stdio, std.traits, std.typecons;
private void doNotOptimizeAway(T)(auto ref T t)
{
import core.thread : getpid;
import std.stdio : writeln;
if(getpid() == 1) {
writeln(*cast(char*)&t);
}
}
void main(string[] args)
{
assert(args.length > 1);
args[1..$].each!(x => x.to!int.test);
}
void test(int k)
{
auto n = 50_000;
k = k > 2 ? k - 1 : k;
int kbits = cast(int) log2(k) + 1;
writefln("benchmark: %d (bits: %d)", k, kbits);
auto bench = benchmark!(
{ doNotOptimizeAway({
long bs;
auto rng = Mt19937(42);
foreach (_; 0..n)
bs += uniform(0, k, rng);
return bs;
}());},
{ doNotOptimizeAway({
import core.bitop;
long bs;
auto rng = Mt19937(42);
auto r = rng.bitwise;
foreach (_; 0..n)
{
long t;
// opSlice doesn't work with take
foreach (i; 0..kbits)
{
t &= r.front << i;
r.popFront;
}
bs += t;
}
return bs;
}());},
{ doNotOptimizeAway({
long bs;
auto rng = Mt19937(42);
foreach (_; 0..n)
bs += uniform(0, 255, rng);
return bs;
}());},
)(1000);
string[] names = ["random.uniform", "random.bitwise", "random.uniform.fixed"];
foreach(j,r;bench)
writefln("%-15s = %s", names[j], r.to!Duration);
}
// from PR 5002
private struct Bitwise(R)
if (isInputRange!R && isIntegral!(ElementType!R))
{
private:
alias ElemType = ElementType!R;
alias UnsignedElemType = Unsigned!ElemType;
R parent;
enum bitsNum = ElemType.sizeof * 8;
size_t maskPos = 1;
static if (isBidirectionalRange!R)
{
size_t backMaskPos = bitsNum;
}
public:
this()(auto ref R range)
{
parent = range;
}
static if (isInfinite!R)
{
enum empty = false;
}
else
{
/**
* Check if the range is empty
*
* Returns: a boolean true or false
*/
bool empty()
{
static if (hasLength!R)
{
return length == 0;
}
else static if (isBidirectionalRange!R)
{
if (parent.empty)
{
return true;
}
else
{
/*
If we have consumed the last element of the range both from
the front and the back, then the masks positions will overlap
*/
return parent.save.dropOne.empty && (maskPos > backMaskPos);
}
}
else
{
/*
If we consumed the last element of the range, but not all the
bits in the last element
*/
return parent.empty;
}
}
}
bool front()
{
assert(!empty);
return (parent.front & mask(maskPos)) != 0;
}
void popFront()
{
assert(!empty);
++maskPos;
if (maskPos > bitsNum)
{
parent.popFront;
maskPos = 1;
}
}
static if (hasLength!R)
{
size_t length()
{
auto len = parent.length * bitsNum - (maskPos - 1);
static if (isBidirectionalRange!R)
{
len -= bitsNum - backMaskPos;
}
return len;
}
alias opDollar = length;
}
static if (isForwardRange!R)
{
typeof(this) save()
{
auto result = this;
result.parent = parent.save;
return result;
}
}
static if (isBidirectionalRange!R)
{
bool back()
{
assert(!empty);
return (parent.back & mask(backMaskPos)) != 0;
}
void popBack()
{
assert(!empty);
--backMaskPos;
if (backMaskPos == 0)
{
parent.popBack;
backMaskPos = bitsNum;
}
}
}
static if (isRandomAccessRange!R)
{
/**
Return the `n`th bit within the range
*/
bool opIndex(size_t n)
in
{
/*
If it does not have the length property, it means that R is
an infinite range
*/
static if (hasLength!R)
{
assert(n < length, "Index out of bounds");
}
}
body
{
immutable size_t remainingBits = bitsNum - maskPos + 1;
// If n >= maskPos, then the bit sign will be 1, otherwise 0
immutable sizediff_t sign = (remainingBits - n - 1) >> (sizediff_t.sizeof * 8 - 1);
/*
By truncating n with remainingBits bits we have skipped the
remaining bits in parent[0], so we need to add 1 to elemIndex.
Because bitsNum is a power of 2, n / bitsNum == n >> bitsNum.bsf
*/
import core.bitop : bsf;
immutable size_t elemIndex = sign * (((n - remainingBits) >> bitsNum.bsf) + 1);
/*
Since the indexing is from LSB to MSB, we need to index at the
remainder of (n - remainingBits).
Because bitsNum is a power of 2, n % bitsNum == n & (bitsNum - 1)
*/
immutable size_t elemMaskPos = (sign ^ 1) * (maskPos + n)
+ sign * (1 + ((n - remainingBits) & (bitsNum - 1)));
return (parent[elemIndex] & mask(elemMaskPos)) != 0;
}
/**
Assigns `flag` to the `n`th bit within the range
*/
void opIndexAssign(bool flag, size_t n)
in
{
static if (hasLength!R)
{
assert(n < length, "Index out of bounds");
}
}
body
{
import core.bitop : bsf;
immutable size_t remainingBits = bitsNum - maskPos + 1;
immutable sizediff_t sign = (remainingBits - n - 1) >> (sizediff_t.sizeof * 8 - 1);
immutable size_t elemIndex = sign * (((n - remainingBits) >> bitsNum.bsf) + 1);
immutable size_t elemMaskPos = (sign ^ 1) * (maskPos + n)
+ sign * (1 + ((n - remainingBits) & (bitsNum - 1)));
auto elem = parent[elemIndex];
auto elemMask = mask(elemMaskPos);
parent[elemIndex] = cast(UnsignedElemType)(flag * (elem | elemMask)
+ (flag ^ 1) * (elem & ~elemMask));
}
Bitwise!R opSlice()
{
return this.save;
}
Bitwise!R opSlice(size_t start, size_t end)
in
{
assert(start < end, "Invalid bounds: end <= start");
}
body
{
import core.bitop : bsf;
size_t remainingBits = bitsNum - maskPos + 1;
sizediff_t sign = (remainingBits - start - 1) >> (sizediff_t.sizeof * 8 - 1);
immutable size_t startElemIndex = sign * (((start - remainingBits) >> bitsNum.bsf) + 1);
immutable size_t startElemMaskPos = (sign ^ 1) * (maskPos + start)
+ sign * (1 + ((start - remainingBits) & (bitsNum - 1)));
immutable size_t sliceLen = end - start - 1;
remainingBits = bitsNum - startElemMaskPos + 1;
sign = (remainingBits - sliceLen - 1) >> (sizediff_t.sizeof * 8 - 1);
immutable size_t endElemIndex = startElemIndex
+ sign * (((sliceLen - remainingBits) >> bitsNum.bsf) + 1);
immutable size_t endElemMaskPos = (sign ^ 1) * (startElemMaskPos + sliceLen)
+ sign * (1 + ((sliceLen - remainingBits) & (bitsNum - 1)));
typeof(return) result;
// Get the slice to be returned from the parent
result.parent = (parent[startElemIndex .. endElemIndex + 1]).save;
result.maskPos = startElemMaskPos;
static if (isBidirectionalRange!R)
{
result.backMaskPos = endElemMaskPos;
}
return result;
}
}
private:
auto mask(size_t maskPos)
{
return (1UL << (maskPos - 1UL));
}
}
/**
Bitwise adapter over an integral type range. Consumes the range elements bit by
bit, from the least significant bit to the most significant bit.
Params:
R = an integral input range to iterate over
Returns:
A `Bitwise` input range with propagated forward, bidirectional
and random access capabilities
*/
auto bitwise(R)(auto ref R range)
if (isInputRange!R && isIntegral!(ElementType!R))
{
return Bitwise!R(range);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment