Skip to content

Instantly share code, notes, and snippets.

@DmitryOlshansky
Created February 2, 2013 21:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DmitryOlshansky/4699388 to your computer and use it in GitHub Desktop.
Save DmitryOlshansky/4699388 to your computer and use it in GitHub Desktop.
(ab)using virtual memory for fun and profit
///Written in the D programming language
// Bench built-in array append, std.array.Appender
// and custom virtual-memory aware VirtualArray in terms of
// memory usage and the time taken
// to append up to X megabytes using different chunk sizes:
// 16 bytes, 64bytes, 256 bytes and 16Kb at a time.
// pass this version to take insert readln's
// at interesting points and investigate RAM usage
// make sure to enable relevant columns in task manager process details:
// committed size and private working set
// version = diagnose;
import std.c.windows.windows;
import std.exception, std.datetime, std.algorithm, std.range, std.array;
auto roundUpToPowerOf2(size_t var, size_t pow2)
{
return (var + pow2-1) & ~(pow2-1);
}
struct VirtualArray
{
private:
ubyte* pointer; // to the beginning of the reserved memory
size_t _length; // length of data
size_t commited; // committed memory so far
size_t reserved; // total possible maximum to grow to
size_t pageSize; // page size, this could be global
//size_t pageSize;
// commit some more memory from the reserved range
void extend(size_t size)
{
size = roundUpToPowerOf2(size, pageSize);
// this will throw once run out of reserved pages
enforce(VirtualAlloc(pointer+commited, size,
MEM_COMMIT, PAGE_READWRITE));
commited += size;
}
public:
this(size_t maxCapacity)
{
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
pageSize = sysinfo.dwPageSize;
// the page size is a power of 2
// round the capacity up to a multiple of page size
maxCapacity = roundUpToPowerOf2(maxCapacity, pageSize);
pointer = cast(ubyte*)enforce(VirtualAlloc(null, maxCapacity,
MEM_RESERVE, PAGE_READWRITE));
commited = 0;
reserved = maxCapacity;
_length = 0;
}
// bare minimum primitives to run benchmark
ref ubyte opIndex(size_t idx)
{
assert(idx < length);
return pointer[idx];
}
// ditto
ubyte[] opSlice(size_t from, size_t to)
{
assert(from < to && to <= length);
return pointer[from..to];
}
// ditto
@property size_t length(){ return _length; }
void opOpAssign(string op:"~")(ubyte[] data)
{
size_t end = length + data.length;
while(end > commited)
extend(end-commited);
pointer[length..end] = data[];
_length = end;
}
~this()
{
// should not throw, struct is not copyable
enforce(VirtualFree(pointer, 0, MEM_RELEASE));
}
}
import std.stdio;
void timeIt(string prefix, void delegate() run)
{
StopWatch sw;
sw.start();
run();
sw.stop();
writefln(prefix~ " %4s ms", sw.peek().msecs);
}
bool verify(Arr)(ubyte[] pattern, ref Arr arr)
{
for(size_t i=0; i<arr.length; i+= pattern.length)
{
if(arr[i..i+pattern.length] != pattern[])
{
writeln(arr[i .. i+pattern.length], " vs ", pattern);
return false;
}
}
return true;
}
void main()
{
size_t totalSize = 120*2^^20;
auto chunks = [
iota(0, 16).map!(x=>cast(ubyte)x).array,
iota(0, 64).map!(x=>cast(ubyte)x).array,
iota(0, 256).map!(x=>cast(ubyte)x).array,
iota(0, 2^^10).map!(x=>cast(ubyte)x).array
];
auto titles = [ " 16b", " 64b", "256b", " 16K" ];
import core.memory;
GC.disable(); // to prevent measuring a collection by mistake
foreach(n, chunk; chunks)
{
// reserve memory anew on each iteration
// I was able to easily go up to 1.5 G on 32bits
// note: there are no reallocations here at all
version(diagnose)
if(n == 0)
{
writeln("Before reserving address space");
readln();
}
auto virtArr = VirtualArray(totalSize);
version(diagnose)
if(n == 0)
{
writeln("Reserved address space");
readln();
}
timeIt(titles[n]~" virtual", (){
foreach(i; 0..totalSize/chunk.length)
{
virtArr ~= chunk;
}
});
version(diagnose)
if(n == 0)
{
writeln("Commited all of virtual memory");
readln(); //uncomment to take a pause to investigate RAM usage
}
// time to verify is the same for all with -O -inline
enforce(verify(chunk, virtArr));
}
writeln("============");
foreach(n, chunk; chunks)
{
ubyte[] arr;
// the GC is out of the game as soon as 512 Mbytes on 32bits
// as I hit OutOfMemory on a 2nd iteration
// try to help poor runtime
// without reserve it crashes and burns at about 120 M
// with reserve it's on par with chunk >= 256
// but it _commits_ all memory beforehand !
version(diagnose)
if(n == 0)
{
writeln("Before array.reserve()");
readln();
}
arr.reserve(totalSize);
version(diagnose)
if(n == 0)
{
writeln("After array.reserve()");
readln();
}
timeIt(titles[n]~" array ", (){
foreach(i; 0..totalSize/chunk.length)
{
arr ~= chunk;
}
});
version(diagnose)
if(n == 0)
{
writeln("After all data touched");
readln();
}
// time to verify is the same for all with -O -inline
enforce(verify(chunk, arr));
// sadly need to not run out of memory on 32 bits
delete arr;
}
writeln("============");
foreach(n, chunk; chunks)
{
{
// appender is supposed to be faster then array on appends
// but it's actually slower starting at 256 byte chunks
// and esp. so with 16Kb chunks
auto app = appender!(ubyte[])();
timeIt(titles[n]~" appender", (){
foreach(i; 0..totalSize/chunk.length)
{
app.put(chunk);
}
});
auto appArr = app.data();
enforce(verify(chunk, appArr));
}
GC.collect();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment