paniq/twistpool.c

## twistpool.c
/*

Twisting Pool Allocator
=======================
written by Leonard Ritter (leonard.ritter@duangle.com)

This file is in the public domain

I don't know if I was the first one to stumble upon this technique, so
I can't guarantee there's no patent on it, but let's hope there's not,
then this counts as prior art.

--------------------------------------------------------------------------------

This is a proof of concept implementation for a pool allocator that guarantees
compactness (unsorted gapless iteration without indirections) while preserving
element ids (using one order-optimized indirection), with insertion, deletion
and lookup in O(1) time.

Because all id <-> index assignments are symmetric swaps (either
identity-mapped or twisted), only a single table is required to resolve
index from id and id from index.

Insertion and deletion are a self-sorting process. Both operations attempt
to untwist identifiers that share the same status (obtained/released), and
so, regardless of deallocation order, only a few entries are accessed and
fragmentation is generally low; in my random allocation test, the average number
of fragmented entries appeared to be ~log2(capacity). It is possible to produce
a worst case by obtaining all ids, then releasing every other id. The upper
bound on fragmented entries appears to be capacity/3.

The data being managed must be allocated separately, and the user must
copy at most one element (possibly none) after each call to obtain/release.

With this implementation, the memory requirement is
sizeof(unsigned int) * capacity, typically 4 bytes per entry.

It might, however, be possible to implement the map with a compact sorted array
or hashtable, storing only twisted entries, which should tremendously reduce
memory usage and therefore notably improve cache efficiency.

--------------------------------------------------------------------------------

*/

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>

// Pool size. Pick whatever capacity you prefer; this implementation operates
// directly on global memory, whereas a production implementation would
// probably allocate the table on the heap.
#define CAPACITY 10000
// Number of map entries: one per usable id/index (1..CAPACITY) plus the
// reserved entry 0.
#define N (CAPACITY+1)

/*  index: offset of an element in the data array; elements are moved
    to keep the array compact and contiguous.

    id: numerical name assigned to a data element; it never changes.
    Therefore, elements should typically be referenced by their id,
    not their index.

    id 0 and index 0 are synonymous with "no element".
    */

/*  Symmetrical map.
    Read as index -> id it is compact and gapless over 1..count
    (the last used index == count).
    The very same table also maps id -> index; read that way the used
    entries have gaps.
    */
unsigned int map[N];

/*  Number of elements currently in use.
    All ids are mapped, even the unused ones; the ideal mapping is
    id == index. The first free index is count+1.
    If count == CAPACITY, the pool is full.
    */
unsigned int count;

// Pool constructor: puts the pool into its empty state. Every map entry
// (the reserved entry 0 included) is mapped onto itself and no id is in use.
void construct () {
    unsigned int slot = N;

    // walk the table back to front; each entry becomes its own partner
    while (slot-- > 0) {
        map[slot] = slot;
    }
    // nothing has been obtained yet
    count = 0;
}

// lookup index from id or id from index
unsigned int resolve(unsigned int id_or_index) {
    return map[id_or_index];
}

// An index refers to an element in use iff it lies in [1, count];
// index 0 is the reserved "no element" value.
bool is_index_valid (unsigned int index) {
    if (index == 0) {
        return false;
    }
    return index <= count;
}

// An id is valid iff it is within [1, CAPACITY] and the index it currently
// resolves to is an index in use.
bool is_id_valid (unsigned int id) {
    // reject the reserved id 0 and anything beyond the table before
    // touching the map
    if (id == 0 || id > CAPACITY) {
        return false;
    }
    return is_index_valid(resolve(id));
}

// True when the given map entry points at itself (id == index), i.e. the
// entry is not twisted with another one.
bool is_identity (unsigned int id_or_index) {
    const unsigned int partner = map[id_or_index];
    return partner == id_or_index;
}

// Exchange the map entries at a and b, i.e. swap the ids of two indices
// (or, equivalently, the indices of two ids).
void swap (unsigned int a, unsigned int b) {
    const unsigned int at_a = map[a];
    const unsigned int at_b = map[b];

    // The two entries must either both be identity-mapped (the swap twists
    // them) or already be twisted with each other (the swap untwists them);
    // the map must never be shuffled beyond that.
    assert((a == b)
            || ((at_a == a) && (at_b == b))
            || ((at_a == b) && (at_b == a)));
    map[a] = at_b;
    map[b] = at_a;
}

// Two unsigned values returned together; obtain() and release() use it as
// a { src, dst } pair telling the caller which data slot to copy where.
struct _pair {
    unsigned int _0;    // first component (src)
    unsigned int _1;    // second component (dst)
};
typedef struct _pair pair;

/*  Hand out a fresh id from the pool.
    Returns a { src, dst } pair. src is the newly obtained id, which after
    the call is mapped to the index equal to itself; dst is the new last
    index (the incremented count). If src != dst, the caller must copy
    slot[src] to slot[dst] before storing the new element in slot[src],
    because the element that occupied slot[src] now belongs to dst.
    If src == dst no copy is necessary.
    When the pool is full, { 0, 0 } is returned and nothing changes.
    */
pair obtain () {
    // pool exhausted: report failure with the reserved "no element" pair
    if (count >= CAPACITY) {
        pair none = { 0, 0 };
        return none;
    }

    // grow the used range by one; the new last index is the new count
    const unsigned int tail = ++count;
    // id currently parked at that index
    const unsigned int fresh = map[tail];

    // a twisted entry is straightened so that index(fresh) == fresh
    if (fresh != tail) {
        swap(tail, fresh);
    }

    // new id (and its index), followed by the index the old occupant moves to
    pair moved = { fresh, tail };
    return moved;
}

/*  release an obtained id to the pool
    user must copy slot[count] to slot[index(id)] after
    deletion. (release id) returns a (src dst) tuple indicating
    how data must be moved; if (src == dst), then no move is necessary.
    */
pair release (unsigned int id) {
    unsigned int index;
    unsigned int last_index;
    unsigned int last_id;

    assert(is_id_valid(id));

    // index of element to be deleted
    index = map[id];

    // if element is twisted, then untwist
    if (id > count)
        swap(index, id);
    // index and id of element to take its place
    last_index = count;
    last_id = map[last_index];

    // swap indices so that tailing element fills the gap (twist)
    // if last element is twisted, then untwist first
    if (last_id > count) {
        swap(last_index, last_id);
        swap(index, last_id);
    } else {
        swap(index, last_index);
    }

    // decrease number of elements
    --count;
    // return index of element to be moved and index of gap
    pair result = { last_index, index };
    return result;
}

// ----------------------------------------------------------------------------
// test

#include <stdlib.h>
#include <stdio.h>

// Test payload: the "data array" managed alongside the pool. Each slot simply
// stores the id of the element living there, so verify_data() can check that
// an id still matches its assigned content after elements have been moved.
unsigned int data[N];

// Print the whole index -> id table (indices 1..CAPACITY) to stdout, one
// entry per line, with a separator line in front of the first unused index.
// An entry whose id does not resolve back to its index is flagged with "!".
// The number of used elements is printed last.
void dump() {
    unsigned int i;

    printf("index -> ID:\n");
    for (i = 1; i <= CAPACITY; ++i) {
        // boundary between used and free entries
        if (i == (count + 1)) {
            printf("-------\n");
        }
        unsigned int id = resolve(i);
        // round trip id -> index; must lead back to i in a symmetric map
        unsigned int ri = resolve(id);
        printf("%u\t%u\t%s\n", i, id, ((i != ri)?"!":""));
    }
    // count is unsigned, so the matching conversion is %u (was %i)
    printf("%u used elements\n\n", count);
}

// Apply a { src, dst } pair returned by obtain()/release() to the data
// array: copies data[src] to data[dst] unless both are the same slot.
void move(pair k) {
    // source and destination coincide: nothing to copy
    if (k._0 == k._1) {
        return;
    }
    data[k._1] = data[k._0];
}

// Check every index in use (1..count): the data slot must hold the id the
// map assigns to it, and that id must resolve back to the same index.
// Returns how many of those indices are not identity-mapped (twisted).
unsigned int verify_data () {
    unsigned int twisted = 0;

    for (unsigned int index = 1; index <= count; ++index) {
        const unsigned int id = resolve(index);
        // payload still belongs to the id stored at this index
        assert(data[index] == id);
        // the map is symmetric for this entry
        assert(resolve(id) == index);
        twisted += is_identity(index) ? 0u : 1u;
    }
    return twisted;
}

// Obtain an id, perform the data move the pool asks for, and store the new
// id as the payload of its slot. Returns the new id, or 0 if the pool was
// full (obtain() then yields { 0, 0 } and only the reserved slot 0 is
// written).
unsigned int test_obtain () {
    const pair k = obtain();
    const unsigned int id = k._0;

    // relocate the displaced element first, then claim the slot
    move(k);
    data[id] = id;
    return id;
}

// Release an id and perform the data move the pool asks for, so the data
// array stays gapless. The id must be non-zero (and in use; release()
// asserts that). Returns the id that was released; previously the function
// fell off its end without returning a value despite its non-void type.
unsigned int test_release (unsigned int id) {
    assert(id != 0);
    pair k = release(id);
    // copy the former last element into the gap left by the released one
    move(k);
    return id;
}

#include <memory.h>
#include <stdio.h>

// Test bookkeeping: the ids currently obtained, stored densely in
// used[0 .. used_count-1] by main() so that a random one can be picked for
// release.
unsigned int used[CAPACITY];

// Stress test for the pool. Performs a long random sequence of obtains and
// releases (or, with the #if flipped, the alternating-release worst case),
// re-verifying the whole pool after every step, then prints the final table
// and statistics on the number of twisted entries observed after each
// release. Returns 0; an inconsistency trips an assert on the way (when
// assertions are enabled).
int main (void) {
    int i;
    unsigned int mintwisted = N;
    unsigned int maxtwisted = 0;
    unsigned int total = 0;
    unsigned int steps = 0;
    unsigned int used_count = 0;

    memset(used, 0, sizeof(used));

    construct();

    // time() requires an argument (NULL: just return the value); the
    // result is narrowed explicitly to srand()'s parameter type
    srand((unsigned int)time(NULL));
#if 1
    // do random obtains/releases, see if something breaks
    for (i = 0; i < 100000; ++i) {
        if (((rand() % 100) < 50) && (used_count > 0)) {
            unsigned int used_index = rand() % used_count;
            unsigned int id = used[used_index];
            // remove from used array and fill the hole with the last entry
            used_count--;
            used[used_index] = used[used_count];
            used[used_count] = 0;
            test_release(id);
            unsigned int t = verify_data();
            mintwisted = (mintwisted < t)?mintwisted:t;
            maxtwisted = (maxtwisted > t)?maxtwisted:t;
            total += t;
            ++steps;
        } else {
            unsigned int k = test_obtain();
            // k == 0 means the pool was full; nothing to record then
            if (k != 0) {
                assert(used_count < CAPACITY);
                used[used_count] = k;
                ++used_count;
            }
            verify_data();
        }
    }
#else
    // attempt to fabricate a worst case
    for (i = 0; i < CAPACITY; ++i) {
        test_obtain();
    }
    for (i = 1; i <= CAPACITY; i += 2) {
        test_release(i);
        unsigned int t = verify_data();
        mintwisted = (mintwisted < t)?mintwisted:t;
        maxtwisted = (maxtwisted > t)?maxtwisted:t;
        total += t;
        ++steps;
    }
#endif

    dump();
    // avoid dividing 0 by 0 when no release was recorded
    double average = (steps > 0) ? ((double)total / (double)steps) : 0.0;
    printf("releases: %u\nmin twisted: %u\nmax twisted: %u\ntotal twisted: %u\naverage: %f\n",
        steps, mintwisted, maxtwisted, total, average);
    printf("OK.\n");

    return 0;
}
	/*

	Twisting Pool Allocator
	=======================
	written by Leonard Ritter (leonard.ritter@duangle.com)

	This file is in the public domain

	I don't know if I was the first one to stumble upon this technique, so
	I can't guarantee there's no patent on it, but let's hope there's not,
	then this counts as prior art.

	--------------------------------------------------------------------------------

	This is a proof of concept implementation for a pool allocator that guarantees
	compactness (unsorted gapless iteration without indirections) while preserving
	element ids (using one order-optimized indirection), with insertion, deletion
	and lookup in O(1) time.

	Because all id <-> index assignments are symmetric swaps (either
	identity-mapped or twisted), only a single table is required to resolve
	index from id and id from index.

	Insertion and deletion are a self-sorting process. Both operations attempt
	to untwist identifiers that share the same status (obtained/released), and
	so, regardless of deallocation order, only a few entries are accessed and
	fragmentation is generally low; in my random allocation test, the average number
	of fragmented entries appeared to be ~log2(capacity). It is possible to produce
	a worst case by obtaining all ids, then releasing every other id. The upper
	bound on fragmented entries appears to be capacity/3.

	The data being managed must be allocated separately, and the user must
	copy at most one element (possibly none) after each call to obtain/release.

	With this implementation, the memory requirement is
	sizeof(unsigned int) * capacity, typically 4 bytes per entry.

	It might, however, be possible to implement the map with a compact sorted array
	or hashtable, storing only twisted entries, which should tremendously reduce
	memory usage and therefore notably improve cache efficiency.

	--------------------------------------------------------------------------------

	*/

	#include <stdbool.h>
	#include <assert.h>

	// Pool size. Pick whatever capacity you prefer; this implementation operates
	// directly on global memory, whereas a production implementation would
	// probably allocate the table on the heap.
	#define CAPACITY 10000
	// Number of map entries: one per usable id/index (1..CAPACITY) plus the
	// reserved entry 0.
	#define N (CAPACITY+1)

	/* index: offset of an element in the data array; elements are moved
	to keep the array compact and contiguous.

	id: numerical name assigned to a data element; it never changes.
	Therefore, elements should typically be referenced by their id,
	not their index.

	id 0 and index 0 are synonymous with "no element".
	*/

	/* Symmetrical map.
	Read as index -> id it is compact and gapless over 1..count
	(the last used index == count).
	The very same table also maps id -> index; read that way the used
	entries have gaps.
	*/
	unsigned int map[N];

	/* Number of elements currently in use.
	All ids are mapped, even the unused ones; the ideal mapping is
	id == index. The first free index is count+1.
	If count == CAPACITY, the pool is full.
	*/
	unsigned int count;

	// Pool constructor: puts the pool into its empty state. Every map entry
	// (the reserved entry 0 included) is mapped onto itself and no id is in use.
	void construct () {
	    unsigned int slot = N;

	    // walk the table back to front; each entry becomes its own partner
	    while (slot-- > 0) {
	        map[slot] = slot;
	    }
	    // nothing has been obtained yet
	    count = 0;
	}

	// lookup index from id or id from index
	unsigned int resolve(unsigned int id_or_index) {
	return map[id_or_index];
	}

	// An index refers to an element in use iff it lies in [1, count];
	// index 0 is the reserved "no element" value.
	bool is_index_valid (unsigned int index) {
	    if (index == 0) {
	        return false;
	    }
	    return index <= count;
	}

	// An id is valid iff it is within [1, CAPACITY] and the index it currently
	// resolves to is an index in use.
	bool is_id_valid (unsigned int id) {
	    // reject the reserved id 0 and anything beyond the table before
	    // touching the map
	    if (id == 0 || id > CAPACITY) {
	        return false;
	    }
	    return is_index_valid(resolve(id));
	}

	// True when the given map entry points at itself (id == index), i.e. the
	// entry is not twisted with another one.
	bool is_identity (unsigned int id_or_index) {
	    const unsigned int partner = map[id_or_index];
	    return partner == id_or_index;
	}

	// Exchange the map entries at a and b, i.e. swap the ids of two indices
	// (or, equivalently, the indices of two ids).
	void swap (unsigned int a, unsigned int b) {
	    // The two entries must either both be identity-mapped (the swap twists
	    // them) or already be twisted with each other (the swap untwists them);
	    // the map must never be shuffled beyond that.
	    // (The logical-or operators had been mangled into "\|\|", which is
	    // not valid C.)
	    assert((a == b)
	            || ((map[a] == a) && (map[b] == b))
	            || ((map[a] == b) && (map[b] == a)));
	    unsigned int t = map[a];
	    map[a] = map[b];
	    map[b] = t;
	}

	// Two unsigned values returned together; obtain() and release() use it as
	// a { src, dst } pair telling the caller which data slot to copy where.
	struct _pair {
	    unsigned int _0;    // first component (src)
	    unsigned int _1;    // second component (dst)
	};
	typedef struct _pair pair;

	/*  Hand out a fresh id from the pool.
	    Returns a { src, dst } pair. src is the newly obtained id, which after
	    the call is mapped to the index equal to itself; dst is the new last
	    index (the incremented count). If src != dst, the caller must copy
	    slot[src] to slot[dst] before storing the new element in slot[src],
	    because the element that occupied slot[src] now belongs to dst.
	    If src == dst no copy is necessary.
	    When the pool is full, { 0, 0 } is returned and nothing changes.
	    */
	pair obtain () {
	    // pool exhausted: report failure with the reserved "no element" pair
	    if (count >= CAPACITY) {
	        pair none = { 0, 0 };
	        return none;
	    }

	    // grow the used range by one; the new last index is the new count
	    const unsigned int tail = ++count;
	    // id currently parked at that index
	    const unsigned int fresh = map[tail];

	    // a twisted entry is straightened so that index(fresh) == fresh
	    if (fresh != tail) {
	        swap(tail, fresh);
	    }

	    // new id (and its index), followed by the index the old occupant moves to
	    pair moved = { fresh, tail };
	    return moved;
	}

	/* release an obtained id to the pool
	user must copy slot[count] to slot[index(id)] after
	deletion. (release id) returns a (src dst) tuple indicating
	how data must be moved; if (src == dst), then no move is necessary.
	*/
	pair release (unsigned int id) {
	unsigned int index;
	unsigned int last_index;
	unsigned int last_id;

	assert(is_id_valid(id));

	// index of element to be deleted
	index = map[id];

	// if element is twisted, then untwist
	if (id > count)
	swap(index, id);
	// index and id of element to take its place
	last_index = count;
	last_id = map[last_index];

	// swap indices so that tailing element fills the gap (twist)
	// if last element is twisted, then untwist first
	if (last_id > count) {
	swap(last_index, last_id);
	swap(index, last_id);
	} else {
	swap(index, last_index);
	}

	// decrease number of elements
	--count;
	// return index of element to be moved and index of gap
	pair result = { last_index, index };
	return result;
	}

	// ----------------------------------------------------------------------------
	// test

	#include <stdlib.h>
	#include <stdio.h>

	// Test payload: the "data array" managed alongside the pool. Each slot simply
	// stores the id of the element living there, so verify_data() can check that
	// an id still matches its assigned content after elements have been moved.
	unsigned int data[N];

	// Print the whole index -> id table (indices 1..CAPACITY) to stdout, one
	// entry per line, with a separator line in front of the first unused index.
	// An entry whose id does not resolve back to its index is flagged with "!".
	// The number of used elements is printed last.
	void dump() {
	    unsigned int i;

	    printf("index -> ID:\n");
	    for (i = 1; i <= CAPACITY; ++i) {
	        // boundary between used and free entries
	        if (i == (count + 1)) {
	            printf("-------\n");
	        }
	        unsigned int id = resolve(i);
	        // round trip id -> index; must lead back to i in a symmetric map
	        unsigned int ri = resolve(id);
	        printf("%u\t%u\t%s\n", i, id, ((i != ri)?"!":""));
	    }
	    // count is unsigned, so the matching conversion is %u (was %i)
	    printf("%u used elements\n\n", count);
	}

	// Apply a { src, dst } pair returned by obtain()/release() to the data
	// array: copies data[src] to data[dst] unless both are the same slot.
	void move(pair k) {
	    // source and destination coincide: nothing to copy
	    if (k._0 == k._1) {
	        return;
	    }
	    data[k._1] = data[k._0];
	}

	// Check every index in use (1..count): the data slot must hold the id the
	// map assigns to it, and that id must resolve back to the same index.
	// Returns how many of those indices are not identity-mapped (twisted).
	unsigned int verify_data () {
	    unsigned int twisted = 0;

	    for (unsigned int index = 1; index <= count; ++index) {
	        const unsigned int id = resolve(index);
	        // payload still belongs to the id stored at this index
	        assert(data[index] == id);
	        // the map is symmetric for this entry
	        assert(resolve(id) == index);
	        twisted += is_identity(index) ? 0u : 1u;
	    }
	    return twisted;
	}

	// Obtain an id, perform the data move the pool asks for, and store the new
	// id as the payload of its slot. Returns the new id, or 0 if the pool was
	// full (obtain() then yields { 0, 0 } and only the reserved slot 0 is
	// written).
	unsigned int test_obtain () {
	    const pair k = obtain();
	    const unsigned int id = k._0;

	    // relocate the displaced element first, then claim the slot
	    move(k);
	    data[id] = id;
	    return id;
	}

	// Release an id and perform the data move the pool asks for, so the data
	// array stays gapless. The id must be non-zero (and in use; release()
	// asserts that). Returns the id that was released; previously the function
	// fell off its end without returning a value despite its non-void type.
	unsigned int test_release (unsigned int id) {
	    assert(id != 0);
	    pair k = release(id);
	    // copy the former last element into the gap left by the released one
	    move(k);
	    return id;
	}

	#include <memory.h>
	#include <stdio.h>

	// Test bookkeeping: the ids currently obtained, stored densely in
	// used[0 .. used_count-1] by main() so that a random one can be picked for
	// release.
	unsigned int used[CAPACITY];

	// Stress test for the pool. Performs a long random sequence of obtains and
	// releases (or, with the #if flipped, the alternating-release worst case),
	// re-verifying the whole pool after every step, then prints the final table
	// and statistics on the number of twisted entries observed after each
	// release. Returns 0; an inconsistency trips an assert on the way (when
	// assertions are enabled).
	int main (void) {
	    int i;
	    unsigned int mintwisted = N;
	    unsigned int maxtwisted = 0;
	    unsigned int total = 0;
	    unsigned int steps = 0;
	    unsigned int used_count = 0;

	    memset(used, 0, sizeof(used));

	    construct();

	    // time() requires an argument (NULL: just return the value); the
	    // result is narrowed explicitly to srand()'s parameter type
	    srand((unsigned int)time(NULL));
	#if 1
	    // do random obtains/releases, see if something breaks
	    for (i = 0; i < 100000; ++i) {
	        if (((rand() % 100) < 50) && (used_count > 0)) {
	            unsigned int used_index = rand() % used_count;
	            unsigned int id = used[used_index];
	            // remove from used array and fill the hole with the last entry
	            used_count--;
	            used[used_index] = used[used_count];
	            used[used_count] = 0;
	            test_release(id);
	            unsigned int t = verify_data();
	            mintwisted = (mintwisted < t)?mintwisted:t;
	            maxtwisted = (maxtwisted > t)?maxtwisted:t;
	            total += t;
	            ++steps;
	        } else {
	            unsigned int k = test_obtain();
	            // k == 0 means the pool was full; nothing to record then
	            if (k != 0) {
	                assert(used_count < CAPACITY);
	                used[used_count] = k;
	                ++used_count;
	            }
	            verify_data();
	        }
	    }
	#else
	    // attempt to fabricate a worst case
	    for (i = 0; i < CAPACITY; ++i) {
	        test_obtain();
	    }
	    for (i = 1; i <= CAPACITY; i += 2) {
	        test_release(i);
	        unsigned int t = verify_data();
	        mintwisted = (mintwisted < t)?mintwisted:t;
	        maxtwisted = (maxtwisted > t)?maxtwisted:t;
	        total += t;
	        ++steps;
	    }
	#endif

	    dump();
	    // avoid dividing 0 by 0 when no release was recorded
	    double average = (steps > 0) ? ((double)total / (double)steps) : 0.0;
	    printf("releases: %u\nmin twisted: %u\nmax twisted: %u\ntotal twisted: %u\naverage: %f\n",
	        steps, mintwisted, maxtwisted, total, average);
	    printf("OK.\n");

	    return 0;
	}