graphitemaster/constant_time_allocator.c Secret

## constant_time_allocator.c
// The following is a completely constant-time & branchless allocator for where
// dynamic memory allocation is needed in security-sensitive code that may be
// susceptible to timing-analysis based attacks.
//
// The way this allocator works is similar to a slab allocator in that caches of
// objects are pre-assigned based on a size-class. In this case powers of two
// sizes, beginning from SMALLEST are assigned into an array which behaves as
// a bounded-queue. Individual indices are maintained into both a free and used
// queue where pointers are exchanged on allocation and free and indices are
// updated accordingly.
//
// Indirect function calls are used via indexing a table based on the result of
// conditional tests for OOM or null pointer passed to free. These are the only
// two cases where timing-analysis will show variance in calls to alloc or free,
// but such information leakage does not seem particularly dangerous in that
// such operations would only occur under resource-exhaustion (which is a
// different issue), or during a no-op respectively.
//
// This is not a particularly good algorithm for allocations as once all objects
// of a given size-class are allocated, no other remaining, unused memory may be
// used to service an allocation of that same size, like a slab. However, since
// allocations are grouped into sizes in this way, there is no fragmentation
// and objects of different type (thus different size) are isolated from one
// another, preventing classes of bugs involving buffer overflow from being used
// to exploit adjacent objects of different type.
//
// To use this allocator you must first construct a Heap object and initialize
// it with the number of objects max you'd like.
//
//   Heap heap;
//   heap_init(&heap, 1000); // 1000 objects of each size class
//
// You may then allocate and free objects of any size from the heap.
//
//   void* ptr = heap_alloc(&heap, size);
//   heap_free(&heap, ptr);
//
// Don't forget to finalize the heap when done.
//
//   heap_fini(&heap);
//
//
// Copyright (c) 2021 Dale Weiler
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include <stdlib.h>

// The number of size classes, beginning from ilog2(SMALLEST) as the 0th size
// class. The largest allocation is 1 << (CLASSES + ilog2(SMALLEST)), as that
// is the last class array size. Allocations larger than this will fail.
#define CLASSES 16

// Smallest allocation is actually 16 bytes, but the header necessary to store
// size information will add 4 to 8 byte overhead, yet we want to maintain 16
// byte alignment, so move to the next power of two.
#define SMALLEST 32

// Helper function to round |_size| to the next power of two.
static size_t pot(size_t _size) {
  _size--;
  _size |= _size >> 1;
  _size |= _size >> 2;
  _size |= _size >> 4;
  _size |= _size >> 8;
  _size |= _size >> 16;
  if constexpr (sizeof _size == 8) {
    _size |= _size >> 32;
  }
  _size++;
  return _size;
}

// Helper function to get the integer log 2 of |_value|, this is the same as
// using bitscan forward instructions, except done using a de Bruijn sequence.
static unsigned char ilog2(size_t _value) {
  static const unsigned char LUT[] = {
    0x00, 0x01, 0x1C, 0x02, 0x0D, 0x0E, 0x18, 0x03,
    0x1E, 0x16, 0x14, 0x0F, 0x19, 0x11, 0x04, 0x08,
    0x1F, 0x1B, 0x0D, 0x17, 0x15, 0x13, 0x10, 0x07,
    0x1A, 0x0C, 0x12, 0x06, 0x0B, 0x05, 0x0A, 0x09
  };
  return LUT[((size_t)(_value * 0x077CB531U) >> 0x1B) & 0x1F];
}

// The heap is composed of CLASSES count of arrays of objects with power of two
// sizes starting with the smallest object of size SMALLEST.
struct Heap {
    char** free_classes[CLASSES]; // free_classes[class_index][free_indices[class_index]] for next free object
    char** used_classes[CLASSES]; // used_classes[class_index][used_indices[class_index]] for last used object
    size_t free_indices[CLASSES]; // index into next free object in free queue
    size_t used_indices[CLASSES]; // index into last used object in used queue
    size_t objects;               // number of objects of each class allocated
};

bool heap_init(Heap* heap_, size_t _objects) {
  // Calculate size needed for the initial heap.
  size_t size = 0;
  size_t bytes = SMALLEST;
  for (size_t i = 0; i < CLASSES; i++, bytes <<= 1) {
    size += sizeof(char*) * _objects; // free_classes
    size += sizeof(char*) * _objects; // used_classes
    size += bytes * _objects;         // objects
  }

  char *data = (char*)malloc(size);
  if (!data) return false;

  // Wire up the pointers for the heap now.
  bytes = SMALLEST;
  for (size_t i = 0; i < CLASSES; i++, bytes <<= 1) {
    heap_->free_classes[i] = (char**)data;
    data += sizeof(char*) * _objects;
    heap_->used_classes[i] = (char**)data;
    data += sizeof(char*) * _objects;
    for (size_t j = 0; j < _objects; j++) {
      heap_->free_classes[i][j] = data;
      heap_->used_classes[i][j] = 0;
      data += bytes;
    }
  }
  for (size_t i = 0; i < CLASSES; i++) {
    heap_->free_indices[i] = _objects;
    heap_->used_indices[i] = 0;
  }
  heap_->objects = _objects;
  return true;
}

void heap_fini(Heap* _heap) {
  free(_heap->free_classes[0]);
}

// O(n) time and complexity, branchless - safe from timing attacks.
static void* _heap_alloc_valid(Heap* _heap, size_t _index) {
  const size_t bytes = 1 << (_index + ilog2(SMALLEST));
  size_t *const free_indices = &_heap->free_indices[_index];
  size_t *const used_indices = &_heap->used_indices[_index];
  char *const data = _heap->free_classes[_index][*free_indices - 1];
  _heap->used_classes[_index][*used_indices] = data;
  _heap->free_classes[_index][*free_indices] = 0;
  (*free_indices)--;
  (*used_indices)++;
  *(size_t*)data = bytes;
  return data + sizeof(size_t);
}
static void* _heap_alloc_oom(Heap*, size_t) {
  // Empty function.
  return 0;
}
void* heap_alloc(Heap* _heap, size_t _size) {
  static void* (*const table[2])(Heap*, size_t) = { &_heap_alloc_oom, &_heap_alloc_valid };
  const size_t bytes = pot(_size + sizeof _size);
  const size_t index = ilog2(bytes) - ilog2(SMALLEST);
  return table[_heap->used_indices[index] < _heap->objects](_heap, index);
}

// O(n) time and complexity, branchless - safe from timing attacks.
static void _heap_free_valid(Heap* _heap, void* _data) {
  char *const base = (char*)_data - sizeof(size_t);
  const size_t size = *(size_t*)base;
  const size_t index = ilog2(size) - ilog2(SMALLEST);
  _heap->free_classes[index][++_heap->free_indices[index]] = base;
  _heap->used_classes[index][--_heap->used_indices[index]] = 0;
}
static void _heap_free_null(Heap*, void*) {
  // Empty function
}
void heap_free(Heap* _heap, void* _data) {
  static void (*const table[2])(Heap*, void*) = { &_heap_free_null, &_heap_free_valid };
  table[!!_data](_heap, _data);
}
	// The following is a completely constant-time & branchless allocator for where
	// dynamic memory allocation is needed in security-sensitive code that may be
	// susceptible to timing-analysis based attacks.
	//
	// The way this allocator works is similar to a slab allocator in that caches of
	// objects are pre-assigned based on a size-class. In this case powers of two
	// sizes, beginning from SMALLEST are assigned into an array which behaves as
	// a bounded-queue. Individual indices are maintained into both a free and used
	// queue where pointers are exchanged on allocation and free and indices are
	// updated accordingly.
	//
	// Indirect function calls are used via indexing a table based on the result of
	// conditional tests for OOM or null pointer passed to free. These are the only
	// two cases where timing-analysis will show variance in calls to alloc or free,
	// but such information leakage does not seem particularly dangerous in that
	// such operations would only occur under resource-exhaustion (which is a
	// different issue), or during a no-op respectively.
	//
	// This is not a particularly good algorithm for allocations as once all objects
	// of a given size-class are allocated, no other remaining, unused memory may be
	// used to service an allocation of that same size, like a slab. However, since
	// allocations are grouped into sizes in this way, there is no fragmentation
	// and objects of different type (thus different size) are isolated from one
	// another, preventing classes of bugs involving buffer overflow from being used
	// to exploit adjacent objects of different type.
	//
	// To use this allocator you must first construct a Heap object and initialize
	// it with the number of objects max you'd like.
	//
	// Heap heap;
	// heap_init(&heap, 1000); // 1000 objects of each size class
	//
	// You may then allocate and free objects of any size from the heap.
	//
	// void* ptr = heap_alloc(&heap, size);
	// heap_free(&heap, ptr);
	//
	// Don't forget to finalize the heap when done.
	//
	// heap_fini(&heap);
	//
	//
	// Copyright (c) 2021 Dale Weiler
	//
	// Permission is hereby granted, free of charge, to any person obtaining a copy
	// of this software and associated documentation files (the "Software"), to deal
	// in the Software without restriction, including without limitation the rights
	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	// copies of the Software, and to permit persons to whom the Software is
	// furnished to do so, subject to the following conditions:
	//
	// The above copyright notice and this permission notice shall be included in all
	// copies or substantial portions of the Software.
	//
	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	// SOFTWARE.
	#include <stdlib.h>

	// The number of size classes, beginning from ilog2(SMALLEST) as the 0th size
	// class. The largest allocation is 1 << (CLASSES + ilog2(SMALLEST)), as that
	// is the last class array size. Allocations larger than this will fail.
	#define CLASSES 16

	// Smallest allocation is actually 16 bytes, but the header necessary to store
	// size information will add 4 to 8 byte overhead, yet we want to maintain 16
	// byte alignment, so move to the next power of two.
	#define SMALLEST 32

	// Helper function to round \|_size\| to the next power of two.
	static size_t pot(size_t _size) {
	_size--;
	_size \|= _size >> 1;
	_size \|= _size >> 2;
	_size \|= _size >> 4;
	_size \|= _size >> 8;
	_size \|= _size >> 16;
	if constexpr (sizeof _size == 8) {
	_size \|= _size >> 32;
	}
	_size++;
	return _size;
	}

	// Helper function to get the integer log 2 of \|_value\|, this is the same as
	// using bitscan forward instructions, except done using a de Bruijn sequence.
	static unsigned char ilog2(size_t _value) {
	static const unsigned char LUT[] = {
	0x00, 0x01, 0x1C, 0x02, 0x0D, 0x0E, 0x18, 0x03,
	0x1E, 0x16, 0x14, 0x0F, 0x19, 0x11, 0x04, 0x08,
	0x1F, 0x1B, 0x0D, 0x17, 0x15, 0x13, 0x10, 0x07,
	0x1A, 0x0C, 0x12, 0x06, 0x0B, 0x05, 0x0A, 0x09
	};
	return LUT[((size_t)(_value * 0x077CB531U) >> 0x1B) & 0x1F];
	}

	// The heap is composed of CLASSES count of arrays of objects with power of two
	// sizes starting with the smallest object of size SMALLEST.
	struct Heap {
	char** free_classes[CLASSES]; // free_classes[class_index][free_indices[class_index]] for next free object
	char** used_classes[CLASSES]; // used_classes[class_index][used_indices[class_index]] for last used object
	size_t free_indices[CLASSES]; // index into next free object in free queue
	size_t used_indices[CLASSES]; // index into last used object in used queue
	size_t objects; // number of objects of each class allocated
	};

	bool heap_init(Heap* heap_, size_t _objects) {
	// Calculate size needed for the initial heap.
	size_t size = 0;
	size_t bytes = SMALLEST;
	for (size_t i = 0; i < CLASSES; i++, bytes <<= 1) {
	size += sizeof(char) _objects; // free_classes
	size += sizeof(char) _objects; // used_classes
	size += bytes * _objects; // objects
	}

	char data = (char)malloc(size);
	if (!data) return false;

	// Wire up the pointers for the heap now.
	bytes = SMALLEST;
	for (size_t i = 0; i < CLASSES; i++, bytes <<= 1) {
	heap_->free_classes[i] = (char**)data;
	data += sizeof(char) _objects;
	heap_->used_classes[i] = (char**)data;
	data += sizeof(char) _objects;
	for (size_t j = 0; j < _objects; j++) {
	heap_->free_classes[i][j] = data;
	heap_->used_classes[i][j] = 0;
	data += bytes;
	}
	}
	for (size_t i = 0; i < CLASSES; i++) {
	heap_->free_indices[i] = _objects;
	heap_->used_indices[i] = 0;
	}
	heap_->objects = _objects;
	return true;
	}

	void heap_fini(Heap* _heap) {
	free(_heap->free_classes[0]);
	}

	// O(n) time and complexity, branchless - safe from timing attacks.
	static void* _heap_alloc_valid(Heap* _heap, size_t _index) {
	const size_t bytes = 1 << (_index + ilog2(SMALLEST));
	size_t *const free_indices = &_heap->free_indices[_index];
	size_t *const used_indices = &_heap->used_indices[_index];
	char const data = _heap->free_classes[_index][free_indices - 1];
	_heap->used_classes[_index][*used_indices] = data;
	_heap->free_classes[_index][*free_indices] = 0;
	(*free_indices)--;
	(*used_indices)++;
	(size_t)data = bytes;
	return data + sizeof(size_t);
	}
	static void* _heap_alloc_oom(Heap*, size_t) {
	// Empty function.
	return 0;
	}
	void* heap_alloc(Heap* _heap, size_t _size) {
	static void* (const table[2])(Heap, size_t) = { &_heap_alloc_oom, &_heap_alloc_valid };
	const size_t bytes = pot(_size + sizeof _size);
	const size_t index = ilog2(bytes) - ilog2(SMALLEST);
	return table[_heap->used_indices[index] < _heap->objects](_heap, index);
	}

	// O(n) time and complexity, branchless - safe from timing attacks.
	static void _heap_free_valid(Heap* _heap, void* _data) {
	char const base = (char)_data - sizeof(size_t);
	const size_t size = (size_t)base;
	const size_t index = ilog2(size) - ilog2(SMALLEST);
	_heap->free_classes[index][++_heap->free_indices[index]] = base;
	_heap->used_classes[index][--_heap->used_indices[index]] = 0;
	}
	static void _heap_free_null(Heap, void) {
	// Empty function
	}
	void heap_free(Heap* _heap, void* _data) {
	static void (const table[2])(Heap, void*) = { &_heap_free_null, &_heap_free_valid };
	table[!!_data](_heap, _data);
	}