Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save laino/8a6e33e7cb08926182c0820b0caf849c to your computer and use it in GitHub Desktop.
Save laino/8a6e33e7cb08926182c0820b0caf849c to your computer and use it in GitHub Desktop.
From 159cf9f45d986ab7b8a1dd9c96d28755358e7528 Mon Sep 17 00:00:00 2001
From: Nils Kuhnhenn <kuhnhenn.nils@gmail.com>
Date: Fri, 19 Aug 2016 11:01:30 +0200
Subject: [PATCH] ntdll: drastically improve heap allocation performance.
- Automatically balancing free lists.
- Added a fast case to skip to the next free list when the
current list doesn't contain any large-enough block.
Signed-off-by: Nils Kuhnhenn <kuhnhenn.nils@gmail.com>
---
dlls/ntdll/heap.c | 349 +++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 305 insertions(+), 44 deletions(-)
diff --git a/dlls/ntdll/heap.c b/dlls/ntdll/heap.c
index f928ebf..00036b3 100644
--- a/dlls/ntdll/heap.c
+++ b/dlls/ntdll/heap.c
@@ -113,17 +113,37 @@ C_ASSERT( sizeof(ARENA_LARGE) % LARGE_ALIGNMENT == 0 );
#define HEAP_TAIL_EXTRA_SIZE(flags) \
((flags & HEAP_TAIL_CHECKING_ENABLED) || RUNNING_ON_VALGRIND ? ALIGNMENT : 0)
-/* Max size of the blocks on the free lists */
-static const SIZE_T HEAP_freeListSizes[] =
+static const DWORD HEAP_defaultFreeListSizes[] =
{
- 0x10, 0x20, 0x30, 0x40, 0x60, 0x80, 0x100, 0x200, 0x400, 0x1000, ~0UL
+ 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x80, 0xA0, 0xC0,
+ 0xE0, 0x100, 0x140, 0x180, 0x1C0, 0x200, 0x400, 0x1000,
+ 0x2000, 0x3000, 0x5000, 0xA000, 0xF000, 0xA0000, 0xA00000,
+ 0xA000000, ~0U
};
-#define HEAP_NB_FREE_LISTS (sizeof(HEAP_freeListSizes)/sizeof(HEAP_freeListSizes[0]))
+#define HEAP_NB_FREE_LISTS (sizeof(HEAP_defaultFreeListSizes)/sizeof(HEAP_defaultFreeListSizes[0]))
+#define HEAP_FREELIST_BALANCE_COUNT_THRESH 1024 * 3
+#define HEAP_FREELIST_BALANCE_RATIO_THRESH 4
-typedef union
+#define HEAP_FREELIST_FLAG_DONT_RESIZE 0x3
+#define HEAP_FREELIST_FLAG_DONT_RESIZE_UP 0x1
+#define HEAP_FREELIST_FLAG_DONT_RESIZE_DOWN 0x2
+
+#define HEAP_FREELIST_RATIO(a, b) ((float) max(a, b) / (float) min (a, b))
+
+#define HEAP_FREELIST_SIZE_STEP_TOWARDS(list, _size) \
+ (((SIZE_T)list->size * (SIZE_T)list->granularity + (SIZE_T)_size) \
+ / ((SIZE_T)list->granularity + 1UL)) & ~(ALIGNMENT - 1);
+
+#define HEAP_FREELIST_MAX_GRANULARITY (0xFFFFU)
+
+typedef struct tagFREELISTENTRY
{
ARENA_FREE arena;
- void *alignment[4];
+ DWORD count;
+ DWORD size;
+ DWORD flags;
+ DWORD largest_arena;
+ DWORD granularity;
} FREE_LIST_ENTRY;
struct tagHEAP;
@@ -157,7 +177,7 @@ typedef struct tagHEAP
DWORD pending_pos; /* Position in pending free requests ring */
ARENA_INUSE **pending_free; /* Ring buffer for pending free requests */
RTL_CRITICAL_SECTION critSection; /* Critical section for serialization */
- FREE_LIST_ENTRY *freeList; /* Free lists */
+ FREE_LIST_ENTRY freeList[HEAP_NB_FREE_LISTS]; /* Free lists */
} HEAP;
#define HEAP_MAGIC ((DWORD)('H' | ('E'<<8) | ('A'<<16) | ('P'<<24)))
@@ -299,12 +319,12 @@ static void subheap_notify_free_all(SUBHEAP const *subheap)
/* locate a free list entry of the appropriate size */
/* size is the size of the whole block including the arena header */
-static inline unsigned int get_freelist_index( SIZE_T size )
+static inline unsigned int get_freelist_index( HEAP *heap, SIZE_T size )
{
unsigned int i;
- size -= sizeof(ARENA_FREE);
- for (i = 0; i < HEAP_NB_FREE_LISTS - 1; i++) if (size <= HEAP_freeListSizes[i]) break;
+ for (i = 0; i < HEAP_NB_FREE_LISTS - 1; i++) if (size <= heap->freeList[i].size) break;
+
return i;
}
@@ -337,8 +357,8 @@ static void HEAP_Dump( HEAP *heap )
DPRINTF( "\nFree lists:\n Block Stat Size Id\n" );
for (i = 0; i < HEAP_NB_FREE_LISTS; i++)
- DPRINTF( "%p free %08lx prev=%p next=%p\n",
- &heap->freeList[i].arena, HEAP_freeListSizes[i],
+ DPRINTF( "%p free %u index=%u count=%u prev=%p next=%p\n",
+ &heap->freeList[i].arena, heap->freeList[i].size, i, heap->freeList[i].count,
LIST_ENTRY( heap->freeList[i].arena.entry.prev, ARENA_FREE, entry ),
LIST_ENTRY( heap->freeList[i].arena.entry.next, ARENA_FREE, entry ));
@@ -459,28 +479,231 @@ static HEAP *HEAP_GetPtr(
return heapPtr;
}
-
-/***********************************************************************
- * HEAP_InsertFreeBlock
+/*************************************************************************
+ * freelist_balance
*
- * Insert a free block into the free list.
+ * Balance the list at freelistIndex with the list at freelistIndex+1
+ *
+ * RETURNS
+ * Number of moved entries.
*/
-static inline void HEAP_InsertFreeBlock( HEAP *heap, ARENA_FREE *pArena, BOOL last )
+static DWORD freelist_balance ( HEAP *heap, unsigned int freelistIndex )
{
- FREE_LIST_ENTRY *pEntry = heap->freeList + get_freelist_index( pArena->size + sizeof(*pArena) );
- if (last)
+
+ ARENA_FREE *pArena;
+ struct list *insertAfter, *ptr,
+ *danglingListTail = NULL,
+ *danglingListHead = NULL;
+
+ int count;
+ BOOL grow;
+ DWORD arenaSize, newSize, prevListSize;
+
+ FREE_LIST_ENTRY *listA = &heap->freeList[freelistIndex];
+ FREE_LIST_ENTRY *listB = &heap->freeList[freelistIndex + 1];
+
+ if (HEAP_FREELIST_RATIO(listA->count, listB->count) <= HEAP_FREELIST_BALANCE_RATIO_THRESH)
+ return 0;
+
+ prevListSize = 0;
+
+ if (freelistIndex != 0)
+ prevListSize = heap->freeList[freelistIndex - 1].size;
+
+ if (listA->count > listB->count)
{
- /* insert at end of free list, i.e. before the next free list entry */
- pEntry++;
- if (pEntry == &heap->freeList[HEAP_NB_FREE_LISTS]) pEntry = heap->freeList;
- list_add_before( &pEntry->arena.entry, &pArena->entry );
+ if (listA->flags & HEAP_FREELIST_FLAG_DONT_RESIZE_DOWN)
+ return 0;
+
+ grow = FALSE;
+ newSize = HEAP_FREELIST_SIZE_STEP_TOWARDS(listA, prevListSize);
+
+ insertAfter = &listB->arena.entry;
+ ptr = &listA->arena.entry;
}
else
{
- /* insert at head of free list */
- list_add_after( &pEntry->arena.entry, &pArena->entry );
+ if (listA->flags & HEAP_FREELIST_FLAG_DONT_RESIZE_UP)
+ return 0;
+
+ grow = TRUE;
+ newSize = HEAP_FREELIST_SIZE_STEP_TOWARDS(listA, listB->size);
+
+ insertAfter = listB->arena.entry.prev;
+ ptr = &listB->arena.entry;
+ }
+
+ /* reached maximum granularity */
+ if (newSize == listA->size)
+ {
+ if (grow) newSize += ALIGNMENT;
+ else newSize -= ALIGNMENT;
+
+ listA->granularity = HEAP_FREELIST_MAX_GRANULARITY;
+ }
+
+ /* lists can't (and should not) be balanced any further */
+ if (newSize <= (prevListSize & ARENA_SIZE_MASK) ||
+ newSize >= (listB->size & ARENA_SIZE_MASK))
+ {
+
+ if (grow) listA->flags |= HEAP_FREELIST_FLAG_DONT_RESIZE_UP;
+ else listA->flags |= HEAP_FREELIST_FLAG_DONT_RESIZE_DOWN;
+
+ return 0;
+ }
+
+ count = 0;
+
+ while (1)
+ {
+ ptr = ptr->next;
+
+ pArena = LIST_ENTRY( ptr, ARENA_FREE, entry );
+
+ arenaSize = pArena->size & ARENA_SIZE_MASK;
+
+ if (arenaSize == 0) break;
+
+ if ( (grow && arenaSize <= newSize) || (!grow && arenaSize > newSize))
+ {
+ list_remove(ptr);
+
+ if (danglingListTail)
+ {
+ danglingListTail->next = ptr;
+ ptr->prev = danglingListTail;
+ } else {
+ danglingListHead = ptr;
+ }
+
+ danglingListTail = ptr;
+
+ count++;
+ }
+
}
+
+ TRACE("BEFORE (%u / %u): %u / %u (thresh %u)\n",
+ freelistIndex, freelistIndex + 1, listA->count, listB->count, listA->size);
+
+ if (count)
+ {
+ /* If we made things worse, undo changes and increase granularity. */
+ if (min(listA->count, listB->count) + count >=
+ max(listA->count, listB->count))
+ {
+
+ /* insert back at list head */
+ danglingListTail->next = danglingListHead->prev->next;
+ danglingListTail->next->prev = danglingListTail;
+
+ danglingListHead->prev->next = danglingListHead;
+
+ if (listA->granularity >= HEAP_FREELIST_MAX_GRANULARITY)
+ {
+ if (grow) listA->flags |= HEAP_FREELIST_FLAG_DONT_RESIZE_UP;
+ else listA->flags |= HEAP_FREELIST_FLAG_DONT_RESIZE_DOWN;
+ }
+ else listA->granularity = listA->granularity * 2 + 1;
+
+ TRACE("UNDID CHANGES\n");
+
+ return 0;
+ }
+
+ /* Otherwise insert dangling list */
+ danglingListHead->prev = insertAfter;
+ danglingListTail->next = insertAfter->next;
+
+ insertAfter->next->prev = danglingListTail;
+ insertAfter->next = danglingListHead;
+
+ listA->count += grow ? count : -count;
+ listB->count -= grow ? count : -count;
+
+ listA->largest_arena = 0;
+ listB->largest_arena = 0;
+ }
+
+ listA->size = newSize;
+ listA->granularity = 1;
+
+ TRACE("AFTER (%u / %u): %u / %u (thresh %u)\n",
+ freelistIndex, freelistIndex + 1, listA->count, listB->count, listA->size);
+
+ /* clear flags on neighbouring lists */
+ listB->flags &= ~HEAP_FREELIST_FLAG_DONT_RESIZE;
+
+ if (freelistIndex != 0)
+ heap->freeList[freelistIndex - 1].flags &= ~HEAP_FREELIST_FLAG_DONT_RESIZE;
+
+ return count;
+}
+
+
+/***********************************************************************
+ * HEAP_InsertFreeBlock
+ *
+ * Insert a free block into the free list.
+ */
+static void HEAP_InsertFreeBlock( HEAP *heap, ARENA_FREE *pArena )
+{
+ FREE_LIST_ENTRY *pEntry;
+
+ DWORD size = pArena->size & ARENA_SIZE_MASK;
+
+ unsigned int freelistIndex = get_freelist_index( heap, size );
+
+ pEntry = &heap->freeList[freelistIndex];
+
pArena->size |= ARENA_FLAG_FREE;
+
+ /* insert at head of free list */
+ list_add_after( &pEntry->arena.entry, &pArena->entry );
+
+ pEntry->count++;
+
+ /* update largest_arena if we are currently keeping track of it or if this
+ * is the first element */
+ if (pEntry->count == 1 || (pEntry->largest_arena && size > pEntry->largest_arena))
+ pEntry->largest_arena = size;
+
+ /* Check whether we need to balance this free list with its neighbours */
+ if (pEntry->count < HEAP_FREELIST_BALANCE_COUNT_THRESH ||
+ freelistIndex + 1 == HEAP_NB_FREE_LISTS) return;
+
+ if (freelistIndex + 2 < HEAP_NB_FREE_LISTS &&
+ freelist_balance(heap, freelistIndex)) return;
+
+ if (freelistIndex > 0) freelist_balance(heap, freelistIndex - 1);
+}
+
+/***********************************************************************
+ * HEAP_RemoveFreeBlock
+ *
+ * Remove a free block from the free list.
+ */
+static inline void HEAP_RemoveFreeBlock( HEAP *heap, ARENA_FREE *pArena )
+{
+ DWORD size = pArena->size & ARENA_SIZE_MASK;
+
+ FREE_LIST_ENTRY *pEntry = &heap->freeList[get_freelist_index( heap, size )];
+
+ assert(pEntry->count);
+
+ pEntry->count--;
+
+ list_remove(&pArena->entry);
+
+ pArena->size &= ~ARENA_FLAG_FREE;
+
+ /* If the largest arena is removed, reset tracker to 0.
+ * Note that this will still happen if there are multiple arenas
+ * of the same size and the tracker will only be updated on
+ * the next miss in HEAP_FindFreeBlock */
+ if (pEntry->largest_arena == size)
+ pEntry->largest_arena = 0;
}
@@ -592,7 +815,7 @@ static void HEAP_CreateFreeBlock( SUBHEAP *subheap, void *ptr, SIZE_T size )
{
/* Remove the next arena from the free list */
ARENA_FREE *pNext = (ARENA_FREE *)((char *)ptr + size);
- list_remove( &pNext->entry );
+ HEAP_RemoveFreeBlock( subheap->heap, pNext );
size += (pNext->size & ARENA_SIZE_MASK) + sizeof(*pNext);
mark_block_free( pNext, sizeof(ARENA_FREE), flags );
}
@@ -611,7 +834,7 @@ static void HEAP_CreateFreeBlock( SUBHEAP *subheap, void *ptr, SIZE_T size )
/* Last, insert the new block into the free list */
pFree->size = size - sizeof(*pFree);
- HEAP_InsertFreeBlock( subheap->heap, pFree, last );
+ HEAP_InsertFreeBlock( subheap->heap, pFree );
}
@@ -647,7 +870,7 @@ static void HEAP_MakeInUseBlockFree( SUBHEAP *subheap, ARENA_INUSE *pArena )
pFree = *((ARENA_FREE **)pArena - 1);
size += (pFree->size & ARENA_SIZE_MASK) + sizeof(ARENA_FREE);
/* Remove it from the free list */
- list_remove( &pFree->entry );
+ HEAP_RemoveFreeBlock( heap, pFree );
}
else pFree = (ARENA_FREE *)pArena;
@@ -667,7 +890,7 @@ static void HEAP_MakeInUseBlockFree( SUBHEAP *subheap, ARENA_INUSE *pArena )
size = 0;
/* Remove the free block from the list */
- list_remove( &pFree->entry );
+ HEAP_RemoveFreeBlock( heap, pFree );
/* Remove the subheap from the list */
list_remove( &subheap->entry );
/* Free the memory */
@@ -930,14 +1153,17 @@ static SUBHEAP *HEAP_CreateSubHeap( HEAP *heap, LPVOID address, DWORD flags,
/* Build the free lists */
- heap->freeList = (FREE_LIST_ENTRY *)((char *)heap + subheap->headerSize);
- subheap->headerSize += HEAP_NB_FREE_LISTS * sizeof(FREE_LIST_ENTRY);
list_init( &heap->freeList[0].arena.entry );
- for (i = 0, pEntry = heap->freeList; i < HEAP_NB_FREE_LISTS; i++, pEntry++)
+ for (i = 0; i < HEAP_NB_FREE_LISTS; i++)
{
+ pEntry = &heap->freeList[i];
pEntry->arena.size = 0 | ARENA_FLAG_FREE;
pEntry->arena.magic = ARENA_FREE_MAGIC;
- if (i) list_add_after( &pEntry[-1].arena.entry, &pEntry->arena.entry );
+ pEntry->size = HEAP_defaultFreeListSizes[i] & ARENA_SIZE_MASK;
+ pEntry->count = 0;
+ pEntry->flags = 0;
+ pEntry->granularity = 1;
+ if (i) list_add_after( &heap->freeList[i-1].arena.entry, &pEntry->arena.entry );
}
/* Initialize critical section */
@@ -993,7 +1219,27 @@ static ARENA_FREE *HEAP_FindFreeBlock( HEAP *heap, SIZE_T size,
SUBHEAP *subheap;
struct list *ptr;
SIZE_T total_size;
- FREE_LIST_ENTRY *pEntry = heap->freeList + get_freelist_index( size + sizeof(ARENA_INUSE) );
+
+ DWORD requested_size = size - sizeof(ARENA_FREE) + sizeof(ARENA_INUSE);
+ DWORD largest_arena = 0;
+
+ int freelistIndex = get_freelist_index( heap, requested_size );
+
+ FREE_LIST_ENTRY *pEntry = &heap->freeList[freelistIndex];
+
+ /* If we already know there's no large-enough block in this list,
+ * skip to the next one, where (if there's any block) the first
+ * block is guaranteed to be large enough. Making this O(1).
+ */
+ if (pEntry->largest_arena && pEntry->largest_arena < requested_size)
+ {
+ freelistIndex++;
+
+ if (freelistIndex == HEAP_NB_FREE_LISTS)
+ goto grow;
+
+ pEntry = &heap->freeList[freelistIndex];
+ }
/* Find a suitable free list, and in it find a block large enough */
@@ -1001,17 +1247,34 @@ static ARENA_FREE *HEAP_FindFreeBlock( HEAP *heap, SIZE_T size,
while ((ptr = list_next( &heap->freeList[0].arena.entry, ptr )))
{
ARENA_FREE *pArena = LIST_ENTRY( ptr, ARENA_FREE, entry );
- SIZE_T arena_size = (pArena->size & ARENA_SIZE_MASK) +
- sizeof(ARENA_FREE) - sizeof(ARENA_INUSE);
- if (arena_size >= size)
+ DWORD arena_size = pArena->size & ARENA_SIZE_MASK;
+
+ if (arena_size >= requested_size)
{
subheap = HEAP_FindSubHeap( heap, pArena );
if (!HEAP_Commit( subheap, (ARENA_INUSE *)pArena, size )) return NULL;
*ppSubHeap = subheap;
return pArena;
}
+
+ if (pEntry != NULL)
+ {
+ if (arena_size > largest_arena)
+ {
+ largest_arena = arena_size;
+ }
+ else if (arena_size == 0)
+ {
+ /* We reached the next free list and didn't manage
+ * to find any block large enough. Update largest_arena
+ * to keep this from happening again on subsequent runs. */
+ pEntry->largest_arena = largest_arena;
+ pEntry = NULL;
+ }
+ }
}
+grow:
/* If no block was found, attempt to grow the heap */
if (!(heap->flags & HEAP_GROWABLE))
@@ -1696,8 +1959,7 @@ PVOID WINAPI RtlAllocateHeap( HANDLE heap, ULONG flags, SIZE_T size )
}
/* Remove the arena from the free list */
-
- list_remove( &pArena->entry );
+ HEAP_RemoveFreeBlock( heap, pArena );
/* Build the in-use arena */
@@ -1705,7 +1967,7 @@ PVOID WINAPI RtlAllocateHeap( HANDLE heap, ULONG flags, SIZE_T size )
/* in-use arena is smaller than free arena,
* so we have to add the difference to the size */
- pInUse->size = (pInUse->size & ~ARENA_FLAG_FREE) + sizeof(ARENA_FREE) - sizeof(ARENA_INUSE);
+ pInUse->size = pInUse->size + sizeof(ARENA_FREE) - sizeof(ARENA_INUSE);
pInUse->magic = ARENA_INUSE_MAGIC;
/* Shrink the block */
@@ -1854,7 +2116,7 @@ PVOID WINAPI RtlReAllocateHeap( HANDLE heap, ULONG flags, PVOID ptr, SIZE_T size
{
/* The next block is free and large enough */
ARENA_FREE *pFree = (ARENA_FREE *)pNext;
- list_remove( &pFree->entry );
+ HEAP_RemoveFreeBlock( heap, pFree );
pArena->size += (pFree->size & ARENA_SIZE_MASK) + sizeof(*pFree);
if (!HEAP_Commit( subheap, pArena, rounded_size )) goto oom;
notify_realloc( pArena + 1, oldActualSize, size );
@@ -1872,10 +2134,9 @@ PVOID WINAPI RtlReAllocateHeap( HANDLE heap, ULONG flags, PVOID ptr, SIZE_T size
/* Build the in-use arena */
- list_remove( &pNew->entry );
+ HEAP_RemoveFreeBlock( heap, pNew );
pInUse = (ARENA_INUSE *)pNew;
- pInUse->size = (pInUse->size & ~ARENA_FLAG_FREE)
- + sizeof(ARENA_FREE) - sizeof(ARENA_INUSE);
+ pInUse->size = pInUse->size + sizeof(ARENA_FREE) - sizeof(ARENA_INUSE);
pInUse->magic = ARENA_INUSE_MAGIC;
HEAP_ShrinkBlock( newsubheap, pInUse, rounded_size );
--
2.9.3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment