Last active
April 30, 2023 22:01
-
-
Save Sam-Belliveau/18d4d9f1bf42c4ac630eba38e355005e to your computer and use it in GitHub Desktop.
Hypothetical data structure called Dense Heap. Involves packing data close to each other in a vector and using links to efficiently backfill it. This only works as all data types are guaranteed to be the same size.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2023 Sam Belliveau. All rights reserved. | |
// | |
// Permission is hereby granted, free of charge, to any person obtaining a copy | |
// of this software and associated documentation files (the "Software"), to deal | |
// in the Software without restriction, including without limitation the rights | |
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
// copies of the Software, and to permit persons to whom the Software is | |
// furnished to do so, subject to the following conditions: | |
// | |
// The above copyright notice and this permission notice shall be included in all | |
// copies or substantial portions of the Software. | |
// | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
// SOFTWARE. | |
use std::{ | |
cell::{Cell, UnsafeCell}, | |
hint::unreachable_unchecked, | |
marker::PhantomData, | |
mem::{replace, ManuallyDrop}, | |
ops::{Deref, DerefMut, Drop}, | |
}; | |
enum DHeapNode<T: Sized> { | |
/// Edge is always the last element of the vector. When the | |
/// head points to the edge, new memory must be allocated. | |
Edge(), | |
/// Empty represents a previously occupied slot that has | |
/// been freed. It points to the previous head when | |
/// it was freed, creating a chain of free blocks for | |
/// future allocations. | |
Empty(usize), | |
/// Holding represents a memory slot in use by a DBox<_>. | |
/// The memory is owned by the DBox<_> pointing to it, | |
/// which is why it is wrapped in a ManuallyDrop<_>. The DBox<_> | |
/// is guaranteed to drop before the DHeap<_>. | |
Holding(ManuallyDrop<T>), | |
/// When calling DBox.into_inner(), memory is moved out of the | |
/// DHeap<_> before the DBox<_> has dropped. This serves as an indicator | |
/// for the DBox<_> not to panic when it finds its memory moved during | |
/// the dropping process. | |
Moved(), | |
} | |
use DHeapNode::*; | |
pub struct DHeap<T: Sized> { | |
buffer: UnsafeCell<Vec<DHeapNode<T>>>, | |
head: Cell<usize>, | |
} | |
impl<T> DHeap<T> { | |
pub fn with_capacity(capacity: usize) -> Self { | |
assert!(capacity > 1); | |
DHeap { | |
buffer: { | |
let mut memory = Vec::with_capacity(capacity); | |
memory.push(Edge()); | |
memory.into() | |
}, | |
head: Cell::new(0), | |
} | |
} | |
} | |
pub struct DBox<'a, T> { | |
heap: &'a DHeap<T>, | |
index: usize, | |
_marker: PhantomData<T>, | |
} | |
impl<T> DHeap<T> { | |
fn memory(&self) -> &mut Vec<DHeapNode<T>> { | |
unsafe { &mut *self.buffer.get() } | |
} | |
/// Allocates memory for the given value `v` in the `DHeap` and returns a `DBox` pointing to it. | |
/// | |
/// This function is marked `unsafe` because it may potentially invalidate existing references | |
/// if the underlying vector needs to be resized. However, `DBox` instances will still function correctly. | |
/// | |
/// When the end of the free block list is reached, a new element is pushed during allocation. If this | |
/// new element requires the vector to grow, any existing references to elements within the dense heap | |
/// might become invalid. This risk should be carefully considered when using this heap. | |
/// | |
/// One approach to mitigate this risk is to use safe_new(). | |
/// | |
/// # Safety | |
/// | |
/// Users must ensure that no references to elements within the dense heap are held when calling this function. | |
/// If references are held, they may become invalid after the function call. | |
pub unsafe fn new(&self, v: T) -> DBox<T> { | |
let index = self.head.get(); | |
match self.memory()[index] { | |
Edge() => { | |
// The implementation's weak point lies in this push operation, which is unavoidable. | |
// When the end of the free block list is reached, a new element must be pushed | |
// during allocation. If the new element causes the vector to grow, it leads to a problem: | |
// any references to elements within the dense heap become invalid. | |
// It's crucial to carefully consider this risk when using this heap. | |
self.head.set(self.size()); | |
self.memory().push(Edge()); | |
} | |
Empty(head) => self.head.set(head), | |
_ => panic!("invalid head pointer! [corrupted memory]"), | |
} | |
self.memory()[index] = Holding(ManuallyDrop::new(v)); | |
DBox { | |
heap: self, | |
index: index, | |
_marker: PhantomData, | |
} | |
} | |
/// Provides a safe alternative to `DHeap::new()` by attempting to allocate | |
/// memory without resizing the underlying vector. | |
/// | |
/// This function ensures that no existing references will be invalidated during | |
/// the allocation process, as it only allocates memory when there is available | |
/// capacity within the reserved memory. However, if the reserved memory is | |
/// exhausted, an error is returned. | |
/// | |
/// # Returns | |
/// | |
/// - `Ok(DBox<T>)` if the allocation was successful. | |
/// - `Err(&'static str)` if there is no available capacity within the reserved memory. | |
pub fn safe_new(&self, v: T) -> Result<DBox<T>, &'static str> { | |
if self.memory().len() == self.memory().capacity() { | |
Err("out of reserved memory!") | |
} else { | |
// SAFETY: The vector is not resized, so no existing references are invalidated. | |
unsafe { Ok(self.new(v)) } | |
} | |
} | |
/// Retrieves the current memory usage of the `DHeap`. | |
/// | |
/// This function returns the number of elements in the underlying vector, | |
/// which represents the total memory occupied by the `DHeap`. | |
/// | |
/// # Returns | |
/// | |
/// - A `usize` representing the memory usage of the `DHeap`. | |
pub fn size(&self) -> usize { | |
self.memory().len() | |
} | |
} | |
impl<'a, T> DBox<'a, T> { | |
fn cell(&self) -> &mut DHeapNode<T> { | |
&mut self.heap.memory()[self.index] | |
} | |
/// Consumes the `DBox` and retrieves the inner value `T`. | |
/// | |
/// This function replaces the `DBox`'s memory cell with a `Moved` state, indicating | |
/// that the memory has been moved out of the `DHeap` before the `DBox` is dropped. | |
/// After replacing the cell, it returns the inner value of the `DBox`. | |
/// | |
/// # Returns | |
/// | |
/// - The inner value `T` contained within the `DBox`. | |
pub fn into_inner(self) -> T { | |
match replace(self.cell(), Moved()) { | |
Holding(value) => ManuallyDrop::into_inner(value), | |
_ => panic!("use after free! [corrupted memory]"), | |
} | |
} | |
} | |
impl<'a, T> Drop for DBox<'a, T> { | |
fn drop(&mut self) { | |
match self.cell() { | |
Moved() => {} | |
Holding(value) => { | |
// SAFETY: The memory cell is immediately replaced with an empty cell after dropping. | |
unsafe { ManuallyDrop::drop(value) } | |
} | |
_ => panic!("double free! [corrupted memory]"), | |
} | |
*self.cell() = Empty(self.heap.head.replace(self.index)); | |
} | |
} | |
impl<'a, T> Deref for DBox<'a, T> { | |
type Target = T; | |
fn deref(&self) -> &Self::Target { | |
if let Holding(value) = &*self.cell() { | |
value.deref() | |
} else { | |
// SAFETY: | |
// This code is frequently executed, so we use unsafe code to bypass the match. | |
// This should never be reached unless memory corruption occurs, but the | |
// compiler isn't aware of this guarantee. | |
unsafe { unreachable_unchecked() } | |
} | |
} | |
} | |
impl<'a, T> DerefMut for DBox<'a, T> { | |
fn deref_mut(&mut self) -> &mut Self::Target { | |
if let Holding(value) = &mut *self.cell() { | |
value.deref_mut() | |
} else { | |
// SAFETY: | |
// This code is frequently executed, so we use unsafe code to bypass the match. | |
// This should never be reached unless memory corruption occurs, but the | |
// compiler isn't aware of this guarantee. | |
unsafe { unreachable_unchecked() } | |
} | |
} | |
} | |
impl<'a, T> AsRef<T> for DBox<'a, T> { | |
fn as_ref(&self) -> &T { | |
self.deref() | |
} | |
} | |
impl<'a, T> AsMut<T> for DBox<'a, T> { | |
fn as_mut(&mut self) -> &mut T { | |
self.deref_mut() | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn create_dheap() { | |
let heap: DHeap<i32> = DHeap::with_capacity(16); | |
assert_eq!(heap.size(), 1); | |
} | |
#[test] | |
fn allocate_and_deallocate() { | |
let heap: DHeap<i32> = DHeap::with_capacity(16); | |
let val = 42; | |
let dbox = heap.safe_new(val).unwrap(); | |
assert_eq!(heap.size(), 2); | |
assert_eq!(*dbox, val); | |
let inner_val = dbox.into_inner(); | |
assert_eq!(inner_val, val); | |
assert_eq!(heap.size(), 2); | |
} | |
#[test] | |
fn multiple_allocations() { | |
let heap: DHeap<i32> = DHeap::with_capacity(16); | |
let vals = vec![1, 2, 3, 4, 5]; | |
let mut dboxes = Vec::new(); | |
for val in &vals { | |
dboxes.push(heap.safe_new(*val).unwrap()); | |
} | |
for (i, dbox) in dboxes.iter().enumerate() { | |
assert_eq!(**dbox, vals[i]); | |
} | |
assert_eq!(heap.size(), 6); | |
dboxes.clear(); | |
assert_eq!(heap.size(), 6); | |
for val in &vals { | |
dboxes.push(heap.safe_new(*val).unwrap()); | |
} | |
for (i, dbox) in dboxes.iter().enumerate() { | |
assert_eq!(**dbox, vals[i]); | |
} | |
assert_eq!(heap.size(), 6); | |
for dbox in dboxes { | |
dbox.into_inner(); | |
} | |
assert_eq!(heap.size(), 6); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment