Skip to content

Instantly share code, notes, and snippets.

@amnn
Created February 25, 2024 18:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save amnn/261503d3e0d657f2dcc7084ff0f8f08f to your computer and use it in GitHub Desktop.
Save amnn/261503d3e0d657f2dcc7084ff0f8f08f to your computer and use it in GitHub Desktop.
BigVector.move
/// BigVector is an arbitrary sized vector-like data structure,
/// implemented using an on-chain B+ Tree.
///
/// Elements in the vector are distributed between a fixed size
/// `tail`, stored in a `vector`, and an arbitrary size B+ Tree for
/// the rest of the data. Nodes in the B+ Tree are stored as
/// individual dynamic fields hanging off the `BigVector`.
///
/// This data structure supports:
///
/// - Fast (practically constant, worst-case logarithmic) push and pop
/// from the tail, swap remove, and remove from the interior.
///
/// - Bulk popping slices from the back.
///
/// - Forward and backward iteration from arbitrary points.
///
module big_vector::big_vector {
use sui::dynamic_field as df;
use sui::object::{Self, UID};
use sui::tx_context::TxContext;
use std::option::{Self, Option};
use std::vector;
struct BigVector<E: store> has key, store {
id: UID,
/// How deep the tree structure is.
depth: u8,
/// Total number of elements that this vector contains (across
/// both the tree structure and the tail.)
length: u64,
/// Max size of leaf nodes (counted in number of elements, `E`).
max_slice_size: u64,
/// Max size of interior nodes (counted in number of child
/// nodes hanging off it).
max_partitions: u64,
/// ID of the root of the tree structure. Value of
/// `TAIL_SLICE` means there's no root.
root_id: u64,
/// The last node ID that was allocated (for nodes that are
/// hanging off this big vector, as dynamic fields).
last_id: u64,
/// The tail of the vector. This is always non-empty, but is
/// held in an `Option` to make it easy to replace with a new
/// empty slice.
tail: Option<Slice<E>>
}
struct Node has store, drop {
offsets: vector<u64>,
children: vector<u64>,
}
struct Slice<E: store> has store, drop {
/// Previous slice in the intrusive doubly-linked list data structure
prev: u64,
/// Next slice in the intrusive doubly-linked list data structure
next: u64,
vals: vector<E>
}
struct SliceRef has copy, drop, store { id: u64 }
// === Error Codes ===
/// BigVector is not empty.
const ENotEmpty: u64 = 0;
/// Max Slice Size provided is too small.
const ESliceTooSmall: u64 = 1;
/// Max Slice Size provided is too big.
const ESliceTooBig: u64 = 2;
/// Max Partition Size is too small.
const EPartitionTooSmall: u64 = 3;
/// Max Partition Size is too big.
const EPartitionTooBig: u64 = 4;
/// Attempt to access an index out of bounds.
const EOutOfBounds: u64 = 5;
/// Internal Error: TODO Docs
const EInternalEmptyTail: u64 = 6;
/// Internal Error: TODO Docs
const EInternalBadSplit: u64 = 7;
// === Constants ===
/// Sentinel representing the last slice in the vector, which is
/// stored inline.
const TAIL_SLICE: u64 = 0;
/// Leaf nodes of `BigVector` can't be bigger than this.
const MAX_SLICE_SIZE: u64 = 262_144;
/// Internal nodes of `BigVector` can't be bigger than this.
const MAX_PARTITIONS: u64 = 4096;
// === Constructors ==
/// Construct a new, empty `BigVector`. `max_slice_size` contains
/// the maximum size of its leaf nodes, and `max_partitions`
/// contains the maximum fan-out of its interior nodes.
public fun new<E: store>(
max_slice_size: u64,
max_partitions: u64,
ctx: &mut TxContext,
): BigVector<E> {
assert!(0 < max_slice_size, ESliceTooSmall);
assert!(max_slice_size <= MAX_SLICE_SIZE, ESliceTooBig);
assert!(1 < max_partitions, EPartitionTooSmall);
assert!(max_partitions <= MAX_PARTITIONS, EPartitionTooBig);
BigVector {
id: object::new(ctx),
depth: 0,
length: 0,
max_slice_size,
max_partitions,
root_id: TAIL_SLICE,
last_id: TAIL_SLICE,
tail: option::some(Slice {
prev: TAIL_SLICE,
next: TAIL_SLICE,
vals: vector[],
})
}
}
// === Destructors ===
/// Destroy `self` as long as it is empty, even if its elements
/// are not droppable. Fails if `self` is not empty.
public fun destroy_empty<E: store>(self: BigVector<E>) {
let BigVector {
id,
depth: _,
length,
max_slice_size: _,
max_partitions: _,
root_id: _,
last_id: _,
tail,
} = self;
let Slice {
prev: _,
next: _,
vals,
} = option::destroy_some(tail);
assert!(length == 0, ENotEmpty);
vector::destroy_empty(vals);
object::delete(id);
}
/// Destroy `self`, even if it contains elements, as long as they
/// are droppable.
public fun drop<E: store + drop>(self: BigVector<E>) {
let BigVector {
id,
depth,
length: _,
max_slice_size: _,
max_partitions: _,
root_id,
last_id: _,
tail,
} = self;
let Slice {
prev: _,
next: _,
vals: _,
} = option::destroy_some(tail);
drop_node<E>(&mut id, depth, root_id);
object::delete(id);
}
// === BigVector Accessors ===
/// Whether `self` contains no elements.
public fun is_empty<E: store>(self: &BigVector<E>): bool {
self.length == 0
}
/// The number of elements contained in `self`.
public fun length<E: store>(self: &BigVector<E>): u64 {
self.length
}
/// Access the `ix`-th element of `self`. Fails if `ix` is
/// out-of-bounds for `self.
public fun borrow<E: store>(self: &BigVector<E>, ix: u64): &E {
let (offset, ref) = slice_around(self, ix);
let slice = borrow_slice(self, ref);
slice_borrow(slice, ix - offset)
}
/// Mutably access the `ix`-th element of `self`. Fails if `ix`
/// is out-of-bounds for `self.
public fun borrow_mut<E: store>(self: &mut BigVector<E>, ix: u64): &mut E {
let (offset, ref) = slice_around(self, ix);
let slice = borrow_slice_mut(self, ref);
slice_borrow_mut(slice, ix - offset)
}
// === BigVector Mutators ===
/// Add an element to the end of `self`.
///
/// Runs in worst-case O(log_P(N)) time, where N is the size of
/// the vector, and P is its max partition size, but in practise,
/// runs in constant time as the last slice of the vector is held
/// inline in the data structure.
public fun push_back<E: store>(self: &mut BigVector<E>, v: E) {
self.length = self.length + 1;
let tail = option::borrow_mut(&mut self.tail);
if (vector::length(&tail.vals) >= self.max_slice_size) {
tail = push_tail(self);
};
vector::push_back(&mut tail.vals, v);
}
/// Remove one element from the end of `self` and return it.
///
/// Runs in worst-case O(log_P(N)) time, where N is the size of
/// the vector, and P is its max partition size, but in practise,
/// runs in constant time as the last slice of the vector is held
/// inline in the data structure.
///
/// Fails if `self` is empty.
public fun pop_back<E: store>(self: &mut BigVector<E>): E {
assert!(self.length > 0, EOutOfBounds);
self.length = self.length - 1;
let tail = option::borrow_mut(&mut self.tail);
if (vector::is_empty(&tail.vals)) {
tail = pop_tail(self);
};
vector::pop_back(&mut tail.vals)
}
/// Remove a whole slice of elements from the end of `self`.
///
/// Runs in worst-case O(log_P(N)) time, where N is the size of
/// the vector, and P is its max partition size.
///
/// The slices returned can be variable in size, but are
/// guaranteed to be non-empty.
///
/// Fails if the vector itself is empty.
public fun pop_slice<E: store>(self: &mut BigVector<E>): vector<E> {
assert!(self.length > 0, EOutOfBounds);
if (vector::is_empty(&option::borrow(&self.tail).vals)) {
pop_tail(self);
};
let Slice {
prev,
next,
vals,
} = option::swap(&mut self.tail, Slice {
prev: TAIL_SLICE,
next: TAIL_SLICE,
vals: vector[],
});
let prev_slice = borrow_slice_mut(self, SliceRef { id: prev });
prev_slice.next = next;
let next_slice = borrow_slice_mut(self, SliceRef { id: next });
next_slice.prev = prev;
self.length = self.length - vector::length(&vals);
vals
}
/// Remove the element at `ix` from `self` by first swapping it
/// with the last element and then popping it.
///
/// Runs in worst case O(log_P(N)) time, where N is the size of
/// the vector, and P is the max partition size.
///
/// Fails if `ix` is out-of-bounds for the vector.
public fun swap_remove<E: store>(self: &mut BigVector<E>, ix: u64): E {
assert!(ix < self.length, EOutOfBounds);
let initial_length = self.length;
self.length = self.length - 1;
let tail = option::borrow_mut(&mut self.tail);
if (vector::is_empty(&tail.vals)) {
tail = pop_tail(self);
};
let tail_off = initial_length - vector::length(&tail.vals);
if (ix > tail_off) {
// The index being removed is in the tail already.
vector::swap_remove(&mut tail.vals, ix - tail_off)
} else {
let last = vector::pop_back(&mut tail.vals);
let (offset, ref) = slice_around(self, ix);
let slice = borrow_slice_mut(self, ref);
let ix = ix - offset;
let len = vector::length(&slice.vals);
let val = vector::swap_remove(&mut slice.vals, ix);
vector::push_back(&mut slice.vals, last);
vector::swap(&mut slice.vals, ix, len - 1);
val
}
}
/// Remove the element at `ix` from `self`, returning it.
///
/// Runs in worst case O(log_P(N) + S) time, where N is the size
/// of the vector, P is the max partition size, and S is the max
/// slice size.
///
/// Fails if `ix` is out-of-bounds for the vector.
public fun remove<E: store>(self: &mut BigVector<E>, ix: u64): E {
abort 0
}
// === SliceRef ===
/// Reference to the first slice in the vector.
public fun first_slice<E: store>(self: &BigVector<E>): SliceRef {
SliceRef { id: option::borrow(&self.tail).next }
}
/// Reference to the last slice in the vector.
public fun last_slice<E: store>(_self: &BigVector<E>): SliceRef {
SliceRef { id: TAIL_SLICE }
}
/// Return a reference to the slice that contains `ix` along with
/// the slice's offset in the vector.
///
/// Fails if `ix` is out-of-bounds for this vector.
public fun slice_around<E: store>(
self: &BigVector<E>,
ix: u64,
): (u64, SliceRef) {
assert!(ix < self.length, EOutOfBounds);
let tail = option::borrow(&self.tail);
let tail_off = self.length - vector::length(&tail.vals);
if (ix >= tail_off) {
return (tail_off, SliceRef { id: TAIL_SLICE })
};
let (node_id, offset, depth) = (self.root_id, 0, self.depth);
while (depth > 0) {
let node: &Node = df::borrow(&self.id, node_id);
let (local_offset, child_id) = node_bisect(node, ix - offset);
node_id = child_id;
offset = offset + local_offset;
depth = depth - 1;
};
(offset, SliceRef { id: node_id })
}
/// Borrow a slice from this vector.
public fun borrow_slice<E: store>(
self: &BigVector<E>,
ref: SliceRef
): &Slice<E> {
if (ref.id == TAIL_SLICE) {
option::borrow(&self.tail)
} else {
df::borrow(&self.id, ref.id)
}
}
/// Borrow a slice from this vector, mutably.
public fun borrow_slice_mut<E: store>(
self: &mut BigVector<E>,
ref: SliceRef
): &mut Slice<E> {
if (ref.id == TAIL_SLICE) {
option::borrow_mut(&mut self.tail)
} else {
df::borrow_mut(&mut self.id, ref.id)
}
}
// === Slice Accessors ===
/// The reference to the next slice in (forward) iteration order,
/// wrapping around (the next slice of the last slice is the first
/// slice).
public fun slice_next<E: store>(self: &Slice<E>): SliceRef {
SliceRef { id: self.next }
}
/// The reference to the previous slice in (forward) iteration
/// order, wrapping around (the previous slice of the first slice
/// is the last slice).
public fun slice_prev<E: store>(self: &Slice<E>): SliceRef {
SliceRef { id: self.prev }
}
/// The number of elements contained in this slice.
public fun slice_length<E: store>(self: &Slice<E>): u64 {
vector::length(&self.vals)
}
/// Access an element from the slice at index `ix`. Fails if `ix`
/// is out of `slice`'s bounds.
public fun slice_borrow<E: store>(self: &Slice<E>, ix: u64): &E {
assert!(ix < vector::length(&self.vals), EOutOfBounds);
vector::borrow(&self.vals, ix)
}
/// Mutably access an element from the slice at index `ix`. Fails
/// if `ix` is out of `slice`'s bounds.
public fun slice_borrow_mut<E: store>(
self: &mut Slice<E>,
ix: u64,
): &mut E {
assert!(ix < vector::length(&self.vals), EOutOfBounds);
vector::borrow_mut(&mut self.vals, ix)
}
// === Private Helpers ===
/// Recursively `drop` the nodes under the node at id `node`.
fun drop_node<E: store + drop>(id: &mut UID, depth: u8, node: u64) {
if (node == TAIL_SLICE) {
// Sentinel ID -- nothing to clean-up.
return
} else if (depth == 0) {
let Slice<E> {
prev: _,
next: _,
vals: _,
} = df::remove(id, node);
} else {
let Node {
offsets: _,
children,
} = df::remove(id, node);
while (!vector::is_empty(&children)) {
drop_node<E>(id, depth - 1, vector::pop_back(&mut children));
}
}
}
/// Push the contents of the current tail of `self` into its tree
/// structure, replacing it with a fresh, empty tail. Returns a
/// reference to that fresh, empty tail.
///
/// Fails if the current tail is empty.
fun push_tail<E: store>(self: &mut BigVector<E>): &mut Slice<E> {
let old_tail = option::swap(&mut self.tail, Slice {
prev: TAIL_SLICE,
next: TAIL_SLICE,
vals: vector[],
});
assert!(!vector::is_empty(&old_tail.vals), EInternalEmptyTail);
let tail_off = self.length - vector::length(&old_tail.vals);
let old_next = old_tail.next;
old_tail.next = TAIL_SLICE;
let old_tail = alloc_node(self, old_tail);
let new_tail = option::borrow_mut(&mut self.tail);
new_tail.prev = old_tail;
new_tail.next = old_next;
if (self.root_id == TAIL_SLICE) {
// Empty, new node becomes root.
self.root_id = old_tail;
} else if (self.depth == 0) {
// One other slice, turn into a binary node
let new_root = binary_node(self.root_id, tail_off, old_tail);
self.depth = 1;
self.root_id = alloc_node(self, new_root);
} else {
// Tree already has nodes that need to be traversed.
let node_id = self.root_id;
let depth = self.depth;
let parents = vector[];
let offsets = vector[];
// Traverse down to the node that points to slices,
// remembering parent nodes and last offsets, to fix up
// the insertion offset when recursing back up.
while (depth > 1) {
let node: &Node = df::borrow(&self.id, node_id);
let (off, child) = node_last(node);
vector::push_back(&mut parents, node_id);
vector::push_back(&mut offsets, off);
tail_off = tail_off - off;
node_id = child;
depth = depth - 1;
};
// Try and insert `to_insert` at the end of the node, at
// `tail_off`. If the node is already full, it needs to
// be split, and the resulting node needs to be inserted
// into its parent.
let to_insert = old_tail;
loop {
let node: &mut Node = df::borrow_mut(&mut self.id, node_id);
// If the node has space, we can complete the
// insertion without disturbing its parents
if (vector::length(&node.children) < self.max_partitions) {
node_push(node, tail_off, to_insert);
break
};
// Node was full, so we need to split it, requiring a
// recursive push on its parents (or a new root).
let (pivot, rest) = node_split(node);
node_push(&mut rest, tail_off - pivot, to_insert);
to_insert = alloc_node(self, rest);
// If we have run out of parents to recurse into,
// we've hit the root -- this push has increased the
// overall depth of the tree.
if (vector::is_empty(&parents)) {
let new_root = binary_node(node_id, pivot, to_insert);
self.root_id = alloc_node(self, new_root);
self.depth = self.depth + 1;
break
} else {
node_id = vector::pop_back(&mut parents);
tail_off = vector::pop_back(&mut offsets) + pivot;
}
}
};
option::borrow_mut(&mut self.tail)
}
/// Replace the contents of the current tail of `self` with the
/// last tail in its tree structure (which is removed from the
/// tree structure in the process). Returns a reference to the
/// new tail slice.
///
/// Fails if the current tail is non-empty, or the tree structure
/// is empty.
fun pop_tail<E: store>(self: &mut BigVector<E>): &mut Slice<E> {
abort 0
}
// === Private Helpers: Node API ===
/// Allocate a numeric ID for `node` from `self`'s allocator, and
/// associate the node with `self` at that ID using a dynamic
/// field. Returns the ID allocated.
fun alloc_node<E: store, N: store>(
self: &mut BigVector<E>,
node: N,
): u64 {
let id = self.last_id + 1;
self.last_id = id;
df::add(&mut self.id, id, node);
id
}
/// Construct a new node containing two children (`left` and
/// `right`) separated by a `pivot`.
fun binary_node(left: u64, pivot: u64, right: u64): Node {
Node {
offsets: vector[pivot],
children: vector[left, right],
}
}
/// The last offset in the node, and the associated child
/// node/slice ID.
fun node_last(self: &Node): (u64, u64) {
let last = vector::length(&self.offsets);
(
*vector::borrow(&self.offsets, last - 1),
*vector::borrow(&self.children, last),
)
}
/// Binary search through the partitions in `self: Node` to return
/// the offset (first return) and ID (second return) of the child
/// node that contains `ix`.
///
/// All indices strictly below the first offset are mapped to the
/// first child, and all indices greater than or equal to the last
/// offset are mapped to the last child.
fun node_bisect(self: &Node, ix: u64): (u64, u64) {
let (lo, hi) = (0, vector::length(&self.offsets));
// Invariant: offsets[0, lo) <= ix < offsets[hi, ..)
while (lo < hi) {
let mid = (hi - lo) / 2 + lo;
if (ix < *vector::borrow(&self.offsets, mid)) {
hi = mid;
} else {
lo = mid + 1;
}
};
if (lo == 0) {
(0, *vector::borrow(&self.children, 0))
} else {
(
*vector::borrow(&self.offsets, lo - 1),
*vector::borrow(&self.children, lo),
)
}
}
/// Splits the contents of `self: Node` in half, keeping the lower
/// half in `self` and returning the pivot (the offset between
/// halves) and the upper half.
///
/// The pivot is given as an offset relative to `self` (so if it
/// is to be inserted in `self`'s parent, it must be shifted by
/// `self`'s offset first).
///
/// Offsets in the upper half are shifted down, treating the pivot
/// as their origin.
///
/// Fails if the node has no offsets (which is a degenerate case,
/// anyway).
fun node_split(self: &mut Node): (u64, Node) {
// Can't split a node if it doesn't have any offsets, because
// we need at least one to act as the pivot.
assert!(vector::length(&self.offsets) > 0, EInternalBadSplit);
let total = vector::length(&self.children);
let keep = total - total / 2;
let offsets = vector[];
let children = vector[];
loop {
vector::push_back(
&mut children,
vector::pop_back(&mut self.children),
);
if (vector::length(&self.children) <= keep) {
break
};
vector::push_back(
&mut offsets,
vector::pop_back(&mut self.offsets)
);
};
let pivot = vector::pop_back(&mut self.offsets);
vector::reverse(&mut offsets);
vector::reverse(&mut children);
// Shift all the offsets in the upper half down so that they
// are relative to the upper half's origin.
let (ix, len) = (0, vector::length(&offsets));
while (ix < len) {
let offset = *vector::borrow(&offsets, ix);
*vector::borrow_mut(&mut offsets, ix) = offset - pivot;
ix = ix + 1;
};
(pivot, Node { offsets, children })
}
fun node_push(self: &mut Node, off: u64, child: u64) {
vector::push_back(&mut self.offsets, off);
vector::push_back(&mut self.children, child);
}
// === Tests ===
#[test]
fun test_node_split_trivial() {
let node = binary_node(1, 42, 2);
let (pivot, rest) = node_split(&mut node);
assert!(node.children == vector[1], 0);
assert!(rest.children == vector[2], 0);
assert!(pivot == 42, 0);
}
#[test]
fun test_node_split_even() {
let node = Node {
offsets: vector[42, 43, 44],
children: vector[1, 2, 3, 4],
};
let (pivot, rest) = node_split(&mut node);
assert!(node.children == vector[1, 2], 0);
assert!(rest.children == vector[3, 4], 0);
assert!(node.offsets == vector[42], 0);
assert!(pivot == 43, 0);
assert!(rest.offsets == vector[1], 0);
}
#[test]
fun test_node_split_odd() {
let node = Node {
offsets: vector[42, 43, 44, 45],
children: vector[1, 2, 3, 4, 5],
};
let (pivot, rest) = node_split(&mut node);
assert!(node.children == vector[1, 2, 3], 0);
assert!(rest.children == vector[4, 5], 0);
assert!(node.offsets == vector[42, 43], 0);
assert!(pivot == 44, 0);
assert!(rest.offsets == vector[1], 0);
}
#[test]
fun test_node_bisect() {
let node = Node {
offsets: vector[1, 3, 8],
children: vector[0, 1, 2, 3],
};
// 0 1 2 3 4 5 6 7 8 9 A
let exp_o = vector[0, 1, 1, 3, 3, 3, 3, 3, 8, 8, 8];
let exp_c = vector[0, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3];
let ix = 0;
while (ix < vector::length(&exp_o)) {
let eo = *vector::borrow(&exp_o, ix);
let ec = *vector::borrow(&exp_c, ix);
let (o, c) = node_bisect(&node, ix);
assert!(eo == o, ix);
assert!(ec == c, ix);
ix = ix + 1;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment