Skip to content

Instantly share code, notes, and snippets.

@lancejpollard
Forked from felipecrv/btree.js
Created January 3, 2021 21:27
Show Gist options
  • Save lancejpollard/5d8391052ec73295b1c0a48744c578fb to your computer and use it in GitHub Desktop.
Save lancejpollard/5d8391052ec73295b1c0a48744c578fb to your computer and use it in GitHub Desktop.
In-memory B+ Tree implemented in Javascript with Flow type annotations
/* @flow */
const KEY_KIND_STRING = 1;
const KEY_KIND_NUMBER = 2;
const KEY_KIND_BOOL = 3;
const KEY_KIND_RECORD = 4;
type KeyKind = 1 | 2 | 3 | 4;
class KeyValue<K, V> {
key: ?K;
value: ?V;
constructor(key: ?K, value: ?V) {
this.key = key;
this.value = value;
}
};
type KeyValues<K, V> = Array<KeyValue<K, V>>;
type Node<K, V> = InnerNode<K, V> | LeafNode<K, V>;
type Atom = number | string | boolean;
function isAtomic(kind: KeyKind): boolean {
return kind < Btree.KEY_KIND_RECORD;
}
function ltAtom(a: any, b: any): boolean {
// NULLs are considered smaller than any other key
if (a === null || a === undefined) {
return b !== null && b !== undefined;
}
if (b === null || b === undefined) {
return false;
}
return a < b;
}
function ltRecord(a: any, b: any): boolean {
// NULLs are considered smaller than any other key
if (a === null || a === undefined) {
return b !== null && b !== undefined;
}
if (b === null || b === undefined) {
return false;
}
return a.lt(b);
}
function lt(kind: KeyKind, a: any, b: any): boolean {
return isAtomic(kind) ? ltAtom(a, b) : ltRecord(a, b);
}
function eqAtom(a: any, b: any): boolean {
if (a === null || a === undefined) {
return b === null || b === undefined;
}
return a === b;
}
function eqRecord(a: any, b: any): boolean {
if (a === null || a === undefined) {
return b === null || b === undefined;
}
if (b !== null && b !== undefined) {
return a.eq(b);
}
return false;
}
function eq(kind: KeyKind, a: any, b: any): boolean {
return isAtomic(kind) ? eqAtom(a, b) : eqRecord(a, b);
}
function findUpperBoundIndex<V>(keyKind: KeyKind, values: KeyValues<any, V>, key: any): number {
const _lt = isAtomic(keyKind) ? ltAtom : ltRecord;
let lo = 0;
let hi = values.length;
while (lo < hi) {
// Invariants:
// - answer in [lo, hi] and lo < hi
// - values.length / 2 <= mid <= values.length
let mid = Math.floor((lo + hi) / 2);
if (_lt(values[mid].key, key)) {
lo = mid + 1;
} else {
hi = mid;
}
}
return lo;
}
function findUpperBoundKeyIndex<V>(keyKind: KeyKind, keys: Array<any>, key: any): number {
const _lt = isAtomic(keyKind) ? ltAtom : ltRecord;
let lo = 0;
let hi = keys.length;
while (lo < hi) {
// Invariants:
// - answer in [lo, hi] and lo < hi
// - values.length / 2 <= mid <= values.length
let mid = Math.floor((lo + hi) / 2);
if (_lt(keys[mid], key)) {
lo = mid + 1;
} else {
hi = mid;
}
}
return lo;
}
class InnerNode<K, V> {
// k0, k1, .., kn-2
keys: Array<?K>;
// [.., k0], [.., k1], .., [.., kn-2], [.., kn-1]
children: Array<Node<K, V>>;
constructor(nodes: Array<Node<K, V>>) {
this.children = nodes;
this.keys = new Array(nodes.length - 1);
for (let i = 0; i < nodes.length - 1; i++) {
this.keys[i] = nodes[i].maxKey();
}
}
// Recursive: O(log N)
minKey(): ?K {
return this.children[0].minKey();
}
// Recursive: O(log N)
maxKey(): ?K {
return this.children[this.children.length - 1].maxKey();
}
// Recursive
size() {
let len = 0;
const first = this.children[0];
if (first.values) {
const leaves: Array<LeafNode<K, V>> = (this.children: any);
for (let i = 0; i < leaves.length; i++) {
len += leaves[i].values.length;
}
} else {
const nodes: Array<InnerNode<K, V>> = (this.children: any);
for (let i = 0; i < nodes.length; i++) {
len += nodes[i].size();
}
}
return len;
}
}
class LeafNode<K, V> {
values: KeyValues<K, V>;
constructor(values: KeyValues<K, V>) {
this.values = values;
}
// O(1)
minKey(): ?K {
return this.values[0].key;
}
// O(1)
maxKey(): ?K {
return this.values[this.values.length - 1].key;
}
}
class Btree<K, V> {
static KEY_KIND_STRING: KeyKind;
static KEY_KIND_NUMBER: KeyKind;
static KEY_KIND_BOOL: KeyKind;
static KEY_KIND_RECORD: KeyKind;
static KeyValue: Class<KeyValue<K, V>>;
static InnerNode: Class<InnerNode<K, V>>;
static LeafNode: Class<LeafNode<K, V>>;
static detail: {
isAtomic: typeof(isAtomic),
lt: typeof(lt),
eq: typeof(eq),
findUpperBoundIndex: typeof(findUpperBoundIndex),
findUpperBoundKeyIndex: typeof(findUpperBoundKeyIndex),
};
root: Node<K, V>;
keyKind: KeyKind;
maxNodeSize: number;
constructor(keyKind: KeyKind, maxNodeSize: number = 16) {
this.root = new LeafNode([]);
this.keyKind = keyKind;
this.maxNodeSize = maxNodeSize;
}
// Returns the leaf that can contain `key`
findLeaf(key: K): LeafNode<K, V> {
let node = this.root;
// While a leaf is not found...
while (!node.values) {
const inner: InnerNode<K, V> = (node: any);
const i = findUpperBoundKeyIndex(this.keyKind, inner.keys, key);
node = inner.children[i];
}
// node is a LeafNode
return (node: any);
}
find(key: K): ?KeyValue<K, V> {
const leaf = this.findLeaf(key);
const i = findUpperBoundIndex(this.keyKind, leaf.values, key);
const pair = leaf.values[i];
return (pair && eq(this.keyKind, key, pair.key)) ? pair : undefined;
}
partitionLeaf(leaf: LeafNode<K, V>): Array<Node<K, V>> {
const leftSize = Math.ceil(leaf.values.length / 2);
const leftValues = leaf.values.slice(0, leftSize);
const rightValues = leaf.values.slice(leftSize, leaf.values.length);
const left = new LeafNode(leftValues);
const right = new LeafNode(rightValues);
return [left, right];
}
partitionInner(inner: InnerNode<K, V>): Array<Node<K, V>> {
const leftSize = Math.ceil(inner.children.length / 2);
const leftChildren = inner.children.slice(0, leftSize);
const rightChildren = inner.children.slice(leftSize, inner.children.length);
const left = new InnerNode(leftChildren);
const right = new InnerNode(rightChildren);
return [left, right];
}
add(key: ?K, value: V) {
// Variables representing the current state in the search for the leaf node
// where the key will be inserted:
//
// - indexOnParent: the position of `node` in `parent`
// - parent: the parent of `node`
// - node: The node in the path towards the leaf node where the key will be inserted.
let indexOnParent = -1;
let parent: InnerNode<K, V> = (null: any);
let node = this.root;
// Check if the root is full and split it. Be either an inner node or a leaf
// node. A root split is the only event that makes the B-tree grow in
// height. That's how it's kept balanced.
if (node.values) { // root is a leaf
const leaf: LeafNode<K, V> = (node: any);
if (leaf.values.length >= this.maxNodeSize) {
const partitions = this.partitionLeaf(leaf);
parent = new InnerNode(partitions);
this.root = parent;
indexOnParent = lt(this.keyKind, key, parent.keys[0]) ? 0 : 1;
node = parent.children[indexOnParent];
}
} else { // root is an inner node
const inner: InnerNode<K, V> = (node: any);
if (inner.children.length >= this.maxNodeSize) {
const partitions = this.partitionInner(inner);
parent = new InnerNode(partitions);
this.root = parent;
indexOnParent = lt(this.keyKind, key, parent.keys[0]) ? 0 : 1;
node = parent.children[indexOnParent];
}
}
// While a leaf is not found...
while (!node.values) {
// If the node is full, we split it and reconfigure the parent. Here's an
// illustration of what the `parent` looks like before and after the
// split:
//
// k0 k1 k2
// [...k0] [...k...k1] [...k2] [...k3]
// ^ node
//
// k0 k k1 k2
// [...k0] [...k] [...k1] [...k2] [...k3]
// ^ left ^ right
//
let inner: InnerNode<K, V> = (node: any);
if (inner.children.length >= this.maxNodeSize) {
// Invariant: parent is not null.
// Proof: if parent is null, node is the root. But node is full and we
// checked before if the root was full to split it. So node is not the
// root. It has a parent.
const [left, right] = this.partitionInner(inner);
const maxLeftKey = left.maxKey(); // k in the illustration
parent.keys.splice(indexOnParent, 0, maxLeftKey);
parent.children.splice(indexOnParent, 1, left, right);
// The indexOnParent on parent might have shifted
indexOnParent = lt(this.keyKind, key, maxLeftKey) ? indexOnParent : indexOnParent + 1;
node = parent.children[indexOnParent];
inner = (node: any);
}
// Advance the search
indexOnParent = findUpperBoundKeyIndex(this.keyKind, inner.keys, key);
parent = inner;
node = parent.children[indexOnParent];
}
// Found a leaf. Add the the new key-value pair to it.
let leaf: LeafNode<K, V> = (node: any);
const i = findUpperBoundIndex(this.keyKind, leaf.values, key);
leaf.values.splice(i, 0, new KeyValue(key, value));
// Split the leaf if it overflowed after insertion
if (leaf.values.length > this.maxNodeSize) {
const [left, right] = this.partitionLeaf(leaf);
const maxLeftKey = left.maxKey();
parent.keys.splice(indexOnParent, 0, maxLeftKey);
parent.children.splice(indexOnParent, 1, left, right);
}
}
// Pre-conditions for the fuse* and borrow* methods
// 1. parent has more than one children (that's why fusing/borrowing is
// possible)
// 2. node has 1 children, thus node.keys is empty (that's why we're
// fusing/borrowing
// Additional pre-conditions:
// - next has less than 3 children
// - indexOnParent < parent.children.length - 1 since the index of next on
// parent is indexOnParent + 1
fuseWithNext(
parent: InnerNode<K, V>,
node: InnerNode<K, V>,
indexOnParent: number,
next: InnerNode<K, V>) {
node.keys = [node.children[0].maxKey()].concat(next.keys);
node.children = node.children.concat(next.children);
// Fix keys/children in the parent
if (indexOnParent + 1 < parent.keys.length) {
parent.keys[indexOnParent] = parent.keys[indexOnParent + 1];
parent.keys.splice(indexOnParent + 1, 1);
} else {
parent.keys[indexOnParent] = node.children[node.children.length - 1].maxKey();
parent.keys.splice(parent.keys.length - 1, 1);
}
parent.children.splice(indexOnParent + 1, 1);
}
// Additional pre-conditions:
// - prev has less than 3 children
// - indexOnParent > 0 since the index of prev on parent is indexOnParent - 1
fuseWithPrev(
parent: InnerNode<K, V>,
node: InnerNode<K, V>,
indexOnParent: number,
prev: InnerNode<K, V>) {
node.keys = prev.keys;
node.keys.push(prev.children[prev.children.length - 1].maxKey());
const child = node.children[0];
node.children = prev.children;
node.children.push(child);
// Fix keys/children in the parent
parent.keys.splice(indexOnParent - 1, 1);
parent.children.splice(indexOnParent - 1, 1);
}
// Additional pre-conditions:
// - next has >= 3 children
// - indexOnParent < parent.children.length - 1 since the index of next on
// parent is indexOnParent + 1
borrowFromNext(
parent: InnerNode<K, V>,
node: InnerNode<K, V>,
indexOnParent: number,
next: InnerNode<K, V>) {
const borrowCount = Math.ceil(next.children.length / 2);
// Copy keys/children from next
node.keys.push(node.children[node.children.length - 1].maxKey());
node.keys = node.keys.concat(next.keys.slice(0, borrowCount - 1));
const borrowed = next.children.slice(0, borrowCount);
node.children = node.children.concat(borrowed);
// Remove the borrowed keys/children from next
next.keys = next.keys.slice(borrowCount, next.keys.length);
next.children = next.children.slice(borrowCount, next.children.length);
// Fix keys/children in the parent
parent.keys[indexOnParent] = node.maxKey();
}
// Additional pre-conditions:
// - prev has >= 3 children
// - indexOnParent > 0 since the index of prev on parent is indexOnParent - 1
borrowFromPrev(
parent: InnerNode<K, V>,
node: InnerNode<K, V>,
indexOnParent: number,
prev: InnerNode<K, V>) {
const borrowStart = Math.ceil(prev.children.length / 2);
// Copy keys/children from prev
const borrowedKeys = prev.keys.slice(borrowStart, prev.keys.length);
borrowedKeys.push(prev.maxKey());
node.keys = borrowedKeys.concat(node.keys);
const borrowed = prev.children.slice(borrowStart, prev.children.length);
node.children = borrowed.concat(node.children);
// Remove the borrowed keys/children from prev
prev.keys = prev.keys.slice(0, borrowStart - 1);
prev.children = prev.children.slice(0, borrowStart);
// Fix keys/children in the parent
parent.keys[indexOnParent - 1] = prev.children[prev.children.length - 1].maxKey();
}
removeFirst(key: ?K): ?KeyValue<K, V> {
// While the root is an inner node containing only 1 children...
while (!this.root.values) {
const inner: InnerNode<K, V> = (this.root: any);
if (inner.children.length > 1) {
break;
}
// ...reduce the height of the tree.
this.root = inner.children[0];
}
let parent: InnerNode<K, V> = (null: any);
let indexOnParent = -1;
let node = this.root;
while (!node.values) {
const inner: InnerNode<K, V> = (node: any);
if (inner.children.length === 1) {
// Loop invariant: parent.keys is non-empty (i.e. parent.children.length >= 2)
const prev: ?InnerNode<K, V> = indexOnParent > 0 ?
(parent.children[indexOnParent - 1]: any) : null;
const next: ?InnerNode<K, V> = indexOnParent < parent.children.length - 1 ?
(parent.children[indexOnParent + 1]: any) : null;
// Try to fuse with a sibling or borrow nodes from it. Fusing is
// preferred over borrowing. The next sibling is preferred over the
// previous sibling. Either prev or next are non-null since the parent
// has more than one children.
let fused = false;
if (next) {
if (next.children.length < 3) {
this.fuseWithNext(parent, inner, indexOnParent, next);
fused = true;
}
} else if ((prev: any).children.length < 3) {
this.fuseWithPrev(parent, inner, indexOnParent, (prev: any));
fused = true;
}
if (!fused) {
if (next) {
this.borrowFromNext(parent, inner, indexOnParent, next);
} else {
this.borrowFromPrev(parent, inner, indexOnParent, (prev: any));
}
}
}
// Advance the search
indexOnParent = findUpperBoundKeyIndex(this.keyKind, inner.keys, key);
parent = inner;
node = parent.children[indexOnParent];
}
// Found a leaf. Remove the key-value pair if it exists.
let leaf: LeafNode<K, V> = (node: any);
const i = findUpperBoundIndex(this.keyKind, leaf.values, key);
const pair = leaf.values[i];
if (!pair || !eq(this.keyKind, key, pair.key)) {
return undefined;
}
leaf.values.splice(i, 1);
// Fix parent
if (leaf.values.length === 0 && parent) {
parent.keys.splice(Math.min(indexOnParent, parent.keys.length - 1), 1);
parent.children.splice(indexOnParent, 1);
}
return pair;
}
size() {
if (this.root.values) {
const leaf: LeafNode<K, V> = (this.root: any);
return leaf.values.length;
}
const inner: InnerNode<K, V> = (this.root: any);
return inner.size();
}
}
Btree.KEY_KIND_STRING = KEY_KIND_STRING;
Btree.KEY_KIND_NUMBER = KEY_KIND_NUMBER;
Btree.KEY_KIND_BOOL = KEY_KIND_BOOL;
Btree.KEY_KIND_RECORD = KEY_KIND_RECORD;
Btree.KeyValue = KeyValue;
Btree.InnerNode = InnerNode;
Btree.LeafNode = LeafNode;
Btree.detail = {
isAtomic,
lt,
eq,
findUpperBoundIndex,
findUpperBoundKeyIndex,
};
module.exports = Btree;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment