Skip to content

Instantly share code, notes, and snippets.

@felipecrv
Created April 1, 2018 13:32
Show Gist options
  • Star 13 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save felipecrv/236f82183bbb27bd01033f94fe42e69b to your computer and use it in GitHub Desktop.
Save felipecrv/236f82183bbb27bd01033f94fe42e69b to your computer and use it in GitHub Desktop.
In-memory B+ Tree implemented in Javascript with Flow type annotations
/* @flow */
const KEY_KIND_STRING = 1;
const KEY_KIND_NUMBER = 2;
const KEY_KIND_BOOL = 3;
const KEY_KIND_RECORD = 4;
type KeyKind = 1 | 2 | 3 | 4;
class KeyValue<K, V> {
key: ?K;
value: ?V;
constructor(key: ?K, value: ?V) {
this.key = key;
this.value = value;
}
};
type KeyValues<K, V> = Array<KeyValue<K, V>>;
type Node<K, V> = InnerNode<K, V> | LeafNode<K, V>;
type Atom = number | string | boolean;
function isAtomic(kind: KeyKind): boolean {
return kind < Btree.KEY_KIND_RECORD;
}
function ltAtom(a: any, b: any): boolean {
// NULLs are considered smaller than any other key
if (a === null || a === undefined) {
return b !== null && b !== undefined;
}
if (b === null || b === undefined) {
return false;
}
return a < b;
}
function ltRecord(a: any, b: any): boolean {
// NULLs are considered smaller than any other key
if (a === null || a === undefined) {
return b !== null && b !== undefined;
}
if (b === null || b === undefined) {
return false;
}
return a.lt(b);
}
function lt(kind: KeyKind, a: any, b: any): boolean {
return isAtomic(kind) ? ltAtom(a, b) : ltRecord(a, b);
}
function eqAtom(a: any, b: any): boolean {
if (a === null || a === undefined) {
return b === null || b === undefined;
}
return a === b;
}
function eqRecord(a: any, b: any): boolean {
if (a === null || a === undefined) {
return b === null || b === undefined;
}
if (b !== null && b !== undefined) {
return a.eq(b);
}
return false;
}
function eq(kind: KeyKind, a: any, b: any): boolean {
return isAtomic(kind) ? eqAtom(a, b) : eqRecord(a, b);
}
function findUpperBoundIndex<V>(keyKind: KeyKind, values: KeyValues<any, V>, key: any): number {
const _lt = isAtomic(keyKind) ? ltAtom : ltRecord;
let lo = 0;
let hi = values.length;
while (lo < hi) {
// Invariants:
// - answer in [lo, hi] and lo < hi
// - values.length / 2 <= mid <= values.length
let mid = Math.floor((lo + hi) / 2);
if (_lt(values[mid].key, key)) {
lo = mid + 1;
} else {
hi = mid;
}
}
return lo;
}
function findUpperBoundKeyIndex<V>(keyKind: KeyKind, keys: Array<any>, key: any): number {
const _lt = isAtomic(keyKind) ? ltAtom : ltRecord;
let lo = 0;
let hi = keys.length;
while (lo < hi) {
// Invariants:
// - answer in [lo, hi] and lo < hi
// - values.length / 2 <= mid <= values.length
let mid = Math.floor((lo + hi) / 2);
if (_lt(keys[mid], key)) {
lo = mid + 1;
} else {
hi = mid;
}
}
return lo;
}
class InnerNode<K, V> {
// k0, k1, .., kn-2
keys: Array<?K>;
// [.., k0], [.., k1], .., [.., kn-2], [.., kn-1]
children: Array<Node<K, V>>;
constructor(nodes: Array<Node<K, V>>) {
this.children = nodes;
this.keys = new Array(nodes.length - 1);
for (let i = 0; i < nodes.length - 1; i++) {
this.keys[i] = nodes[i].maxKey();
}
}
// Recursive: O(log N)
minKey(): ?K {
return this.children[0].minKey();
}
// Recursive: O(log N)
maxKey(): ?K {
return this.children[this.children.length - 1].maxKey();
}
// Recursive
size() {
let len = 0;
const first = this.children[0];
if (first.values) {
const leaves: Array<LeafNode<K, V>> = (this.children: any);
for (let i = 0; i < leaves.length; i++) {
len += leaves[i].values.length;
}
} else {
const nodes: Array<InnerNode<K, V>> = (this.children: any);
for (let i = 0; i < nodes.length; i++) {
len += nodes[i].size();
}
}
return len;
}
}
class LeafNode<K, V> {
values: KeyValues<K, V>;
constructor(values: KeyValues<K, V>) {
this.values = values;
}
// O(1)
minKey(): ?K {
return this.values[0].key;
}
// O(1)
maxKey(): ?K {
return this.values[this.values.length - 1].key;
}
}
class Btree<K, V> {
static KEY_KIND_STRING: KeyKind;
static KEY_KIND_NUMBER: KeyKind;
static KEY_KIND_BOOL: KeyKind;
static KEY_KIND_RECORD: KeyKind;
static KeyValue: Class<KeyValue<K, V>>;
static InnerNode: Class<InnerNode<K, V>>;
static LeafNode: Class<LeafNode<K, V>>;
static detail: {
isAtomic: typeof(isAtomic),
lt: typeof(lt),
eq: typeof(eq),
findUpperBoundIndex: typeof(findUpperBoundIndex),
findUpperBoundKeyIndex: typeof(findUpperBoundKeyIndex),
};
root: Node<K, V>;
keyKind: KeyKind;
maxNodeSize: number;
constructor(keyKind: KeyKind, maxNodeSize: number = 16) {
this.root = new LeafNode([]);
this.keyKind = keyKind;
this.maxNodeSize = maxNodeSize;
}
// Returns the leaf that can contain `key`
findLeaf(key: K): LeafNode<K, V> {
let node = this.root;
// While a leaf is not found...
while (!node.values) {
const inner: InnerNode<K, V> = (node: any);
const i = findUpperBoundKeyIndex(this.keyKind, inner.keys, key);
node = inner.children[i];
}
// node is a LeafNode
return (node: any);
}
find(key: K): ?KeyValue<K, V> {
const leaf = this.findLeaf(key);
const i = findUpperBoundIndex(this.keyKind, leaf.values, key);
const pair = leaf.values[i];
return (pair && eq(this.keyKind, key, pair.key)) ? pair : undefined;
}
partitionLeaf(leaf: LeafNode<K, V>): Array<Node<K, V>> {
const leftSize = Math.ceil(leaf.values.length / 2);
const leftValues = leaf.values.slice(0, leftSize);
const rightValues = leaf.values.slice(leftSize, leaf.values.length);
const left = new LeafNode(leftValues);
const right = new LeafNode(rightValues);
return [left, right];
}
partitionInner(inner: InnerNode<K, V>): Array<Node<K, V>> {
const leftSize = Math.ceil(inner.children.length / 2);
const leftChildren = inner.children.slice(0, leftSize);
const rightChildren = inner.children.slice(leftSize, inner.children.length);
const left = new InnerNode(leftChildren);
const right = new InnerNode(rightChildren);
return [left, right];
}
add(key: ?K, value: V) {
// Variables representing the current state in the search for the leaf node
// where the key will be inserted:
//
// - indexOnParent: the position of `node` in `parent`
// - parent: the parent of `node`
// - node: The node in the path towards the leaf node where the key will be inserted.
let indexOnParent = -1;
let parent: InnerNode<K, V> = (null: any);
let node = this.root;
// Check if the root is full and split it. Be either an inner node or a leaf
// node. A root split is the only event that makes the B-tree grow in
// height. That's how it's kept balanced.
if (node.values) { // root is a leaf
const leaf: LeafNode<K, V> = (node: any);
if (leaf.values.length >= this.maxNodeSize) {
const partitions = this.partitionLeaf(leaf);
parent = new InnerNode(partitions);
this.root = parent;
indexOnParent = lt(this.keyKind, key, parent.keys[0]) ? 0 : 1;
node = parent.children[indexOnParent];
}
} else { // root is an inner node
const inner: InnerNode<K, V> = (node: any);
if (inner.children.length >= this.maxNodeSize) {
const partitions = this.partitionInner(inner);
parent = new InnerNode(partitions);
this.root = parent;
indexOnParent = lt(this.keyKind, key, parent.keys[0]) ? 0 : 1;
node = parent.children[indexOnParent];
}
}
// While a leaf is not found...
while (!node.values) {
// If the node is full, we split it and reconfigure the parent. Here's an
// illustration of what the `parent` looks like before and after the
// split:
//
// k0 k1 k2
// [...k0] [...k...k1] [...k2] [...k3]
// ^ node
//
// k0 k k1 k2
// [...k0] [...k] [...k1] [...k2] [...k3]
// ^ left ^ right
//
let inner: InnerNode<K, V> = (node: any);
if (inner.children.length >= this.maxNodeSize) {
// Invariant: parent is not null.
// Proof: if parent is null, node is the root. But node is full and we
// checked before if the root was full to split it. So node is not the
// root. It has a parent.
const [left, right] = this.partitionInner(inner);
const maxLeftKey = left.maxKey(); // k in the illustration
parent.keys.splice(indexOnParent, 0, maxLeftKey);
parent.children.splice(indexOnParent, 1, left, right);
// The indexOnParent on parent might have shifted
indexOnParent = lt(this.keyKind, key, maxLeftKey) ? indexOnParent : indexOnParent + 1;
node = parent.children[indexOnParent];
inner = (node: any);
}
// Advance the search
indexOnParent = findUpperBoundKeyIndex(this.keyKind, inner.keys, key);
parent = inner;
node = parent.children[indexOnParent];
}
// Found a leaf. Add the the new key-value pair to it.
let leaf: LeafNode<K, V> = (node: any);
const i = findUpperBoundIndex(this.keyKind, leaf.values, key);
leaf.values.splice(i, 0, new KeyValue(key, value));
// Split the leaf if it overflowed after insertion
if (leaf.values.length > this.maxNodeSize) {
const [left, right] = this.partitionLeaf(leaf);
const maxLeftKey = left.maxKey();
parent.keys.splice(indexOnParent, 0, maxLeftKey);
parent.children.splice(indexOnParent, 1, left, right);
}
}
// Pre-conditions for the fuse* and borrow* methods
// 1. parent has more than one children (that's why fusing/borrowing is
// possible)
// 2. node has 1 children, thus node.keys is empty (that's why we're
// fusing/borrowing
// Additional pre-conditions:
// - next has less than 3 children
// - indexOnParent < parent.children.length - 1 since the index of next on
// parent is indexOnParent + 1
fuseWithNext(
parent: InnerNode<K, V>,
node: InnerNode<K, V>,
indexOnParent: number,
next: InnerNode<K, V>) {
node.keys = [node.children[0].maxKey()].concat(next.keys);
node.children = node.children.concat(next.children);
// Fix keys/children in the parent
if (indexOnParent + 1 < parent.keys.length) {
parent.keys[indexOnParent] = parent.keys[indexOnParent + 1];
parent.keys.splice(indexOnParent + 1, 1);
} else {
parent.keys[indexOnParent] = node.children[node.children.length - 1].maxKey();
parent.keys.splice(parent.keys.length - 1, 1);
}
parent.children.splice(indexOnParent + 1, 1);
}
// Additional pre-conditions:
// - prev has less than 3 children
// - indexOnParent > 0 since the index of prev on parent is indexOnParent - 1
fuseWithPrev(
parent: InnerNode<K, V>,
node: InnerNode<K, V>,
indexOnParent: number,
prev: InnerNode<K, V>) {
node.keys = prev.keys;
node.keys.push(prev.children[prev.children.length - 1].maxKey());
const child = node.children[0];
node.children = prev.children;
node.children.push(child);
// Fix keys/children in the parent
parent.keys.splice(indexOnParent - 1, 1);
parent.children.splice(indexOnParent - 1, 1);
}
// Additional pre-conditions:
// - next has >= 3 children
// - indexOnParent < parent.children.length - 1 since the index of next on
// parent is indexOnParent + 1
borrowFromNext(
parent: InnerNode<K, V>,
node: InnerNode<K, V>,
indexOnParent: number,
next: InnerNode<K, V>) {
const borrowCount = Math.ceil(next.children.length / 2);
// Copy keys/children from next
node.keys.push(node.children[node.children.length - 1].maxKey());
node.keys = node.keys.concat(next.keys.slice(0, borrowCount - 1));
const borrowed = next.children.slice(0, borrowCount);
node.children = node.children.concat(borrowed);
// Remove the borrowed keys/children from next
next.keys = next.keys.slice(borrowCount, next.keys.length);
next.children = next.children.slice(borrowCount, next.children.length);
// Fix keys/children in the parent
parent.keys[indexOnParent] = node.maxKey();
}
// Additional pre-conditions:
// - prev has >= 3 children
// - indexOnParent > 0 since the index of prev on parent is indexOnParent - 1
borrowFromPrev(
parent: InnerNode<K, V>,
node: InnerNode<K, V>,
indexOnParent: number,
prev: InnerNode<K, V>) {
const borrowStart = Math.ceil(prev.children.length / 2);
// Copy keys/children from prev
const borrowedKeys = prev.keys.slice(borrowStart, prev.keys.length);
borrowedKeys.push(prev.maxKey());
node.keys = borrowedKeys.concat(node.keys);
const borrowed = prev.children.slice(borrowStart, prev.children.length);
node.children = borrowed.concat(node.children);
// Remove the borrowed keys/children from prev
prev.keys = prev.keys.slice(0, borrowStart - 1);
prev.children = prev.children.slice(0, borrowStart);
// Fix keys/children in the parent
parent.keys[indexOnParent - 1] = prev.children[prev.children.length - 1].maxKey();
}
removeFirst(key: ?K): ?KeyValue<K, V> {
// While the root is an inner node containing only 1 children...
while (!this.root.values) {
const inner: InnerNode<K, V> = (this.root: any);
if (inner.children.length > 1) {
break;
}
// ...reduce the height of the tree.
this.root = inner.children[0];
}
let parent: InnerNode<K, V> = (null: any);
let indexOnParent = -1;
let node = this.root;
while (!node.values) {
const inner: InnerNode<K, V> = (node: any);
if (inner.children.length === 1) {
// Loop invariant: parent.keys is non-empty (i.e. parent.children.length >= 2)
const prev: ?InnerNode<K, V> = indexOnParent > 0 ?
(parent.children[indexOnParent - 1]: any) : null;
const next: ?InnerNode<K, V> = indexOnParent < parent.children.length - 1 ?
(parent.children[indexOnParent + 1]: any) : null;
// Try to fuse with a sibling or borrow nodes from it. Fusing is
// preferred over borrowing. The next sibling is preferred over the
// previous sibling. Either prev or next are non-null since the parent
// has more than one children.
let fused = false;
if (next) {
if (next.children.length < 3) {
this.fuseWithNext(parent, inner, indexOnParent, next);
fused = true;
}
} else if ((prev: any).children.length < 3) {
this.fuseWithPrev(parent, inner, indexOnParent, (prev: any));
fused = true;
}
if (!fused) {
if (next) {
this.borrowFromNext(parent, inner, indexOnParent, next);
} else {
this.borrowFromPrev(parent, inner, indexOnParent, (prev: any));
}
}
}
// Advance the search
indexOnParent = findUpperBoundKeyIndex(this.keyKind, inner.keys, key);
parent = inner;
node = parent.children[indexOnParent];
}
// Found a leaf. Remove the key-value pair if it exists.
let leaf: LeafNode<K, V> = (node: any);
const i = findUpperBoundIndex(this.keyKind, leaf.values, key);
const pair = leaf.values[i];
if (!pair || !eq(this.keyKind, key, pair.key)) {
return undefined;
}
leaf.values.splice(i, 1);
// Fix parent
if (leaf.values.length === 0 && parent) {
parent.keys.splice(Math.min(indexOnParent, parent.keys.length - 1), 1);
parent.children.splice(indexOnParent, 1);
}
return pair;
}
size() {
if (this.root.values) {
const leaf: LeafNode<K, V> = (this.root: any);
return leaf.values.length;
}
const inner: InnerNode<K, V> = (this.root: any);
return inner.size();
}
}
Btree.KEY_KIND_STRING = KEY_KIND_STRING;
Btree.KEY_KIND_NUMBER = KEY_KIND_NUMBER;
Btree.KEY_KIND_BOOL = KEY_KIND_BOOL;
Btree.KEY_KIND_RECORD = KEY_KIND_RECORD;
Btree.KeyValue = KeyValue;
Btree.InnerNode = InnerNode;
Btree.LeafNode = LeafNode;
Btree.detail = {
isAtomic,
lt,
eq,
findUpperBoundIndex,
findUpperBoundKeyIndex,
};
module.exports = Btree;
@felipecrv
Copy link
Author

This implementation allows empty nodes and less than half-full nodes, but due to top-down splitting/fusing/borrowing (in contrast with classical bottom-up fusing/splitting) it guarantees O(log N) tree height where N is the number of insertions.

More details in Deletion Without Rebalancing in Multiway Search Trees by Siddhartha Sen and Robert E. Tarjan from Microsoft Research.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment