Created
July 17, 2017 16:34
-
-
Save ejona86/1b0b8f791b72d94abd3bd84d7dcab978 to your computer and use it in GitHub Desktop.
Persistent map data structures
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Persistent (copy-on-write) B+tree of order 3. 2-3 trees are normally | |
* b-trees, not b+trees. Like all b+trees, values are only stored on the leaves. | |
* | |
* <p>B+tree generally has lower insertion cost (in terms of memory allocated) | |
* than b-tree, even when considering it's increased depth. That is not | |
* considering b-tree's lower replacement cost for values stored in internal | |
* nodes. | |
*/ | |
public class PersistentBPlus3Tree { | |
private final Node root; | |
// Height of internal nodes | |
private final int height; | |
public PersistentBPlus3Tree() { | |
this(null, 0); | |
} | |
private PersistentBPlus3Tree(Node root, int height) { | |
this.root = root; | |
this.height = height; | |
} | |
public Object find(long key) { | |
if (root == null) { | |
return null; | |
} | |
Node node = root; | |
for (int i = 0; i < height; i++) { | |
node = node.findInternal(key); | |
if (node == null) { | |
return null; | |
} | |
} | |
return node.findLeaf(key); | |
} | |
public PersistentBPlus3Tree insert(long key, Object value) { | |
Overflow overflow = new Overflow(); | |
Node node = root.insert(key, value, overflow, height); | |
if (!overflow.isSet()) { | |
return new PersistentBPlus3Tree(node, height); | |
} else { | |
return new PersistentBPlus3Tree(new Node(node, overflow.key, overflow.node, 0, null), height + 1); | |
} | |
} | |
// key == 0 is reserved for "not there" | |
private static class Node { | |
private final long key1; | |
private final long key2; | |
private final Object value1; | |
private final Object value2; | |
private final Object value3; | |
private Node(Object value1, long key1, Object value2, long key2, Object value3) { | |
this.key1 = key1; | |
this.key2 = key2; | |
this.value1 = value1; | |
this.value2 = value2; | |
this.value3 = value3; | |
} | |
private Object findLeaf(long key) { | |
if (key == key1) { | |
return value1; | |
} else if (key == key2) { | |
return value2; | |
} else { | |
assert value3 == null; | |
return null; | |
} | |
} | |
private Node findInternal(long key) { | |
if (key < key1) { | |
return (Node) value1; | |
} else if (key < key2 || key2 == 0) { | |
return (Node) value2; | |
} else { | |
return (Node) value3; | |
} | |
} | |
private Node insert(long key, Object value, Overflow overflow, int recurse) { | |
if (recurse == 0) { | |
return insertLeaf(key, value, overflow); | |
} else { | |
return insertInternal(key, value, overflow, recurse); | |
} | |
} | |
private Node insertLeaf(long key, Object value, Overflow overflow) { | |
// Replacements | |
if (key == key1) { | |
return new Node(value, key, value2, key2, null); | |
} else if (key == key2) { | |
return new Node(value1, key1, value, key, null); | |
} | |
// Insertions | |
if (key2 == 0) { | |
// space available | |
if (key < key1) { | |
return new Node(value, key, value1, key1, null); | |
} else { | |
return new Node(value1, key1, value, key, null); | |
} | |
} else { | |
if (key < key1) { | |
overflow.set(key1, this); | |
return new Node(value, key, null, 0, null); | |
} else if (key < key2) { | |
overflow.set(key2, new Node(value2, key2, null, 0, null)); | |
return new Node(value1, key1, value, key, null); | |
} else { | |
overflow.set(key, new Node(value, key, null, 0, null)); | |
return this; | |
} | |
} | |
} | |
private Node insertInternal(long key, Object value, Overflow overflow, int recurse) { | |
Overflow childOverflow = new Overflow(); | |
if (key < key1) { | |
Node node = ((Node) value1).insert(key, value, childOverflow, recurse - 1); | |
if (!childOverflow.isSet()) { | |
return new Node(node, key1, value2, key2, value3); | |
} else { | |
return create( | |
node, childOverflow.key, childOverflow.node, key1, value2, key2, value3, overflow); | |
} | |
} else if (key < key2 || key2 == 0) { | |
Node node = ((Node) value2).insert(key, value, childOverflow, recurse - 1); | |
if (!childOverflow.isSet()) { | |
return new Node(value1, key1, node, key2, value3); | |
} else { | |
return create( | |
value1, key1, node, childOverflow.key, childOverflow.node, key2, value3, overflow); | |
} | |
} else { | |
Node node = ((Node) value3).insert(key, value, childOverflow, recurse - 1); | |
if (!childOverflow.isSet()) { | |
return new Node(value1, key1, value2, key2, node); | |
} else { | |
return split( | |
value1, key1, node, childOverflow.key, childOverflow.node, key2, value3, overflow); | |
} | |
} | |
} | |
private static Node create( | |
Object value1, | |
long key1, Object value2, | |
long key2, Object value3, | |
long key3, Object value4, | |
Overflow overflow) { | |
if (key3 == 0) { | |
return new Node(value1, key1, value2, key2, value3); | |
} else { | |
return split(value1, key1, value2, key2, value3, key3, value4, overflow); | |
} | |
} | |
private static Node split( | |
Object value1, | |
long key1, Object value2, | |
long key2, Object value3, | |
long key3, Object value4, | |
Overflow overflow) { | |
overflow.set(key2, new Node(value3, key3, value4, 0, null)); | |
return new Node(value1, key1, value2, 0, null); | |
} | |
} | |
private static class Overflow { | |
long key; | |
Node node; | |
public boolean isSet() { | |
return key != 0; | |
} | |
public void set(long key, Node node) { | |
this.key = key; | |
this.node = node; | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Arrays; | |
/** | |
* A persistent (copy-on-write) hash tree/trie. Collisions are handled | |
* linearly. Delete is not supported, but replacement is. The implementation | |
* favors simplicity and low memory allocation during insertion. Although the | |
* asymptotics are good, it is optimized for small sizes like less than 20; | |
* "unbelievably large" would be 100. | |
* | |
* <p>Inspired by popcnt-based compression seen in Ideal Hash Trees, Phil | |
* Bagwell (2000). The rest of the implementation is ignorant of/ignores the | |
* paper. | |
*/ | |
public class PersistentHashArrayMappedTrie<K,V> { | |
private final Node<K,V> root; | |
public PersistentHashArrayMappedTrie() { | |
this(null); | |
} | |
private PersistentHashArrayMappedTrie(Node<K,V> root) { | |
this.root = root; | |
} | |
public V get(K key) { | |
if (root == null) { | |
return null; | |
} | |
return root.get(key, key.hashCode(), 0); | |
} | |
public PersistentHashArrayMappedTrie<K,V> put(K key, V value) { | |
if (root == null) { | |
return new PersistentHashArrayMappedTrie<K,V>(new Leaf<K,V>(key, value)); | |
} else { | |
return new PersistentHashArrayMappedTrie<K,V>(root.put(key, value, key.hashCode(), 0)); | |
} | |
} | |
private static class Leaf<K,V> implements Node<K,V> { | |
private final K key; | |
private final V value; | |
public Leaf(K key, V value) { | |
this.key = key; | |
this.value = value; | |
} | |
@Override | |
public V get(K key, int hash, int bitsConsumed) { | |
if (this.key == key) { | |
return value; | |
} else { | |
return null; | |
} | |
} | |
@Override | |
public Node<K,V> put(K key, V value, int hash, int bitsConsumed) { | |
int thisHash = key.hashCode(); | |
if (thisHash != hash) { | |
// Insert | |
return CompressedIndex.combine( | |
new Leaf<K,V>(key, value), hash, this, thisHash, bitsConsumed); | |
} else if (this.key == key) { | |
// Replace | |
return new Leaf<K,V>(key, value); | |
} else { | |
// Hash collision | |
return new CollisionLeaf<K,V>(this.key, this.value, key, value); | |
} | |
} | |
} | |
private static class CollisionLeaf<K,V> implements Node<K,V> { | |
// All keys must have same hash, but not have the same reference | |
private final K[] keys; | |
private final V[] values; | |
@SuppressWarnings("unchecked") | |
public CollisionLeaf(K key1, V value1, K key2, V value2) { | |
this((K[]) new Object[] {key1, key2}, (V[]) new Object[] {value1, value2}); | |
assert key1 != key2; | |
assert key1.hashCode() == key2.hashCode(); | |
} | |
private CollisionLeaf(K[] keys, V[] values) { | |
this.keys = keys; | |
this.values = values; | |
} | |
@Override | |
public V get(K key, int hash, int bitsConsumed) { | |
for (int i = 0; i < keys.length; i++) { | |
if (keys[i] == key) { | |
return values[i]; | |
} | |
} | |
return null; | |
} | |
@Override | |
public Node<K,V> put(K key, V value, int hash, int bitsConsumed) { | |
int thisHash = keys[0].hashCode(); | |
int keyIndex; | |
if (thisHash != hash) { | |
// Insert | |
return CompressedIndex.combine( | |
new Leaf<K,V>(key, value), hash, this, thisHash, bitsConsumed); | |
} else if ((keyIndex = indexOfKey(key)) != -1) { | |
// Replace | |
K[] newKeys = Arrays.copyOf(keys, keys.length); | |
V[] newValues = Arrays.copyOf(values, keys.length); | |
newKeys[keyIndex] = key; | |
newValues[keyIndex] = value; | |
return new CollisionLeaf<K,V>(newKeys, newValues); | |
} else { | |
// Yet another hash collision | |
K[] newKeys = Arrays.copyOf(keys, keys.length + 1); | |
V[] newValues = Arrays.copyOf(values, keys.length + 1); | |
newKeys[keys.length] = key; | |
newValues[keys.length] = value; | |
return new CollisionLeaf<K,V>(newKeys, newValues); | |
} | |
} | |
// -1 if not found | |
private int indexOfKey(K key) { | |
for (int i = 0; i < keys.length; i++) { | |
if (keys[i] == key) { | |
return i; | |
} | |
} | |
return -1; | |
} | |
} | |
private static class CompressedIndex<K,V> implements Node<K,V> { | |
private static final int BITS = 5; | |
private static final int BITS_MASK = 0x1F; | |
private final int bitmap; | |
private final Node<K,V>[] values; | |
private CompressedIndex(int bitmap, Node<K,V>[] values) { | |
this.bitmap = bitmap; | |
this.values = values; | |
} | |
@Override | |
public V get(K key, int hash, int bitsConsumed) { | |
int indexBit = indexBit(hash, bitsConsumed); | |
if ((bitmap & indexBit) == 0) { | |
return null; | |
} | |
return values[compressedIndex(indexBit)].get(key, hash, bitsConsumed + BITS); | |
} | |
@Override | |
public Node<K,V> put(K key, V value, int hash, int bitsConsumed) { | |
int indexBit = indexBit(hash, bitsConsumed); | |
int compressedIndex = compressedIndex(indexBit); | |
if ((bitmap & indexBit) == 0) { | |
// Insert | |
int newBitmap = bitmap | indexBit; | |
@SuppressWarnings("unchecked") | |
Node<K,V>[] newValues = (Node<K,V>[]) new Node<?,?>[values.length + 1]; | |
System.arraycopy(values, 0, newValues, 0, compressedIndex); | |
newValues[compressedIndex] = new Leaf<K,V>(key, value); | |
System.arraycopy( | |
values, compressedIndex, newValues, compressedIndex+1, values.length - compressedIndex); | |
return new CompressedIndex<K,V>(newBitmap, newValues); | |
} else { | |
// Replace | |
Node<K,V>[] newValues = Arrays.copyOf(values, values.length); | |
newValues[compressedIndex] = | |
values[compressedIndex].put(key, value, hash, bitsConsumed + BITS); | |
return new CompressedIndex<K,V>(bitmap, newValues); | |
} | |
} | |
private static <K,V> Node<K,V> combine( | |
Node<K,V> node1, int hash1, Node<K,V> node2, int hash2, int bitsConsumed) { | |
assert hash1 != hash2; | |
int indexBit1 = indexBit(hash1, bitsConsumed); | |
int indexBit2 = indexBit(hash2, bitsConsumed); | |
if (indexBit1 == indexBit2) { | |
Node<K,V> node = combine(node1, hash1, node2, hash2, bitsConsumed + BITS); | |
@SuppressWarnings("unchecked") | |
Node<K,V>[] values = (Node<K,V>[]) new Node<?,?>[] {node}; | |
return new CompressedIndex<K,V>(indexBit1, values); | |
} else { | |
// Make node1 the smallest | |
if (hash1 > hash2) { | |
Node<K,V> nodeCopy = node1; | |
node1 = node2; | |
node2 = nodeCopy; | |
} | |
@SuppressWarnings("unchecked") | |
Node<K,V>[] values = (Node<K,V>[]) new Node<?,?>[] {node1, node2}; | |
return new CompressedIndex<K,V>(indexBit1 | indexBit2, values); | |
} | |
} | |
private static int indexBit(int hash, int bitsConsumed) { | |
int uncompressedIndex = (hash >>> bitsConsumed) & BITS_MASK; | |
return 1 << uncompressedIndex; | |
} | |
private int compressedIndex(int indexBit) { | |
return Integer.bitCount(bitmap & (indexBit - 1)); | |
} | |
} | |
private interface Node<K,V> { | |
public V get(K key, int hash, int bitsConsumed); | |
public Node<K,V> put(K key, V value, int hash, int bitsConsumed); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment