Created
October 12, 2021 20:01
-
-
Save Groostav/d454f1711ffd98dbe6323ff7a112d79a to your computer and use it in GitHub Desktop.
a first crack at an implementation of a reasonably compact matrix with fast indexOf(row) and contains(row) operations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.empowerops.jargon.model.opt2 | |
import java.lang.IndexOutOfBoundsException | |
/** | |
* Why, why on gods green earth would you implement this geoff!? | |
* | |
* Ok, after studying the access patterns, we need fast contains and fast indexOf. | |
* fast contains is easily provided by hashMap, | |
* to get indexOf, you want a NavigableSet (indexOf(elem) == headSet(elem).size) | |
* note that you need a navigableSet on insertionOrder (which doesnt exist) | |
* | |
* I spent some time searching for custom collections, | |
* and there seems to be a fairly acceptable trend that 'if you're going to store objects, | |
* you can be noisy with pointers' Theres lots of self balancing trees that would probably | |
* get me the above requirements, but they do tend to over-allocate records and require | |
* lots of custom node type. | |
* | |
* I believe, and have tried to show here, that because we are storing `DoubleArray`s | |
* (IE `double[]`, notably _not `Array<Double>` aka `Double[]`_), we can do this compactly. | |
*/ | |
class DoubleArrayHashList( | |
/** | |
* non-linearized array of double-precision-fp-rows with index-metadata, encoded as longs. | |
* | |
* using https://gregstoll.com/~gregstoll/floattohex/ to encode doubles | |
* ``` | |
* 0x0: null //indicates unoccupied | |
* 0x1: [ 2L, 0x3FF4C..CD, 0x0..0, 1L, 0x0..0, 0x3FF3..3 ] // indicates <1.2, 0.0> is at index 1; <0.0, 1.3> is at index 2 | |
* 0x2: [ 0L, 0x3FF199999999999A, 0x0..0 ] // note 1.1 is 0x3FF19..9A; implicitly a record <0L, 1.1, 0.0> => indicates that <1.1, 0.0> is in the list at index 0 | |
* 0x3: null | |
* ``` | |
* | |
* terminology: | |
* - 0x1 is a binNumber (binNo), and is a masked hashcode. | |
* - [ 2L, 0x3FF4C..CD, 0x0..0, 1L, 0x0..0, 0x3FF3..3 ] is a bin | |
* - [ 2L, 0x3FF4C..CD, 0x0..0 ] is a record | |
*/ | |
private val binByHash: Array<LongArray?>, | |
private val hashByInsertionIndex: IntArray, | |
override val size: Int, | |
val width: Int | |
): AbstractCollection<DoubleArray>() { | |
private val mask get() = binByHash.size - 1 | |
private val recordLength get() = NODE_HEADER_METADATA_CELL_COUNT + width | |
companion object { | |
val EMPTY = DoubleArrayHashList( | |
emptyArray<LongArray?>(), | |
intArrayOf(), | |
0, | |
0 | |
) | |
val LOAD_FACTOR = 0.6 | |
val NODE_HEADER_METADATA_CELL_COUNT = 1 | |
val INDEX_OFFSET = 0 | |
val ELEMENT_START_OFFSET = 1 | |
fun of(vararg elements: DoubleArray): DoubleArrayHashList = EMPTY.plusAll(elements.asIterable()) | |
} | |
override operator fun contains(element: DoubleArray) = indexOf(element) != -1 | |
operator fun get(index: Int): DoubleArray { | |
if (index < 0 || index >= size) throw IndexOutOfBoundsException("index=$index; size=$size") | |
val binNo = hashByInsertionIndex[index] and mask | |
foreachRecord(binNo) { elemIndex, element -> | |
if (index == elemIndex) return element | |
} | |
throw NoSuchElementException("index=$index in size=$size") | |
} | |
fun indexOf(element: DoubleArray): Int { | |
val binNo = hash(element) and mask | |
foreachRecord(binNo) { index, candidateElement -> | |
if(element.contentEquals(candidateElement)){ | |
return index | |
} | |
} | |
return -1 | |
} | |
private inline fun foreachRecord(binNo: Int, block: (index: Int, elem: DoubleArray) -> Unit){ | |
val bin = binByHash[binNo]!! | |
for(elementIndex in 0 until binByHash.elementCount(binNo)){ | |
val recordStart = elementIndex * recordLength | |
val index = bin[recordStart + INDEX_OFFSET].toInt() | |
val element = bin.sliceToDoubleArray(recordStart + ELEMENT_START_OFFSET, recordStart + ELEMENT_START_OFFSET + width) | |
block(index, element) | |
} | |
} | |
override fun iterator() = object: Iterator<DoubleArray> { | |
var current = 0 | |
override fun hasNext() = current < size | |
override fun next() = get(current++) | |
} | |
fun plusAll(other: Iterable<DoubleArray>) = other.fold(EMPTY, DoubleArrayHashList::plus) | |
operator fun plus(newElement: DoubleArray): DoubleArrayHashList { | |
require(width == 0 || newElement.size == width) | |
val width = newElement.size | |
val insertedIndex = size | |
return when { | |
this === EMPTY -> { | |
val contentsByHash = arrayOfNulls<LongArray>(16) | |
val hash = hash(newElement) | |
val binNo = hash % contentsByHash.size | |
contentsByHash.set(binNo, 0, newElement) | |
DoubleArrayHashList(contentsByHash, intArrayOf(hash), 1, width) | |
} | |
(size + 1).toFloat() < binByHash.size * LOAD_FACTOR -> { | |
val newContentsByHash = binByHash.copyOf() | |
val newHashesByInsertionIndex = hashByInsertionIndex.copyOf(hashByInsertionIndex.size + 1) | |
val newElementHash = hash(newElement) | |
val mask = newContentsByHash.size - 1 | |
val binNo = newElementHash and mask | |
newContentsByHash.set(binNo, insertedIndex, newElement) | |
newHashesByInsertionIndex[insertedIndex] = newElementHash | |
DoubleArrayHashList(newContentsByHash, newHashesByInsertionIndex, size + 1, width) | |
} | |
else -> { //we need to double the size, copy things over, add the new element. | |
val oldCapacity = binByHash.size | |
val oldSize = size | |
val newContentsByBinNo = arrayOfNulls<LongArray>(oldCapacity * 2) | |
val newHashesByInsertionIndex = IntArray(size + 1) | |
check(newContentsByBinNo.size % 16 == 0) { "must be an even multiple of 16 else masking wont work!" } | |
val oldMask = binByHash.size - 1 | |
val newMask = newContentsByBinNo.size - 1 | |
var copyIndex = 0 | |
while(copyIndex < oldSize){ | |
val hash = hashByInsertionIndex[copyIndex] | |
val record = get(copyIndex) | |
val newBinNo = hash and newMask | |
newContentsByBinNo.set(newBinNo, copyIndex, record) | |
newHashesByInsertionIndex[copyIndex] = hash | |
copyIndex += 1 | |
} | |
val newElementHash = hash(newElement) | |
newContentsByBinNo.set(newElementHash and newMask, insertedIndex, newElement) | |
newHashesByInsertionIndex[insertedIndex] = newElementHash | |
DoubleArrayHashList(newContentsByBinNo, newHashesByInsertionIndex, size + 1, width) | |
} | |
} | |
} | |
// in my own testing there is a _ton_ of colission, not sure why, | |
// looked into hash map, notice they dont use the default hashcode, | |
// they very quickly stack the top 16bits on the lower 16 bits to improve spread; | |
// so im doing the same and its helped pretty enormously | |
private fun hash(newElement: DoubleArray) = newElement.contentHashCode().let { it xor (it ushr 16) } | |
private fun Array<LongArray?>.elementCount(binNo: Int) = (this[binNo]?.size ?: 0) / (NODE_HEADER_METADATA_CELL_COUNT + width) | |
private fun LongArray.sliceToDoubleArray(start: Int, endExclusive: Int): DoubleArray { | |
val result = DoubleArray(width) | |
for(index in 0 until endExclusive-start){ | |
val offset = start + index | |
val doubleValue = java.lang.Double.longBitsToDouble(this[offset]) | |
result[index] = doubleValue | |
} | |
// System.arraycopy(this, start, result, 0, endExclusive-start) | |
return result | |
} | |
private fun Array<LongArray?>.set(binNo: Int, indexHead: Int, element: DoubleArray){ | |
require(width == 0 || element.size == width) | |
val width = element.size | |
val recordLength = width + NODE_HEADER_METADATA_CELL_COUNT | |
when(val occupant = this[binNo]) { | |
null -> { | |
val singleRecordBin = LongArray(recordLength) | |
singleRecordBin[0] = indexHead.toLong() | |
for(index in 0 until width) { | |
singleRecordBin[NODE_HEADER_METADATA_CELL_COUNT + index] = java.lang.Double.doubleToLongBits(element[index]) | |
} | |
this[binNo] = singleRecordBin | |
} | |
else -> { | |
val newRecord = LongArray(recordLength) | |
newRecord[0] = indexHead.toLong() | |
for(index in 0 until width) { | |
val newBits = java.lang.Double.doubleToLongBits(element[index]) | |
newRecord[ELEMENT_START_OFFSET + index] = newBits | |
} | |
val newOccupant = occupant + newRecord | |
this[binNo] = newOccupant | |
} | |
} | |
} | |
override fun toString(): String { | |
return "DoubleArrayHashList(binByHash=${binByHash.contentToString()}, hashByInsertionIndex=${hashByInsertionIndex.contentToString()}, size=$size, width=$width, mask=$mask, recordLength=$recordLength)" | |
} | |
} | |
private object EmptyIterator: Iterator<Nothing>{ | |
override fun hasNext() = false | |
override fun next() = throw NoSuchElementException() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment