Skip to content

Instantly share code, notes, and snippets.

@Groostav
Created October 12, 2021 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Groostav/d454f1711ffd98dbe6323ff7a112d79a to your computer and use it in GitHub Desktop.
Save Groostav/d454f1711ffd98dbe6323ff7a112d79a to your computer and use it in GitHub Desktop.
a first crack at an implementation of a reasonably compact matrix with fast indexOf(row) and contains(row) operations
package com.empowerops.jargon.model.opt2
import java.lang.IndexOutOfBoundsException
/**
* Why, why on gods green earth would you implement this geoff!?
*
* Ok, after studying the access patterns, we need fast contains and fast indexOf.
* fast contains is easily provided by hashMap,
* to get indexOf, you want a NavigableSet (indexOf(elem) == headSet(elem).size)
* note that you need a navigableSet on insertionOrder (which doesnt exist)
*
* I spent some time searching for custom collections,
* and there seems to be a fairly acceptable trend that 'if you're going to store objects,
* you can be noisy with pointers' Theres lots of self balancing trees that would probably
* get me the above requirements, but they do tend to over-allocate records and require
* lots of custom node type.
*
* I believe, and have tried to show here, that because we are storing `DoubleArray`s
* (IE `double[]`, notably _not `Array<Double>` aka `Double[]`_), we can do this compactly.
*/
class DoubleArrayHashList(
/**
* non-linearized array of double-precision-fp-rows with index-metadata, encoded as longs.
*
* using https://gregstoll.com/~gregstoll/floattohex/ to encode doubles
* ```
* 0x0: null //indicates unoccupied
* 0x1: [ 2L, 0x3FF4C..CD, 0x0..0, 1L, 0x0..0, 0x3FF3..3 ] // indicates <1.2, 0.0> is at index 1; <0.0, 1.3> is at index 2
* 0x2: [ 0L, 0x3FF199999999999A, 0x0..0 ] // note 1.1 is 0x3FF19..9A; implicitly a record <0L, 1.1, 0.0> => indicates that <1.1, 0.0> is in the list at index 0
* 0x3: null
* ```
*
* terminology:
* - 0x1 is a binNumber (binNo), and is a masked hashcode.
* - [ 2L, 0x3FF4C..CD, 0x0..0, 1L, 0x0..0, 0x3FF3..3 ] is a bin
* - [ 2L, 0x3FF4C..CD, 0x0..0 ] is a record
*/
private val binByHash: Array<LongArray?>,
private val hashByInsertionIndex: IntArray,
override val size: Int,
val width: Int
): AbstractCollection<DoubleArray>() {
private val mask get() = binByHash.size - 1
private val recordLength get() = NODE_HEADER_METADATA_CELL_COUNT + width
companion object {
val EMPTY = DoubleArrayHashList(
emptyArray<LongArray?>(),
intArrayOf(),
0,
0
)
val LOAD_FACTOR = 0.6
val NODE_HEADER_METADATA_CELL_COUNT = 1
val INDEX_OFFSET = 0
val ELEMENT_START_OFFSET = 1
fun of(vararg elements: DoubleArray): DoubleArrayHashList = EMPTY.plusAll(elements.asIterable())
}
override operator fun contains(element: DoubleArray) = indexOf(element) != -1
operator fun get(index: Int): DoubleArray {
if (index < 0 || index >= size) throw IndexOutOfBoundsException("index=$index; size=$size")
val binNo = hashByInsertionIndex[index] and mask
foreachRecord(binNo) { elemIndex, element ->
if (index == elemIndex) return element
}
throw NoSuchElementException("index=$index in size=$size")
}
fun indexOf(element: DoubleArray): Int {
val binNo = hash(element) and mask
foreachRecord(binNo) { index, candidateElement ->
if(element.contentEquals(candidateElement)){
return index
}
}
return -1
}
private inline fun foreachRecord(binNo: Int, block: (index: Int, elem: DoubleArray) -> Unit){
val bin = binByHash[binNo]!!
for(elementIndex in 0 until binByHash.elementCount(binNo)){
val recordStart = elementIndex * recordLength
val index = bin[recordStart + INDEX_OFFSET].toInt()
val element = bin.sliceToDoubleArray(recordStart + ELEMENT_START_OFFSET, recordStart + ELEMENT_START_OFFSET + width)
block(index, element)
}
}
override fun iterator() = object: Iterator<DoubleArray> {
var current = 0
override fun hasNext() = current < size
override fun next() = get(current++)
}
fun plusAll(other: Iterable<DoubleArray>) = other.fold(EMPTY, DoubleArrayHashList::plus)
operator fun plus(newElement: DoubleArray): DoubleArrayHashList {
require(width == 0 || newElement.size == width)
val width = newElement.size
val insertedIndex = size
return when {
this === EMPTY -> {
val contentsByHash = arrayOfNulls<LongArray>(16)
val hash = hash(newElement)
val binNo = hash % contentsByHash.size
contentsByHash.set(binNo, 0, newElement)
DoubleArrayHashList(contentsByHash, intArrayOf(hash), 1, width)
}
(size + 1).toFloat() < binByHash.size * LOAD_FACTOR -> {
val newContentsByHash = binByHash.copyOf()
val newHashesByInsertionIndex = hashByInsertionIndex.copyOf(hashByInsertionIndex.size + 1)
val newElementHash = hash(newElement)
val mask = newContentsByHash.size - 1
val binNo = newElementHash and mask
newContentsByHash.set(binNo, insertedIndex, newElement)
newHashesByInsertionIndex[insertedIndex] = newElementHash
DoubleArrayHashList(newContentsByHash, newHashesByInsertionIndex, size + 1, width)
}
else -> { //we need to double the size, copy things over, add the new element.
val oldCapacity = binByHash.size
val oldSize = size
val newContentsByBinNo = arrayOfNulls<LongArray>(oldCapacity * 2)
val newHashesByInsertionIndex = IntArray(size + 1)
check(newContentsByBinNo.size % 16 == 0) { "must be an even multiple of 16 else masking wont work!" }
val oldMask = binByHash.size - 1
val newMask = newContentsByBinNo.size - 1
var copyIndex = 0
while(copyIndex < oldSize){
val hash = hashByInsertionIndex[copyIndex]
val record = get(copyIndex)
val newBinNo = hash and newMask
newContentsByBinNo.set(newBinNo, copyIndex, record)
newHashesByInsertionIndex[copyIndex] = hash
copyIndex += 1
}
val newElementHash = hash(newElement)
newContentsByBinNo.set(newElementHash and newMask, insertedIndex, newElement)
newHashesByInsertionIndex[insertedIndex] = newElementHash
DoubleArrayHashList(newContentsByBinNo, newHashesByInsertionIndex, size + 1, width)
}
}
}
// in my own testing there is a _ton_ of colission, not sure why,
// looked into hash map, notice they dont use the default hashcode,
// they very quickly stack the top 16bits on the lower 16 bits to improve spread;
// so im doing the same and its helped pretty enormously
private fun hash(newElement: DoubleArray) = newElement.contentHashCode().let { it xor (it ushr 16) }
private fun Array<LongArray?>.elementCount(binNo: Int) = (this[binNo]?.size ?: 0) / (NODE_HEADER_METADATA_CELL_COUNT + width)
private fun LongArray.sliceToDoubleArray(start: Int, endExclusive: Int): DoubleArray {
val result = DoubleArray(width)
for(index in 0 until endExclusive-start){
val offset = start + index
val doubleValue = java.lang.Double.longBitsToDouble(this[offset])
result[index] = doubleValue
}
// System.arraycopy(this, start, result, 0, endExclusive-start)
return result
}
private fun Array<LongArray?>.set(binNo: Int, indexHead: Int, element: DoubleArray){
require(width == 0 || element.size == width)
val width = element.size
val recordLength = width + NODE_HEADER_METADATA_CELL_COUNT
when(val occupant = this[binNo]) {
null -> {
val singleRecordBin = LongArray(recordLength)
singleRecordBin[0] = indexHead.toLong()
for(index in 0 until width) {
singleRecordBin[NODE_HEADER_METADATA_CELL_COUNT + index] = java.lang.Double.doubleToLongBits(element[index])
}
this[binNo] = singleRecordBin
}
else -> {
val newRecord = LongArray(recordLength)
newRecord[0] = indexHead.toLong()
for(index in 0 until width) {
val newBits = java.lang.Double.doubleToLongBits(element[index])
newRecord[ELEMENT_START_OFFSET + index] = newBits
}
val newOccupant = occupant + newRecord
this[binNo] = newOccupant
}
}
}
override fun toString(): String {
return "DoubleArrayHashList(binByHash=${binByHash.contentToString()}, hashByInsertionIndex=${hashByInsertionIndex.contentToString()}, size=$size, width=$width, mask=$mask, recordLength=$recordLength)"
}
}
private object EmptyIterator: Iterator<Nothing>{
override fun hasNext() = false
override fun next() = throw NoSuchElementException()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment