Groostav/DoubleArrayHashList.kt

## DoubleArrayHashList.kt
package com.empowerops.jargon.model.opt2

import java.lang.IndexOutOfBoundsException

/**
 * Why, why on gods green earth would you implement this geoff!?
 *
 * Ok, after studying the access patterns, we need fast contains and fast indexOf.
 * fast contains is easily provided by hashMap,
 * to get indexOf, you want a NavigableSet (indexOf(elem) == headSet(elem).size)
 * note that you need a navigableSet on insertionOrder (which doesnt exist)
 *
 * I spent some time searching for custom collections,
 * and there seems to be a fairly acceptable trend that 'if you're going to store objects,
 * you can be noisy with pointers' Theres lots of self balancing trees that would probably
 * get me the above requirements, but they do tend to over-allocate records and require
 * lots of custom node type.
 *
 * I believe, and have tried to show here, that because we are storing `DoubleArray`s
 * (IE `double[]`, notably _not `Array<Double>` aka `Double[]`_), we can do this compactly.
 */
class DoubleArrayHashList(
        /**
         * non-linearized array of double-precision-fp-rows with index-metadata, encoded as longs.
         *
         * using https://gregstoll.com/~gregstoll/floattohex/ to encode doubles
         * ```
         * 0x0: null //indicates unoccupied
         * 0x1: [ 2L, 0x3FF4C..CD, 0x0..0, 1L, 0x0..0, 0x3FF3..3 ] // indicates <1.2, 0.0> is at index 1; <0.0, 1.3> is at index 2
         * 0x2: [ 0L, 0x3FF199999999999A, 0x0..0 ] // note 1.1 is 0x3FF19..9A; implicitly a record <0L, 1.1, 0.0> => indicates that <1.1, 0.0> is in the list at index 0
         * 0x3: null
         * ```
         *
         * terminology:
         * - 0x1 is a binNumber (binNo), and is a masked hashcode.
         * - [ 2L, 0x3FF4C..CD, 0x0..0, 1L, 0x0..0, 0x3FF3..3 ] is a bin
         * - [ 2L, 0x3FF4C..CD, 0x0..0 ] is a record
         */
        private val binByHash: Array<LongArray?>,
        private val hashByInsertionIndex: IntArray,
        override val size: Int,
        val width: Int
): AbstractCollection<DoubleArray>() {

    private val mask get() = binByHash.size - 1
    private val recordLength get() = NODE_HEADER_METADATA_CELL_COUNT + width

    companion object {
        val EMPTY = DoubleArrayHashList(
                emptyArray<LongArray?>(),
                intArrayOf(),
                0,
                0
        )

        val LOAD_FACTOR = 0.6

        val NODE_HEADER_METADATA_CELL_COUNT = 1
        val INDEX_OFFSET = 0
        val ELEMENT_START_OFFSET = 1

        fun of(vararg elements: DoubleArray): DoubleArrayHashList = EMPTY.plusAll(elements.asIterable())
    }

    override operator fun contains(element: DoubleArray) = indexOf(element) != -1

    operator fun get(index: Int): DoubleArray {
        if (index < 0 || index >= size) throw IndexOutOfBoundsException("index=$index; size=$size")
        val binNo = hashByInsertionIndex[index] and mask

        foreachRecord(binNo) { elemIndex, element ->
            if (index == elemIndex) return element
        }

        throw NoSuchElementException("index=$index in size=$size")
    }

    fun indexOf(element: DoubleArray): Int {
        val binNo = hash(element) and mask

        foreachRecord(binNo) { index, candidateElement ->
            if(element.contentEquals(candidateElement)){
                return index
            }
        }

        return -1
    }

    private inline fun foreachRecord(binNo: Int, block: (index: Int, elem: DoubleArray) -> Unit){

        val bin = binByHash[binNo]!!

        for(elementIndex in 0 until binByHash.elementCount(binNo)){
            val recordStart = elementIndex * recordLength
            val index = bin[recordStart + INDEX_OFFSET].toInt()
            val element = bin.sliceToDoubleArray(recordStart + ELEMENT_START_OFFSET, recordStart + ELEMENT_START_OFFSET + width)

            block(index, element)
        }

    }

    override fun iterator() = object: Iterator<DoubleArray> {
        var current = 0
        override fun hasNext() = current < size
        override fun next() = get(current++)
    }

    fun plusAll(other: Iterable<DoubleArray>) = other.fold(EMPTY, DoubleArrayHashList::plus)

    operator fun plus(newElement: DoubleArray): DoubleArrayHashList {
        require(width == 0 || newElement.size == width)
        val width = newElement.size

        val insertedIndex = size

        return when {
            this === EMPTY -> {
                val contentsByHash = arrayOfNulls<LongArray>(16)
                val hash = hash(newElement)
                val binNo = hash % contentsByHash.size
                contentsByHash.set(binNo, 0, newElement)
                DoubleArrayHashList(contentsByHash, intArrayOf(hash), 1, width)
            }
            (size + 1).toFloat() < binByHash.size * LOAD_FACTOR -> {
                val newContentsByHash = binByHash.copyOf()
                val newHashesByInsertionIndex = hashByInsertionIndex.copyOf(hashByInsertionIndex.size + 1)

                val newElementHash = hash(newElement)
                val mask = newContentsByHash.size - 1
                val binNo = newElementHash and mask

                newContentsByHash.set(binNo, insertedIndex, newElement)

                newHashesByInsertionIndex[insertedIndex] = newElementHash
                DoubleArrayHashList(newContentsByHash, newHashesByInsertionIndex, size + 1, width)
            }
            else -> { //we need to double the size, copy things over, add the new element.
                val oldCapacity = binByHash.size
                val oldSize = size
                val newContentsByBinNo = arrayOfNulls<LongArray>(oldCapacity * 2)
                val newHashesByInsertionIndex = IntArray(size + 1)

                check(newContentsByBinNo.size % 16 == 0) { "must be an even multiple of 16 else masking wont work!" }
                val oldMask = binByHash.size - 1
                val newMask = newContentsByBinNo.size - 1

                var copyIndex = 0
                while(copyIndex < oldSize){
                    val hash = hashByInsertionIndex[copyIndex]
                    val record = get(copyIndex)
                    val newBinNo = hash and newMask

                    newContentsByBinNo.set(newBinNo, copyIndex, record)
                    newHashesByInsertionIndex[copyIndex] = hash

                    copyIndex += 1
                }

                val newElementHash = hash(newElement)
                newContentsByBinNo.set(newElementHash and newMask, insertedIndex, newElement)
                newHashesByInsertionIndex[insertedIndex] = newElementHash

                DoubleArrayHashList(newContentsByBinNo, newHashesByInsertionIndex, size + 1, width)
            }
        }
    }

    // in my own testing there is a _ton_ of colission, not sure why,
    // looked into hash map, notice they dont use the default hashcode,
    // they very quickly stack the top 16bits on the lower 16 bits to improve spread;
    // so im doing the same and its helped pretty enormously
    private fun hash(newElement: DoubleArray) = newElement.contentHashCode().let { it xor (it ushr 16) }

    private fun Array<LongArray?>.elementCount(binNo: Int) = (this[binNo]?.size ?: 0) / (NODE_HEADER_METADATA_CELL_COUNT + width)

    private fun LongArray.sliceToDoubleArray(start: Int, endExclusive: Int): DoubleArray {

        val result = DoubleArray(width)

        for(index in 0 until endExclusive-start){
            val offset = start + index
            val doubleValue = java.lang.Double.longBitsToDouble(this[offset])
            result[index] = doubleValue
        }

//        System.arraycopy(this, start, result, 0, endExclusive-start)

        return result
    }

    private fun Array<LongArray?>.set(binNo: Int, indexHead: Int, element: DoubleArray){
        require(width == 0 || element.size == width)
        val width = element.size
        val recordLength = width + NODE_HEADER_METADATA_CELL_COUNT

        when(val occupant = this[binNo]) {
            null -> {
                val singleRecordBin = LongArray(recordLength)

                singleRecordBin[0] = indexHead.toLong()
                for(index in 0 until width) {
                    singleRecordBin[NODE_HEADER_METADATA_CELL_COUNT + index] = java.lang.Double.doubleToLongBits(element[index])
                }

                this[binNo] = singleRecordBin
            }
            else -> {
                val newRecord = LongArray(recordLength)

                newRecord[0] = indexHead.toLong()
                for(index in 0 until width) {
                    val newBits = java.lang.Double.doubleToLongBits(element[index])
                    newRecord[ELEMENT_START_OFFSET + index] = newBits
                }

                val newOccupant = occupant + newRecord

                this[binNo] = newOccupant
            }
        }
    }

    override fun toString(): String {
        return "DoubleArrayHashList(binByHash=${binByHash.contentToString()}, hashByInsertionIndex=${hashByInsertionIndex.contentToString()}, size=$size, width=$width, mask=$mask, recordLength=$recordLength)"
    }
}

private object EmptyIterator: Iterator<Nothing>{
    override fun hasNext() = false
    override fun next() = throw NoSuchElementException()
}
	package com.empowerops.jargon.model.opt2

	import java.lang.IndexOutOfBoundsException

	/**
	* Why, why on gods green earth would you implement this geoff!?
	*
	* Ok, after studying the access patterns, we need fast contains and fast indexOf.
	* fast contains is easily provided by hashMap,
	* to get indexOf, you want a NavigableSet (indexOf(elem) == headSet(elem).size)
	* note that you need a navigableSet on insertionOrder (which doesnt exist)
	*
	* I spent some time searching for custom collections,
	* and there seems to be a fairly acceptable trend that 'if you're going to store objects,
	* you can be noisy with pointers' Theres lots of self balancing trees that would probably
	* get me the above requirements, but they do tend to over-allocate records and require
	* lots of custom node type.
	*
	* I believe, and have tried to show here, that because we are storing `DoubleArray`s
	* (IE `double[]`, notably _not `Array<Double>` aka `Double[]`_), we can do this compactly.
	*/
	class DoubleArrayHashList(
	/**
	* non-linearized array of double-precision-fp-rows with index-metadata, encoded as longs.
	*
	* using https://gregstoll.com/~gregstoll/floattohex/ to encode doubles
	* ```
	* 0x0: null //indicates unoccupied
	* 0x1: [ 2L, 0x3FF4C..CD, 0x0..0, 1L, 0x0..0, 0x3FF3..3 ] // indicates <1.2, 0.0> is at index 1; <0.0, 1.3> is at index 2
	* 0x2: [ 0L, 0x3FF199999999999A, 0x0..0 ] // note 1.1 is 0x3FF19..9A; implicitly a record <0L, 1.1, 0.0> => indicates that <1.1, 0.0> is in the list at index 0
	* 0x3: null
	* ```
	*
	* terminology:
	* - 0x1 is a binNumber (binNo), and is a masked hashcode.
	* - [ 2L, 0x3FF4C..CD, 0x0..0, 1L, 0x0..0, 0x3FF3..3 ] is a bin
	* - [ 2L, 0x3FF4C..CD, 0x0..0 ] is a record
	*/
	private val binByHash: Array<LongArray?>,
	private val hashByInsertionIndex: IntArray,
	override val size: Int,
	val width: Int
	): AbstractCollection<DoubleArray>() {

	private val mask get() = binByHash.size - 1
	private val recordLength get() = NODE_HEADER_METADATA_CELL_COUNT + width

	companion object {
	val EMPTY = DoubleArrayHashList(
	emptyArray<LongArray?>(),
	intArrayOf(),
	0,
	0
	)

	val LOAD_FACTOR = 0.6

	val NODE_HEADER_METADATA_CELL_COUNT = 1
	val INDEX_OFFSET = 0
	val ELEMENT_START_OFFSET = 1

	fun of(vararg elements: DoubleArray): DoubleArrayHashList = EMPTY.plusAll(elements.asIterable())
	}

	override operator fun contains(element: DoubleArray) = indexOf(element) != -1

	operator fun get(index: Int): DoubleArray {
	if (index < 0 \|\| index >= size) throw IndexOutOfBoundsException("index=$index; size=$size")
	val binNo = hashByInsertionIndex[index] and mask

	foreachRecord(binNo) { elemIndex, element ->
	if (index == elemIndex) return element
	}

	throw NoSuchElementException("index=$index in size=$size")
	}

	fun indexOf(element: DoubleArray): Int {
	val binNo = hash(element) and mask

	foreachRecord(binNo) { index, candidateElement ->
	if(element.contentEquals(candidateElement)){
	return index
	}
	}

	return -1
	}

	private inline fun foreachRecord(binNo: Int, block: (index: Int, elem: DoubleArray) -> Unit){

	val bin = binByHash[binNo]!!

	for(elementIndex in 0 until binByHash.elementCount(binNo)){
	val recordStart = elementIndex * recordLength
	val index = bin[recordStart + INDEX_OFFSET].toInt()
	val element = bin.sliceToDoubleArray(recordStart + ELEMENT_START_OFFSET, recordStart + ELEMENT_START_OFFSET + width)

	block(index, element)
	}

	}

	override fun iterator() = object: Iterator<DoubleArray> {
	var current = 0
	override fun hasNext() = current < size
	override fun next() = get(current++)
	}

	fun plusAll(other: Iterable<DoubleArray>) = other.fold(EMPTY, DoubleArrayHashList::plus)

	operator fun plus(newElement: DoubleArray): DoubleArrayHashList {
	require(width == 0 \|\| newElement.size == width)
	val width = newElement.size

	val insertedIndex = size

	return when {
	this === EMPTY -> {
	val contentsByHash = arrayOfNulls<LongArray>(16)
	val hash = hash(newElement)
	val binNo = hash % contentsByHash.size
	contentsByHash.set(binNo, 0, newElement)
	DoubleArrayHashList(contentsByHash, intArrayOf(hash), 1, width)
	}
	(size + 1).toFloat() < binByHash.size * LOAD_FACTOR -> {
	val newContentsByHash = binByHash.copyOf()
	val newHashesByInsertionIndex = hashByInsertionIndex.copyOf(hashByInsertionIndex.size + 1)

	val newElementHash = hash(newElement)
	val mask = newContentsByHash.size - 1
	val binNo = newElementHash and mask

	newContentsByHash.set(binNo, insertedIndex, newElement)

	newHashesByInsertionIndex[insertedIndex] = newElementHash
	DoubleArrayHashList(newContentsByHash, newHashesByInsertionIndex, size + 1, width)
	}
	else -> { //we need to double the size, copy things over, add the new element.
	val oldCapacity = binByHash.size
	val oldSize = size
	val newContentsByBinNo = arrayOfNulls<LongArray>(oldCapacity * 2)
	val newHashesByInsertionIndex = IntArray(size + 1)

	check(newContentsByBinNo.size % 16 == 0) { "must be an even multiple of 16 else masking wont work!" }
	val oldMask = binByHash.size - 1
	val newMask = newContentsByBinNo.size - 1

	var copyIndex = 0
	while(copyIndex < oldSize){
	val hash = hashByInsertionIndex[copyIndex]
	val record = get(copyIndex)
	val newBinNo = hash and newMask

	newContentsByBinNo.set(newBinNo, copyIndex, record)
	newHashesByInsertionIndex[copyIndex] = hash

	copyIndex += 1
	}

	val newElementHash = hash(newElement)
	newContentsByBinNo.set(newElementHash and newMask, insertedIndex, newElement)
	newHashesByInsertionIndex[insertedIndex] = newElementHash

	DoubleArrayHashList(newContentsByBinNo, newHashesByInsertionIndex, size + 1, width)
	}
	}
	}

	// in my own testing there is a _ton_ of colission, not sure why,
	// looked into hash map, notice they dont use the default hashcode,
	// they very quickly stack the top 16bits on the lower 16 bits to improve spread;
	// so im doing the same and its helped pretty enormously
	private fun hash(newElement: DoubleArray) = newElement.contentHashCode().let { it xor (it ushr 16) }

	private fun Array<LongArray?>.elementCount(binNo: Int) = (this[binNo]?.size ?: 0) / (NODE_HEADER_METADATA_CELL_COUNT + width)

	private fun LongArray.sliceToDoubleArray(start: Int, endExclusive: Int): DoubleArray {

	val result = DoubleArray(width)

	for(index in 0 until endExclusive-start){
	val offset = start + index
	val doubleValue = java.lang.Double.longBitsToDouble(this[offset])
	result[index] = doubleValue
	}

	// System.arraycopy(this, start, result, 0, endExclusive-start)

	return result
	}

	private fun Array<LongArray?>.set(binNo: Int, indexHead: Int, element: DoubleArray){
	require(width == 0 \|\| element.size == width)
	val width = element.size
	val recordLength = width + NODE_HEADER_METADATA_CELL_COUNT

	when(val occupant = this[binNo]) {
	null -> {
	val singleRecordBin = LongArray(recordLength)

	singleRecordBin[0] = indexHead.toLong()
	for(index in 0 until width) {
	singleRecordBin[NODE_HEADER_METADATA_CELL_COUNT + index] = java.lang.Double.doubleToLongBits(element[index])
	}

	this[binNo] = singleRecordBin
	}
	else -> {
	val newRecord = LongArray(recordLength)

	newRecord[0] = indexHead.toLong()
	for(index in 0 until width) {
	val newBits = java.lang.Double.doubleToLongBits(element[index])
	newRecord[ELEMENT_START_OFFSET + index] = newBits
	}

	val newOccupant = occupant + newRecord

	this[binNo] = newOccupant
	}
	}
	}

	override fun toString(): String {
	return "DoubleArrayHashList(binByHash=${binByHash.contentToString()}, hashByInsertionIndex=${hashByInsertionIndex.contentToString()}, size=$size, width=$width, mask=$mask, recordLength=$recordLength)"
	}
	}

	private object EmptyIterator: Iterator<Nothing>{
	override fun hasNext() = false
	override fun next() = throw NoSuchElementException()
	}