lukashaertel/learnhmm.kt

## learnhmm.kt
package ktsandbox

import javax.xml.crypto.Data

/**
 * A data point, where first is the hidden state, and second is the observed
 * state.
 */
typealias DataPoint = Pair<Int, Int>

/**
 * Learns a HMM by grid searching the state matrix.
 * @param emission The emission probabilities
 * @param trainingSet The set of training sequences, as given by lists of
 * pairs of hidden state to observed state
 */
fun learnHMM(
        emission: Matrix,
        trainingSet: Set<List<DataPoint>>,
        resolution: Int): HMM {

    // Get hidden state count
    val hs = emission.columns

    // Prepare training sequences
    val prepared = trainingSet.map {
        it.map { it.first }.toIntArray() to it.map { it.second }.toIntArray()
    }

    // Return from the grid searched components the matrix with the highest
    // likeliness to fit all training data
    return markovMatrices(1, hs, resolution).flatMap { start ->
        markovMatrices(hs, hs, resolution).flatMap { transition ->
            markovMatrices(1, hs, resolution).map { end ->
                hmm(start, transition, emission, end.trn)
            }
        }
    }.minBy { hmm ->
        prepared.sumByDouble {
            it.first.sumByDouble { s ->
                // XXX Implement with matrices instead of indices

                // Predict the state by F/B algorithm
                val predicted = hmm.evalState(s, *it.second)
                        .values
                        .withIndex()
                        .maxBy { it.value }!!
                        .index

                // If  prediction correct, return zero error, one otherwise
                if (predicted == s) 0.0 else 1.0
            }
        }
    }!!
}

/**
 * Enumerates all [n] lists that sum up to [s].
 * @param n The length of the resulting lists
 * @param s The sum that is to be reached
 * @return Returns an iterable of lists of integers
 */
fun listsOfSum(n: Int, s: Int): Iterable<List<Int>> {
    if (n == 1)
        return listOf(listOf(s))
    else
        return (0..s).flatMap { i ->
            listsOfSum(n - 1, s - i).map { j ->
                j + i
            }
        }
}

/**
 * Returns all Markov matrices, that is, matrices where rows sum up to a
 * probability of 1.
 */
fun markovMatrices(rows: Int, columns: Int, resolution: Int): Iterable<Matrix> {
    /**
     * Enumerates all possible value combinations in the given search domain.
     */
    fun rowSpace() =
            listsOfSum(columns, resolution).map {
                it.map { it.toDouble() / resolution }
            }

    /**
     * Enumerates all matrix value combinations in the given search domain.
     */
    fun valueSpace(remainingRows: Int): Iterable<List<Double>> {
        if (remainingRows == 1)
            return rowSpace()
        else
            return rowSpace().flatMap { l ->
                valueSpace(remainingRows - 1).map { r ->
                    l + r
                }
            }
    }

    // Iterate value space and transform into matrices
    return valueSpace(rows).map { Matrix(columns, it.toDoubleArray()) }
}

/**
 * Assigner for matrix creation
 */
class Assigner<T, U> {
    // Backing for the assigner
    private val backing = mutableMapOf<Pair<T, U>, Double>()

    /**
     * Assigns column [t] row [u] the [value].
     * @param t The column
     * @param u The row
     * @param value The value to assign
     */
    fun assign(t: T, u: U, value: Double) {
        backing[t to u] = value
    }

    /**
     * Builds a matrix using the given mapping.
     * @param discretized The discretized collection
     */
    fun finalize(discretized: Discretized<T, U>): Matrix {
        error("Not implemented")
    }
}

/**
 * Discretized arbitrary list of data points.
 * @property list The original list
 * @property discretized The discretized list
 * @property firstMap Resolution of first to discrete
 * @property secondMap Resolution of second to discrete
 */
data class Discretized<T, U>(
        val list: List<Pair<T, U>>,
        val discretized: List<DataPoint>,
        val firstMap: Map<T, Int>,
        val secondMap: Map<U, Int>) {


    /**
     * Builds the matrix usign an assigner.
     */
    fun buildMatrix(assignBlock: Assigner<T, U>.() -> Unit): Matrix {
        val assigner = Assigner<T, U>()
        assigner.assignBlock()
        return assigner.finalize(this)
    }

    /**
     * Applies the discretization on another list
     */
    fun apply(list: List<Pair<T, U>>) = list.map {
        firstMap[it.first]!! to secondMap[it.second]!!
    }

}

fun <T, U> List<Pair<T, U>>.discretize(): Discretized<T, U> {
    val firstMap = mutableMapOf<T, Int>()
    val secondMap = mutableMapOf<U, Int>()
    val result = mutableListOf<DataPoint>()

    var tRun = 0
    var uRun = 0

    for ((t, u) in this) {
        val ti = firstMap.computeIfAbsent(t, { tRun++ })
        val ui = secondMap.computeIfAbsent(u, { uRun++ })
        result += ti to ui
    }

    return Discretized(this,
            result.toList(),
            firstMap.toMap(),
            secondMap.toMap())
}

fun main(args: Array<String>) {
    // First example
    val a = listOf(
            "rainy" to "umbrella",
            "sunny" to "umbrella",
            "sunny" to "no umbrella",
            "sunny" to "no umbrella",
            "sunny" to "no umbrella",
            "sunny" to "no umbrella",
            "rainy" to "no umbrella",
            "rainy" to "umbrella")

    // Second example
    val b = listOf(
            "rainy" to "umbrella",
            "sunny" to "no umbrella",
            "sunny" to "no umbrella",
            "sunny" to "no umbrella",
            "sunny" to "no umbrella",
            "rainy" to "umbrella")

    // Discretization for all data
    val x = (a + b).discretize()

    // Emission for all data
    val e = x.buildMatrix {
        assign("rainy", "umbrella", 1.0)
        assign("sunny", "umbrella", 0.0)
        assign("rainy", "no umbrella", 0.0)
        assign("sunny", "no umbrella", 1.0)
    }

    // Learn HMM on discretized individual examples
    val h = learnHMM(e, setOf(x.apply(a), x.apply(b)), 10)

    println(h)

}
	package ktsandbox

	import javax.xml.crypto.Data

	/**
	* A data point, where first is the hidden state, and second is the observed
	* state.
	*/
	typealias DataPoint = Pair<Int, Int>

	/**
	* Learns a HMM by grid searching the state matrix.
	* @param emission The emission probabilities
	* @param trainingSet The set of training sequences, as given by lists of
	* pairs of hidden state to observed state
	*/
	fun learnHMM(
	emission: Matrix,
	trainingSet: Set<List<DataPoint>>,
	resolution: Int): HMM {

	// Get hidden state count
	val hs = emission.columns

	// Prepare training sequences
	val prepared = trainingSet.map {
	it.map { it.first }.toIntArray() to it.map { it.second }.toIntArray()
	}

	// Return from the grid searched components the matrix with the highest
	// likeliness to fit all training data
	return markovMatrices(1, hs, resolution).flatMap { start ->
	markovMatrices(hs, hs, resolution).flatMap { transition ->
	markovMatrices(1, hs, resolution).map { end ->
	hmm(start, transition, emission, end.trn)
	}
	}
	}.minBy { hmm ->
	prepared.sumByDouble {
	it.first.sumByDouble { s ->
	// XXX Implement with matrices instead of indices

	// Predict the state by F/B algorithm
	val predicted = hmm.evalState(s, *it.second)
	.values
	.withIndex()
	.maxBy { it.value }!!
	.index

	// If prediction correct, return zero error, one otherwise
	if (predicted == s) 0.0 else 1.0
	}
	}
	}!!
	}

	/**
	* Enumerates all [n] lists that sum up to [s].
	* @param n The length of the resulting lists
	* @param s The sum that is to be reached
	* @return Returns an iterable of lists of integers
	*/
	fun listsOfSum(n: Int, s: Int): Iterable<List<Int>> {
	if (n == 1)
	return listOf(listOf(s))
	else
	return (0..s).flatMap { i ->
	listsOfSum(n - 1, s - i).map { j ->
	j + i
	}
	}
	}

	/**
	* Returns all Markov matrices, that is, matrices where rows sum up to a
	* probability of 1.
	*/
	fun markovMatrices(rows: Int, columns: Int, resolution: Int): Iterable<Matrix> {
	/**
	* Enumerates all possible value combinations in the given search domain.
	*/
	fun rowSpace() =
	listsOfSum(columns, resolution).map {
	it.map { it.toDouble() / resolution }
	}

	/**
	* Enumerates all matrix value combinations in the given search domain.
	*/
	fun valueSpace(remainingRows: Int): Iterable<List<Double>> {
	if (remainingRows == 1)
	return rowSpace()
	else
	return rowSpace().flatMap { l ->
	valueSpace(remainingRows - 1).map { r ->
	l + r
	}
	}
	}

	// Iterate value space and transform into matrices
	return valueSpace(rows).map { Matrix(columns, it.toDoubleArray()) }
	}

	/**
	* Assigner for matrix creation
	*/
	class Assigner<T, U> {
	// Backing for the assigner
	private val backing = mutableMapOf<Pair<T, U>, Double>()

	/**
	* Assigns column [t] row [u] the [value].
	* @param t The column
	* @param u The row
	* @param value The value to assign
	*/
	fun assign(t: T, u: U, value: Double) {
	backing[t to u] = value
	}

	/**
	* Builds a matrix using the given mapping.
	* @param discretized The discretized collection
	*/
	fun finalize(discretized: Discretized<T, U>): Matrix {
	error("Not implemented")
	}
	}

	/**
	* Discretized arbitrary list of data points.
	* @property list The original list
	* @property discretized The discretized list
	* @property firstMap Resolution of first to discrete
	* @property secondMap Resolution of second to discrete
	*/
	data class Discretized<T, U>(
	val list: List<Pair<T, U>>,
	val discretized: List<DataPoint>,
	val firstMap: Map<T, Int>,
	val secondMap: Map<U, Int>) {


	/**
	* Builds the matrix usign an assigner.
	*/
	fun buildMatrix(assignBlock: Assigner<T, U>.() -> Unit): Matrix {
	val assigner = Assigner<T, U>()
	assigner.assignBlock()
	return assigner.finalize(this)
	}

	/**
	* Applies the discretization on another list
	*/
	fun apply(list: List<Pair<T, U>>) = list.map {
	firstMap[it.first]!! to secondMap[it.second]!!
	}

	}

	fun <T, U> List<Pair<T, U>>.discretize(): Discretized<T, U> {
	val firstMap = mutableMapOf<T, Int>()
	val secondMap = mutableMapOf<U, Int>()
	val result = mutableListOf<DataPoint>()

	var tRun = 0
	var uRun = 0

	for ((t, u) in this) {
	val ti = firstMap.computeIfAbsent(t, { tRun++ })
	val ui = secondMap.computeIfAbsent(u, { uRun++ })
	result += ti to ui
	}

	return Discretized(this,
	result.toList(),
	firstMap.toMap(),
	secondMap.toMap())
	}

	fun main(args: Array<String>) {
	// First example
	val a = listOf(
	"rainy" to "umbrella",
	"sunny" to "umbrella",
	"sunny" to "no umbrella",
	"sunny" to "no umbrella",
	"sunny" to "no umbrella",
	"sunny" to "no umbrella",
	"rainy" to "no umbrella",
	"rainy" to "umbrella")

	// Second example
	val b = listOf(
	"rainy" to "umbrella",
	"sunny" to "no umbrella",
	"sunny" to "no umbrella",
	"sunny" to "no umbrella",
	"sunny" to "no umbrella",
	"rainy" to "umbrella")

	// Discretization for all data
	val x = (a + b).discretize()

	// Emission for all data
	val e = x.buildMatrix {
	assign("rainy", "umbrella", 1.0)
	assign("sunny", "umbrella", 0.0)
	assign("rainy", "no umbrella", 0.0)
	assign("sunny", "no umbrella", 1.0)
	}

	// Learn HMM on discretized individual examples
	val h = learnHMM(e, setOf(x.apply(a), x.apply(b)), 10)

	println(h)

	}