Skip to content

Instantly share code, notes, and snippets.

@lukashaertel
Created February 23, 2017 11:40
Show Gist options
  • Save lukashaertel/e3d337ab13c9a900c7f72c2336075b3d to your computer and use it in GitHub Desktop.
Save lukashaertel/e3d337ab13c9a900c7f72c2336075b3d to your computer and use it in GitHub Desktop.
package ktsandbox
import javax.xml.crypto.Data
/**
* A data point, where first is the hidden state, and second is the observed
* state.
*/
typealias DataPoint = Pair<Int, Int>
/**
* Learns a HMM by grid searching the state matrix.
* @param emission The emission probabilities
* @param trainingSet The set of training sequences, as given by lists of
* pairs of hidden state to observed state
*/
fun learnHMM(
emission: Matrix,
trainingSet: Set<List<DataPoint>>,
resolution: Int): HMM {
// Get hidden state count
val hs = emission.columns
// Prepare training sequences
val prepared = trainingSet.map {
it.map { it.first }.toIntArray() to it.map { it.second }.toIntArray()
}
// Return from the grid searched components the matrix with the highest
// likeliness to fit all training data
return markovMatrices(1, hs, resolution).flatMap { start ->
markovMatrices(hs, hs, resolution).flatMap { transition ->
markovMatrices(1, hs, resolution).map { end ->
hmm(start, transition, emission, end.trn)
}
}
}.minBy { hmm ->
prepared.sumByDouble {
it.first.sumByDouble { s ->
// XXX Implement with matrices instead of indices
// Predict the state by F/B algorithm
val predicted = hmm.evalState(s, *it.second)
.values
.withIndex()
.maxBy { it.value }!!
.index
// If prediction correct, return zero error, one otherwise
if (predicted == s) 0.0 else 1.0
}
}
}!!
}
/**
* Enumerates all [n] lists that sum up to [s].
* @param n The length of the resulting lists
* @param s The sum that is to be reached
* @return Returns an iterable of lists of integers
*/
fun listsOfSum(n: Int, s: Int): Iterable<List<Int>> {
if (n == 1)
return listOf(listOf(s))
else
return (0..s).flatMap { i ->
listsOfSum(n - 1, s - i).map { j ->
j + i
}
}
}
/**
* Returns all Markov matrices, that is, matrices where rows sum up to a
* probability of 1.
*/
fun markovMatrices(rows: Int, columns: Int, resolution: Int): Iterable<Matrix> {
/**
* Enumerates all possible value combinations in the given search domain.
*/
fun rowSpace() =
listsOfSum(columns, resolution).map {
it.map { it.toDouble() / resolution }
}
/**
* Enumerates all matrix value combinations in the given search domain.
*/
fun valueSpace(remainingRows: Int): Iterable<List<Double>> {
if (remainingRows == 1)
return rowSpace()
else
return rowSpace().flatMap { l ->
valueSpace(remainingRows - 1).map { r ->
l + r
}
}
}
// Iterate value space and transform into matrices
return valueSpace(rows).map { Matrix(columns, it.toDoubleArray()) }
}
/**
* Assigner for matrix creation
*/
class Assigner<T, U> {
// Backing for the assigner
private val backing = mutableMapOf<Pair<T, U>, Double>()
/**
* Assigns column [t] row [u] the [value].
* @param t The column
* @param u The row
* @param value The value to assign
*/
fun assign(t: T, u: U, value: Double) {
backing[t to u] = value
}
/**
* Builds a matrix using the given mapping.
* @param discretized The discretized collection
*/
fun finalize(discretized: Discretized<T, U>): Matrix {
error("Not implemented")
}
}
/**
* Discretized arbitrary list of data points.
* @property list The original list
* @property discretized The discretized list
* @property firstMap Resolution of first to discrete
* @property secondMap Resolution of second to discrete
*/
data class Discretized<T, U>(
val list: List<Pair<T, U>>,
val discretized: List<DataPoint>,
val firstMap: Map<T, Int>,
val secondMap: Map<U, Int>) {
/**
* Builds the matrix usign an assigner.
*/
fun buildMatrix(assignBlock: Assigner<T, U>.() -> Unit): Matrix {
val assigner = Assigner<T, U>()
assigner.assignBlock()
return assigner.finalize(this)
}
/**
* Applies the discretization on another list
*/
fun apply(list: List<Pair<T, U>>) = list.map {
firstMap[it.first]!! to secondMap[it.second]!!
}
}
fun <T, U> List<Pair<T, U>>.discretize(): Discretized<T, U> {
val firstMap = mutableMapOf<T, Int>()
val secondMap = mutableMapOf<U, Int>()
val result = mutableListOf<DataPoint>()
var tRun = 0
var uRun = 0
for ((t, u) in this) {
val ti = firstMap.computeIfAbsent(t, { tRun++ })
val ui = secondMap.computeIfAbsent(u, { uRun++ })
result += ti to ui
}
return Discretized(this,
result.toList(),
firstMap.toMap(),
secondMap.toMap())
}
fun main(args: Array<String>) {
// First example
val a = listOf(
"rainy" to "umbrella",
"sunny" to "umbrella",
"sunny" to "no umbrella",
"sunny" to "no umbrella",
"sunny" to "no umbrella",
"sunny" to "no umbrella",
"rainy" to "no umbrella",
"rainy" to "umbrella")
// Second example
val b = listOf(
"rainy" to "umbrella",
"sunny" to "no umbrella",
"sunny" to "no umbrella",
"sunny" to "no umbrella",
"sunny" to "no umbrella",
"rainy" to "umbrella")
// Discretization for all data
val x = (a + b).discretize()
// Emission for all data
val e = x.buildMatrix {
assign("rainy", "umbrella", 1.0)
assign("sunny", "umbrella", 0.0)
assign("rainy", "no umbrella", 0.0)
assign("sunny", "no umbrella", 1.0)
}
// Learn HMM on discretized individual examples
val h = learnHMM(e, setOf(x.apply(a), x.apply(b)), 10)
println(h)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment