Skip to content

Instantly share code, notes, and snippets.

@Dobiasd
Last active March 7, 2019 13:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dobiasd/1eb0a903c61d7dd489283d118b102ea8 to your computer and use it in GitHub Desktop.
Save Dobiasd/1eb0a903c61d7dd489283d118b102ea8 to your computer and use it in GitHub Desktop.
import java.util.*
import kotlin.math.max
import kotlin.math.min
import kotlin.math.roundToInt
import kotlin.random.Random
// 10 buckets for interval [0, 1000], outliers will be clamped
class Histogram(
private val data: MutableMap<Int, Int> = mutableMapOf()
) {
fun add(value: Int) {
val bucket = max(0, min(1000, value)) / 100
data[bucket] = data.getOrDefault(bucket, 0) + 1
}
// cumulative distribution function
class CDF(
histogram: Histogram
) {
private val data: SortedMap<Int, Double> = deriveCDFData(histogram)
fun getQuantile(value: Double): Double? {
val bucket = (10 * value).roundToInt()
return data[bucket]
}
private fun deriveCDFData(histogram: Histogram): SortedMap<Int, Double> {
val data = histogram.data
val sum = data.values.sum()
val pdfData = data.toSortedMap().mapValues { it.value.toDouble() / sum }
val cdfData: MutableMap<Int, Double> = mutableMapOf()
var acc = 0.0
pdfData.forEach {
acc += pdfData.getOrDefault(it.key, 0.0)
cdfData[it.key] = acc
}
return cdfData.toSortedMap()
}
}
}
fun main() {
val hist = Histogram()
List(10000) {
Random.nextInt(0, 1000)
}.forEach {
hist.add(it)
}
val cdf = (Histogram::CDF)(hist)
println(cdf.getQuantile(0.3))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment