-
-
Save Dobiasd/1eb0a903c61d7dd489283d118b102ea8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.* | |
import kotlin.math.max | |
import kotlin.math.min | |
import kotlin.math.roundToInt | |
import kotlin.random.Random | |
// 10 buckets for interval [0, 1000], outliers will be clamped | |
class Histogram( | |
private val data: MutableMap<Int, Int> = mutableMapOf() | |
) { | |
fun add(value: Int) { | |
val bucket = max(0, min(1000, value)) / 100 | |
data[bucket] = data.getOrDefault(bucket, 0) + 1 | |
} | |
// cumulative distribution function | |
class CDF( | |
histogram: Histogram | |
) { | |
private val data: SortedMap<Int, Double> = deriveCDFData(histogram) | |
fun getQuantile(value: Double): Double? { | |
val bucket = (10 * value).roundToInt() | |
return data[bucket] | |
} | |
private fun deriveCDFData(histogram: Histogram): SortedMap<Int, Double> { | |
val data = histogram.data | |
val sum = data.values.sum() | |
val pdfData = data.toSortedMap().mapValues { it.value.toDouble() / sum } | |
val cdfData: MutableMap<Int, Double> = mutableMapOf() | |
var acc = 0.0 | |
pdfData.forEach { | |
acc += pdfData.getOrDefault(it.key, 0.0) | |
cdfData[it.key] = acc | |
} | |
return cdfData.toSortedMap() | |
} | |
} | |
} | |
fun main() { | |
val hist = Histogram() | |
List(10000) { | |
Random.nextInt(0, 1000) | |
}.forEach { | |
hist.add(it) | |
} | |
val cdf = (Histogram::CDF)(hist) | |
println(cdf.getQuantile(0.3)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment