Skip to content

Instantly share code, notes, and snippets.

@Dobiasd
Created March 6, 2019 20:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Dobiasd/6cc56e940f3b135dd9f6f04cb53c4ce2 to your computer and use it in GitHub Desktop.
Save Dobiasd/6cc56e940f3b135dd9f6f04cb53c4ce2 to your computer and use it in GitHub Desktop.
import java.util.*
import kotlin.math.max
import kotlin.math.min
import kotlin.math.roundToInt
import kotlin.random.Random
// 10 buckets for interval [0, 1000], outliers will be clamped
class Histogram(
private val data: MutableMap<Int, Int> = mutableMapOf()
) {
fun add(value: Int) {
val bucket = max(0, min(1000, value)) / 100
data[bucket] = data.getOrDefault(bucket, 0) + 1
}
fun deriveCDF(): CDF {
val sum = data.values.sum()
val pdfData = data.toSortedMap().mapValues { it.value.toDouble() / sum }
var acc = 0.0
val builder = CDF.CDFBuilder()
pdfData.forEach {
acc += pdfData.getOrDefault(it.key, 0.0)
builder.withData(it.key, acc)
}
return builder.build()
}
}
// cumulative distribution function
class CDF private constructor(
private val data: SortedMap<Int, Double>
) {
class CDFBuilder {
private val cdfData = mutableMapOf<Int, Double>()
fun withData(a: Int, b: Double): CDFBuilder {
// verification and stuff
cdfData[a] = b
return this
}
fun build(): CDF {
return CDF(cdfData.toSortedMap())
}
}
fun getQuantile(value: Double): Double? {
val bucket = (10 * value).roundToInt()
return data[bucket]
}
}
fun main() {
val hist = Histogram()
List(10000) {
Random.nextInt(0, 1000)
}.forEach {
hist.add(it)
}
val cdf = hist.deriveCDF()
println(cdf.getQuantile(0.3))
// Somebody can just build invalid CDFs from anywhere without providing a Histogram.
val someCDFBuilder = CDF.CDFBuilder()
someCDFBuilder.withData(1, 2.34)
val invalidCDF = someCDFBuilder.build()
println(invalidCDF.getQuantile(0.3))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment