Last active
August 29, 2015 13:58
-
-
Save kalaidin/9974919 to your computer and use it in GitHub Desktop.
Percentiles with frugal streaming
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
frugal <- function(stream) { | |
m <- 0 | |
for (val in stream) { | |
if (val > m) | |
m = m + 1 | |
else if (val < m) | |
m = m - 1 | |
} | |
return(m) | |
} | |
frugal_1u <- function(stream, m = 0, q = 0.5) { | |
for (val in stream) { | |
if (val > m && runif(1) > 1 - q) | |
m = m + 1 | |
else if (val < m && runif(1) > q) | |
m = m - 1 | |
} | |
return(m) | |
} | |
constantly_one <- function(x) { | |
return(1.0) | |
} | |
frugal_2u <- function(stream, m = 0, q = 0.5, f = constantly_one) { | |
# http://blog.aggregateknowledge.com/2013/09/16/sketch-of-the-day-frugal-streaming/ | |
step <- 1 | |
sign <- 1 | |
for (item in stream) { | |
if (item > m && runif(1) > 1 - q) { | |
# Increment the step size if and only if the estimate keeps moving in | |
# the same direction. Step size is incremented by the result of applying | |
# the specified step function to the previous step size. | |
step <- step + ifelse(sign > 0, f(step), -1 * f(step)) | |
# Increment the estimate by step size if step is positive. Otherwise, | |
# increment the step size by one. | |
m <- m + ifelse(step > 0, step, 1) | |
# Mark that the estimate increased this step | |
sign <- 1 | |
# If the estimate overshot the item in the stream, pull the estimate back | |
# and re-adjust the step size. | |
if (m > item) { | |
step <- step + (item - m) | |
m <- item | |
} | |
} | |
# If the item is less than the stream, follow all of the same steps as | |
# above, with signs reversed. | |
else if (item < m && runif(1) > q) { | |
step <- step + ifelse(sign < 0, f(step), -1 * f(step)) | |
m <- m - ifelse(step > 0, step, 1) | |
sign <- -1 | |
if (m < item) { | |
step <- step + (m - item) | |
m <- item | |
} | |
} | |
# Damp down the step size to avoid oscillation. | |
if ((m - item) * sign < 0 && step > 1) { | |
step <- 1 | |
} | |
} | |
return(m) | |
} | |
stream <- rnorm(10000, 100, 10) | |
quantile(stream, 0.5) | |
frugal(stream) | |
frugal_1u(stream) | |
frugal_2u(stream) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment