Skip to content

Instantly share code, notes, and snippets.

@juanplopes
Created January 22, 2014 13:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save juanplopes/8558895 to your computer and use it in GitHub Desktop.
Save juanplopes/8558895 to your computer and use it in GitHub Desktop.
Using Count-Min Sketch to find stream's quantiles with small memory footprint.
QuantileSketch sketch = new QuantileSketch();
for (int i = 0; i < 100000000; i++) {
double x1 = Math.random();
double x2 = Math.random();
//random normal distribution with mean=0 and stdev=1
sketch.offer(abs(sqrt(-2 * log(x1)) * cos(2 * Math.PI * x2)));
}
for (int i = 0; i <= 100; i++) {
System.out.println(i + "%: " + sketch.quantile(i / 100.0));
}
public class QuantileSketch {
private final double e;
private final int width;
private final double M[];
private final int mask;
private volatile long total = 0;
public QuantileSketch() {
this(16);
}
public QuantileSketch(int log2w) {
this.width = 1 << log2w;
this.M = new double[width * 2];
this.mask = width - 1;
this.e = Math.E / width;
}
public void offer(double value) {
for (long k = makeK(value); k != 0; k += k & -k)
innerAdd(k);
total++;
}
private long makeK(double value) {
return -(long) (value * 1e3) ^ Long.MAX_VALUE;
}
private double unmakeK(long k) {
return (-k ^ Long.MAX_VALUE) / 1000.0;
}
private double queryK(long k) {
double sum = 0;
for (; k != 0; k -= k & -k) {
sum += innerGet(k);
}
return sum;
}
public double quantile(double value) {
return rawQuantile(value * total);
}
private double rawQuantile(double sum) {
long begin = 0, count = -1;
while (count != 0) {
long step = count >>> 1;
long it = begin + step;
if (queryK(it) < sum) {
begin = it + 1;
count -= step + 1;
} else {
count = step;
}
}
return unmakeK(begin);
}
private void innerAdd(long v) {
M[hash(v, 0)]++;
M[width + hash(v, 1)]++;
}
private double innerGet(long v) {
return Math.min(
M[hash(v, 0)],
M[width + hash(v, 1)])
- e * total / 2; //removing bias
}
private int hash(long v, int i) {
//use your preferred hash function
return MurmurHash.hashLong(v, i) & mask;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment