Created
January 22, 2014 13:41
-
-
Save juanplopes/8558895 to your computer and use it in GitHub Desktop.
Using Count-Min Sketch to find stream's quantiles with small memory footprint.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
QuantileSketch sketch = new QuantileSketch(); | |
for (int i = 0; i < 100000000; i++) { | |
double x1 = Math.random(); | |
double x2 = Math.random(); | |
//random normal distribution with mean=0 and stdev=1 | |
sketch.offer(abs(sqrt(-2 * log(x1)) * cos(2 * Math.PI * x2))); | |
} | |
for (int i = 0; i <= 100; i++) { | |
System.out.println(i + "%: " + sketch.quantile(i / 100.0)); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class QuantileSketch { | |
private final double e; | |
private final int width; | |
private final double M[]; | |
private final int mask; | |
private volatile long total = 0; | |
public QuantileSketch() { | |
this(16); | |
} | |
public QuantileSketch(int log2w) { | |
this.width = 1 << log2w; | |
this.M = new double[width * 2]; | |
this.mask = width - 1; | |
this.e = Math.E / width; | |
} | |
public void offer(double value) { | |
for (long k = makeK(value); k != 0; k += k & -k) | |
innerAdd(k); | |
total++; | |
} | |
private long makeK(double value) { | |
return -(long) (value * 1e3) ^ Long.MAX_VALUE; | |
} | |
private double unmakeK(long k) { | |
return (-k ^ Long.MAX_VALUE) / 1000.0; | |
} | |
private double queryK(long k) { | |
double sum = 0; | |
for (; k != 0; k -= k & -k) { | |
sum += innerGet(k); | |
} | |
return sum; | |
} | |
public double quantile(double value) { | |
return rawQuantile(value * total); | |
} | |
private double rawQuantile(double sum) { | |
long begin = 0, count = -1; | |
while (count != 0) { | |
long step = count >>> 1; | |
long it = begin + step; | |
if (queryK(it) < sum) { | |
begin = it + 1; | |
count -= step + 1; | |
} else { | |
count = step; | |
} | |
} | |
return unmakeK(begin); | |
} | |
private void innerAdd(long v) { | |
M[hash(v, 0)]++; | |
M[width + hash(v, 1)]++; | |
} | |
private double innerGet(long v) { | |
return Math.min( | |
M[hash(v, 0)], | |
M[width + hash(v, 1)]) | |
- e * total / 2; //removing bias | |
} | |
private int hash(long v, int i) { | |
//use your preferred hash function | |
return MurmurHash.hashLong(v, i) & mask; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment