Skip to content

Instantly share code, notes, and snippets.

@m-manu
Created January 21, 2017 14:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save m-manu/9342c90c04da2c6af022a382a4815869 to your computer and use it in GitHub Desktop.
Save m-manu/9342c90c04da2c6af022a382a4815869 to your computer and use it in GitHub Desktop.
Compute statistical averages (mean, median and mode) from a stream of numbers. Optimized for 'fetch'.
package manu.sandbox.utils;
import java.util.*;
public class AverageCalculator {
private static class FrequencyComparator implements Comparator<Double> {
private final Map<Double, Integer> histogram;
private FrequencyComparator(Map<Double, Integer> histogram) {
this.histogram = histogram;
}
@Override
public int compare(Double a, Double b) {
Integer sa = this.histogram.get(b), sb = this.histogram.get(a);
if (sa == null || sb == null || sa.equals(sb)) {
return b.compareTo(a);
}
return sa.compareTo(sb);
}
}
private final PriorityQueue<Double> minHeap, maxHeap;
private final Map<Double, Integer> histogram;
private final PriorityQueue<Double> heap;
private int count = 0;
private double mean;
public AverageCalculator() {
this(100);
}
public AverageCalculator(int initSize) {
minHeap = new PriorityQueue<>(initSize);
maxHeap = new PriorityQueue<>(initSize, Collections.reverseOrder());
histogram = new HashMap<>(initSize);
heap = new PriorityQueue<>(initSize, new FrequencyComparator(histogram));
}
public AverageCalculator insertBulk(Iterable<? extends Number> collection) {
for (Number e : collection) insert(e.doubleValue());
return this;
}
public void insert(double e) {
Integer frequency = histogram.get(e);
heap.remove(e);
histogram.put(e, frequency == null ? 1 : frequency + 1);
heap.add(e);
mean += (e - mean) / (count + 1);
if (count == 0) {
minHeap.add(e);
} else if (count == 1) {
if (e > minHeap.peek()) {
maxHeap.add(e);
} else {
maxHeap.add(minHeap.poll());
minHeap.add(e);
}
} else {
if (e > minHeap.peek()) {
minHeap.add(e);
} else if (e < maxHeap.peek()) {
maxHeap.add(e);
} else {
if (minHeap.size() > maxHeap.size())
maxHeap.add(e);
else
minHeap.add(e);
}
if (maxHeap.size() > minHeap.size())
minHeap.add(maxHeap.poll());
else
maxHeap.add(minHeap.poll());
}
count++;
}
public double getMedian() {
if (count == 0)
return Double.NaN;
if (maxHeap.size() > minHeap.size()) {
return maxHeap.peek();
} else if (maxHeap.size() < minHeap.size()) {
return minHeap.peek();
} else {
return (maxHeap.peek() + minHeap.peek()) / 2.0;
}
}
public double getMean() {
if (count == 0)
return Double.NaN;
else
return mean;
}
public double getMode() {
if (count == 0)
return Double.NaN;
return heap.peek();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment