|
import java.util.Map; |
|
import java.util.SortedMap; |
|
import java.util.TreeMap; |
|
|
|
|
|
/** Non-instantiable class containing SampleSet factories and implementations. |
|
*/ |
|
public final class Samples { |
|
|
|
/** Gets the IntBucketSampleSet factory singleton. |
|
* @return the IntBucketSampleSet SampleSetFactory singleton |
|
*/ |
|
public static SampleSetFactory getIntBucketSampleSetFactory() { |
|
return intBucketSampleSetFactory; |
|
} |
|
|
|
/** Gets the DecBucketSampleSet factory singleton. |
|
* @return the DecBucketSampleSet SampleSetFactory singleton |
|
*/ |
|
public static SampleSetFactory getDecBucketSampleSetFactory(int minOrderOfMagnitude, int decimalPrecision) { |
|
return new DecBucketSampleSetFactory(minOrderOfMagnitude, decimalPrecision); |
|
} |
|
|
|
|
|
|
|
/** the IntBucketSampleSet factory singleton */ |
|
private static final SampleSetFactory intBucketSampleSetFactory = new SampleSetFactory() { |
|
@Override |
|
public SampleSet makeSampleSet() { |
|
return new IntBucketSampleCounter(); |
|
} |
|
}; |
|
|
|
|
|
/** the DecBucketSampleSet factory singleton */ |
|
private static final class DecBucketSampleSetFactory implements SampleSetFactory { |
|
private final int minOrderOfMagnitude; |
|
private final int decimalPrecision; |
|
|
|
private DecBucketSampleSetFactory(int minOrderOfMagnitude, int decimalPrecision) { |
|
this.minOrderOfMagnitude = minOrderOfMagnitude; |
|
this.decimalPrecision = decimalPrecision; |
|
} |
|
|
|
@Override |
|
public SampleSet makeSampleSet() { |
|
return new DecBucketSampleCounter(minOrderOfMagnitude, decimalPrecision); |
|
} |
|
}; |
|
|
|
|
|
|
|
/** General abstract implementation of SampleSet that uses buckets to store the samples. |
|
* Each bucket represents a value interval and counts the number of samples |
|
* that have fallen within that interval. |
|
* With this approach the memory size grows with the order of magnitude between the smallest |
|
* and the largest data point, but not with the number of data points. |
|
* <P> |
|
* Note: This class is not thread-safe. |
|
* |
|
* @author Christer Swahn |
|
*/ |
|
static abstract class BucketSampleCounter implements SampleSet { |
|
private final SortedMap<Double,Counter> buckets = new TreeMap<Double,Counter>(); |
|
private int totalCount = 0; |
|
double minSampleValue = Double.MAX_VALUE; |
|
double maxSampleValue = -Double.MAX_VALUE; |
|
private double totalSum = 0; |
|
private int maxCounter = 0; |
|
private Double cachedApproxMean = null; |
|
private Double cachedMedian = null; |
|
private Double cachedStdDev = null; |
|
|
|
|
|
protected BucketSampleCounter() { |
|
} |
|
|
|
|
|
/** Returns the number of sample buckets currently held. */ |
|
public int getBucketCount() { |
|
return buckets.size(); |
|
} |
|
|
|
/** Returns the count value of the highest bucket counter. */ |
|
protected int getMaxCounter() { |
|
return maxCounter; |
|
} |
|
|
|
@Override |
|
public int getCount() { |
|
return totalCount; |
|
} |
|
|
|
@Override |
|
public double getSum() { |
|
return totalSum; |
|
} |
|
|
|
|
|
@Override |
|
public double getMin() { |
|
if (totalCount == 0) |
|
return 0; |
|
return minSampleValue; |
|
} |
|
|
|
@Override |
|
public double getMax() { |
|
if (totalCount == 0) |
|
return 0; |
|
return maxSampleValue; |
|
} |
|
|
|
|
|
|
|
/** Puts a sample into this bucket sample counter. |
|
* @param sampleValue the sample value to insert |
|
*/ |
|
@Override |
|
public void putSample(double sampleValue) { |
|
Double bucketMedian = getBucketMedian(sampleValue); |
|
Counter counter = buckets.get(bucketMedian); |
|
if (counter == null) { |
|
counter = new Counter(); |
|
buckets.put(bucketMedian, counter); |
|
} |
|
counter.count++; |
|
maxCounter = Math.max(maxCounter, counter.count); |
|
totalCount++; |
|
totalSum += sampleValue; |
|
minSampleValue = Math.min(minSampleValue, sampleValue); |
|
maxSampleValue = Math.max(maxSampleValue, sampleValue); |
|
// clear cached calculated values: |
|
cachedApproxMean = null; |
|
cachedMedian = null; |
|
cachedStdDev = null; |
|
} |
|
|
|
/** Gets the median value of the bucket that the specified sample value is put into. |
|
* This is the middle value of the bucket's value range, e.g. if the bucket is for |
|
* samples with values between 1 and 3, the bucket's median is 2. |
|
* <P> |
|
* This method must be implemented by concrete subclasses. |
|
* |
|
* @param sampleValue the sample value to assign a bucket |
|
* @return the median value of the sample value's bucket |
|
*/ |
|
protected abstract double getBucketMedian(double sampleValue); |
|
|
|
|
|
|
|
/** Gets the approximate mean (average) of this sample bucket set. |
|
* <P> |
|
* Note that the returned value will probably differ from the true mean since this is |
|
* an approximating sample set. |
|
* @see #getMean() |
|
*/ |
|
public double getApproxMean() { |
|
if (cachedApproxMean == null) |
|
cachedApproxMean = calcApproxMean(); |
|
return cachedApproxMean; |
|
} |
|
|
|
/** Gets the true mean (average) of this sample bucket set. |
|
* The return value is equal to getSum()/getCount() if getCount()>0. |
|
*/ |
|
@Override |
|
public double getMean() { |
|
if (totalCount == 0) |
|
return 0; |
|
return totalSum / totalCount; |
|
} |
|
|
|
/** Gets the median of this sample bucket set, which is an approximation of the true median. */ |
|
@Override |
|
public double getMedian() { |
|
if (cachedMedian == null) |
|
cachedMedian = calcMedian(); |
|
return cachedMedian; |
|
} |
|
|
|
/** Gets the so-called sample standard deviation of this sample bucket set, |
|
* which is an approximation of the true sample standard deviation. */ |
|
@Override |
|
public double getStdDev() { |
|
if (cachedStdDev == null) |
|
cachedStdDev = calcStdDev(); |
|
return cachedStdDev; |
|
} |
|
|
|
|
|
/** Calculates the mean of this sample bucket set. */ |
|
private double calcApproxMean() { |
|
if (totalCount == 0) |
|
return 0; |
|
double sum = 0; |
|
for (Map.Entry<Double,Counter> e : buckets.entrySet()) { |
|
sum += e.getKey() * e.getValue().count; |
|
} |
|
double mean = sum / totalCount; |
|
return mean; |
|
} |
|
|
|
/** Calculates the median of this sample bucket set. */ |
|
private double calcMedian() { |
|
if (totalCount == 0) |
|
return 0; |
|
int traversedSamplesCount = 0; |
|
for (Map.Entry<Double,Counter> e : buckets.entrySet()) { |
|
int count = e.getValue().count; |
|
traversedSamplesCount += count; |
|
if (traversedSamplesCount >= totalCount/2) { |
|
double median = e.getKey(); |
|
return median; |
|
} |
|
} |
|
assert false : "Program error finding median of " + this; |
|
return 0; // shouldn't happen |
|
} |
|
|
|
/** Calculates the so-called sample standard deviation of this sample bucket set. */ |
|
private double calcStdDev() { |
|
if (totalCount <= 1) |
|
return 0; |
|
double mean = getMean(); |
|
double varianceSum = 0; |
|
for (Map.Entry<Double,Counter> e : buckets.entrySet()) { |
|
double diff = e.getKey() - mean; |
|
varianceSum += (diff * diff) * e.getValue().count; |
|
} |
|
int denom = totalCount - 1; |
|
// (For a "population standard deviation", i.e. if the sample values were the complete value population, |
|
// the denominator would not have been subtracted by 1.) |
|
double stdDev = Math.sqrt(varianceSum / denom); |
|
return stdDev; |
|
} |
|
|
|
|
|
|
|
@Override |
|
public String[] dumpData() { |
|
String[] dump = new String[buckets.size()]; |
|
int bucketP = 1; |
|
int bucketW = getDecMagnitude(maxSampleValue); |
|
bucketW = bucketW + ((bucketW-1) / 3) + 1 + bucketP; // take into account: a delimiter for every 3 digits, comma, and fraction digits |
|
int counterW = getDecMagnitude(maxCounter); |
|
counterW = counterW + ((counterW-1) / 3); // take into account: a delimiter for every 3 digits |
|
String format = "%," + bucketW + "." + bucketP + "f: %," + counterW + "d"; |
|
int b = 0; |
|
for (Map.Entry<Double,Counter> e : buckets.entrySet()) { |
|
dump[b++] = String.format(format, e.getKey(), e.getValue().count); |
|
} |
|
return dump; |
|
} |
|
|
|
|
|
@Override |
|
public String toString() { |
|
String str = String.format("A=%.1f xA=%.1f SD=%.1f Md=%.1f Mi=%.1f Ma=%.1f T=%.1f C=%d bc=%d", |
|
getMean(), getApproxMean(), getStdDev(), getMedian(), |
|
getMin(), getMax(), getSum(), getCount(), getBucketCount()); |
|
return str; |
|
} |
|
|
|
|
|
/** Wrapper class to handle a mutable int in a collection. */ |
|
private static final class Counter { |
|
public int count = 0; |
|
|
|
@Override |
|
public String toString() { |
|
return String.valueOf(count); |
|
} |
|
} |
|
} |
|
|
|
|
|
/** A simple BucketSampleCounter where each bucket represents the interval [-0.5;0.5) |
|
* around an integer (i.e. the sample value rounded to the closest integer). |
|
*/ |
|
static class IntBucketSampleCounter extends BucketSampleCounter { |
|
@Override |
|
protected double getBucketMedian(double sampleValue) { |
|
double median = Math.rint(sampleValue); |
|
return median; |
|
} |
|
} |
|
|
|
|
|
/** A BucketSampleCounter where each bucket represents a value interval with |
|
* a given decimal precision. |
|
* For example, with a decimal precision of 2 (the default), for each decimal |
|
* order of magnitude (e.g. between 1.0 and 9.9, between 10 and 99 and so on) |
|
* up to 90 intervals (buckets) are stored. |
|
*/ |
|
static class DecBucketSampleCounter extends BucketSampleCounter { |
|
@SuppressWarnings("unused") |
|
private final int minOrderOfMagnitude; |
|
private final int decimalPrecision; |
|
private final double minAbsValue; |
|
|
|
/** Creates a DecBucketSampleCounter with a minimum precision of 0. */ |
|
DecBucketSampleCounter() { |
|
this(0); |
|
} |
|
|
|
/** Creates a DecBucketSampleCounter with the specified minimum order of magnitude. */ |
|
DecBucketSampleCounter(int minOrderOfMagnitude) { |
|
this(minOrderOfMagnitude, 2); |
|
} |
|
|
|
/** Creates a DecBucketSampleCounter with the specified minimum order of magnitude. */ |
|
DecBucketSampleCounter(int minOrderOfMagnitude, int decimalPrecision) { |
|
this.minOrderOfMagnitude = minOrderOfMagnitude; |
|
this.decimalPrecision = decimalPrecision; |
|
this.minAbsValue = Math.pow(10, minOrderOfMagnitude); |
|
} |
|
|
|
|
|
@Override |
|
protected double getBucketMedian(double sampleValue) { |
|
double median = roundDec(sampleValue, decimalPrecision); |
|
if (Math.abs(median) < minAbsValue) |
|
return 0; |
|
else |
|
return median; |
|
} |
|
} |
|
|
|
|
|
|
|
/** Rounds a double value to the closest double value with the specified |
|
* number of significant decimal digits. E.g: |
|
* <UL> |
|
* <LI>roundDec(111, 1) -> 100 |
|
* <LI>roundDec(0.111, 1) -> 0.1 |
|
* <LI>roundDec(111, 2) -> 110 |
|
* <LI>roundDec(111, 3) -> 111 |
|
* <LI>roundDec(111, 4) -> 111 |
|
* <LI>roundDec(-111, 2) -> -110 |
|
* </UL> |
|
* |
|
* @param value the value to round |
|
* @param digits the number of significant decimal digits, must be equal to or greater than 1 |
|
* @return the rounded value |
|
*/ |
|
public static final double roundDec(double value, int digits) { |
|
assert digits > 0 : "digits less than 1: " + digits; |
|
if (Math.abs(value) < Double.MIN_NORMAL) |
|
return 0; // value is equal to or very close to zero |
|
int mag = getDecMagnitude(value); |
|
double precisionScale = Math.pow(10, mag-digits); |
|
double result = Math.rint(value / precisionScale) * precisionScale; |
|
return result; |
|
} |
|
|
|
|
|
/** Gets the decimal order of magnitude of a value. This represents the position |
|
* of the most significant digit in the decimal representation of the value. |
|
* (Note that the decimal exponent corresponding to the value's magnitude |
|
* equals the result of this method minus 1.) |
|
* The value must not be zero. |
|
* The result is the same regardless of the sign of the passed value. |
|
* E.g: |
|
* <UL> |
|
* <LI> 0,01 -> -1 |
|
* <LI> 0,011-> -1 |
|
* <LI> 0,09 -> -1 |
|
* <LI> 0,1 -> 0 |
|
* <LI> 0,11 -> 0 |
|
* <LI> 0,9 -> 0 |
|
* <LI> 1 -> 1 |
|
* <LI> 2 -> 1 |
|
* <LI> 9 -> 1 |
|
* <LI> 10 -> 2 |
|
* <LI> 11 -> 2 |
|
* <LI> 99 -> 2 |
|
* <LI> 100 -> 3 |
|
* <LI> 101 -> 3 |
|
* </UL> |
|
* |
|
* @param value a non-zero value |
|
* @return |
|
*/ |
|
public static final int getDecMagnitude(double value) { |
|
int mag = ((int) Math.floor(Math.log10(Math.abs(value)))) + 1; |
|
return mag; |
|
} |
|
} |