Created
June 1, 2012 18:37
-
-
Save christerswahn/2854260 to your computer and use it in GitHub Desktop.
A small set of classes that enable efficient collection of data samples and provide statistical measures on them.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.Map; | |
import java.util.SortedMap; | |
import java.util.TreeMap; | |
/** Non-instantiable class containing SampleSet factories and implementations. | |
*/ | |
public final class Samples { | |
/** Gets the IntBucketSampleSet factory singleton. | |
* @return the IntBucketSampleSet SampleSetFactory singleton | |
*/ | |
public static SampleSetFactory getIntBucketSampleSetFactory() { | |
return intBucketSampleSetFactory; | |
} | |
/** Gets the DecBucketSampleSet factory singleton. | |
* @return the DecBucketSampleSet SampleSetFactory singleton | |
*/ | |
public static SampleSetFactory getDecBucketSampleSetFactory(int minOrderOfMagnitude, int decimalPrecision) { | |
return new DecBucketSampleSetFactory(minOrderOfMagnitude, decimalPrecision); | |
} | |
/** the IntBucketSampleSet factory singleton */ | |
private static final SampleSetFactory intBucketSampleSetFactory = new SampleSetFactory() { | |
@Override | |
public SampleSet makeSampleSet() { | |
return new IntBucketSampleCounter(); | |
} | |
}; | |
/** the DecBucketSampleSet factory singleton */ | |
private static final class DecBucketSampleSetFactory implements SampleSetFactory { | |
private final int minOrderOfMagnitude; | |
private final int decimalPrecision; | |
private DecBucketSampleSetFactory(int minOrderOfMagnitude, int decimalPrecision) { | |
this.minOrderOfMagnitude = minOrderOfMagnitude; | |
this.decimalPrecision = decimalPrecision; | |
} | |
@Override | |
public SampleSet makeSampleSet() { | |
return new DecBucketSampleCounter(minOrderOfMagnitude, decimalPrecision); | |
} | |
}; | |
/** General abstract implementation of SampleSet that uses buckets to store the samples. | |
* Each bucket represents a value interval and counts the number of samples | |
* that have fallen within that interval. | |
* With this approach the memory size grows with the order of magnitude between the smallest | |
* and the largest data point, but not with the number of data points. | |
* <P> | |
* Note: This class is not thread-safe. | |
* | |
* @author Christer Swahn | |
*/ | |
static abstract class BucketSampleCounter implements SampleSet { | |
private final SortedMap<Double,Counter> buckets = new TreeMap<Double,Counter>(); | |
private int totalCount = 0; | |
double minSampleValue = Double.MAX_VALUE; | |
double maxSampleValue = -Double.MAX_VALUE; | |
private double totalSum = 0; | |
private int maxCounter = 0; | |
private Double cachedApproxMean = null; | |
private Double cachedMedian = null; | |
private Double cachedStdDev = null; | |
protected BucketSampleCounter() { | |
} | |
/** Returns the number of sample buckets currently held. */ | |
public int getBucketCount() { | |
return buckets.size(); | |
} | |
/** Returns the count value of the highest bucket counter. */ | |
protected int getMaxCounter() { | |
return maxCounter; | |
} | |
@Override | |
public int getCount() { | |
return totalCount; | |
} | |
@Override | |
public double getSum() { | |
return totalSum; | |
} | |
@Override | |
public double getMin() { | |
if (totalCount == 0) | |
return 0; | |
return minSampleValue; | |
} | |
@Override | |
public double getMax() { | |
if (totalCount == 0) | |
return 0; | |
return maxSampleValue; | |
} | |
/** Puts a sample into this bucket sample counter. | |
* @param sampleValue the sample value to insert | |
*/ | |
@Override | |
public void putSample(double sampleValue) { | |
Double bucketMedian = getBucketMedian(sampleValue); | |
Counter counter = buckets.get(bucketMedian); | |
if (counter == null) { | |
counter = new Counter(); | |
buckets.put(bucketMedian, counter); | |
} | |
counter.count++; | |
maxCounter = Math.max(maxCounter, counter.count); | |
totalCount++; | |
totalSum += sampleValue; | |
minSampleValue = Math.min(minSampleValue, sampleValue); | |
maxSampleValue = Math.max(maxSampleValue, sampleValue); | |
// clear cached calculated values: | |
cachedApproxMean = null; | |
cachedMedian = null; | |
cachedStdDev = null; | |
} | |
/** Gets the median value of the bucket that the specified sample value is put into. | |
* This is the middle value of the bucket's value range, e.g. if the bucket is for | |
* samples with values between 1 and 3, the bucket's median is 2. | |
* <P> | |
* This method must be implemented by concrete subclasses. | |
* | |
* @param sampleValue the sample value to assign a bucket | |
* @return the median value of the sample value's bucket | |
*/ | |
protected abstract double getBucketMedian(double sampleValue); | |
/** Gets the approximate mean (average) of this sample bucket set. | |
* <P> | |
* Note that the returned value will probably differ from the true mean since this is | |
* an approximating sample set. | |
* @see #getMean() | |
*/ | |
public double getApproxMean() { | |
if (cachedApproxMean == null) | |
cachedApproxMean = calcApproxMean(); | |
return cachedApproxMean; | |
} | |
/** Gets the true mean (average) of this sample bucket set. | |
* The return value is equal to getSum()/getCount() if getCount()>0. | |
*/ | |
@Override | |
public double getMean() { | |
if (totalCount == 0) | |
return 0; | |
return totalSum / totalCount; | |
} | |
/** Gets the median of this sample bucket set, which is an approximation of the true median. */ | |
@Override | |
public double getMedian() { | |
if (cachedMedian == null) | |
cachedMedian = calcMedian(); | |
return cachedMedian; | |
} | |
/** Gets the so-called sample standard deviation of this sample bucket set, | |
* which is an approximation of the true sample standard deviation. */ | |
@Override | |
public double getStdDev() { | |
if (cachedStdDev == null) | |
cachedStdDev = calcStdDev(); | |
return cachedStdDev; | |
} | |
/** Calculates the mean of this sample bucket set. */ | |
private double calcApproxMean() { | |
if (totalCount == 0) | |
return 0; | |
double sum = 0; | |
for (Map.Entry<Double,Counter> e : buckets.entrySet()) { | |
sum += e.getKey() * e.getValue().count; | |
} | |
double mean = sum / totalCount; | |
return mean; | |
} | |
/** Calculates the median of this sample bucket set. */ | |
private double calcMedian() { | |
if (totalCount == 0) | |
return 0; | |
int traversedSamplesCount = 0; | |
for (Map.Entry<Double,Counter> e : buckets.entrySet()) { | |
int count = e.getValue().count; | |
traversedSamplesCount += count; | |
if (traversedSamplesCount >= totalCount/2) { | |
double median = e.getKey(); | |
return median; | |
} | |
} | |
assert false : "Program error finding median of " + this; | |
return 0; // shouldn't happen | |
} | |
/** Calculates the so-called sample standard deviation of this sample bucket set. */ | |
private double calcStdDev() { | |
if (totalCount <= 1) | |
return 0; | |
double mean = getMean(); | |
double varianceSum = 0; | |
for (Map.Entry<Double,Counter> e : buckets.entrySet()) { | |
double diff = e.getKey() - mean; | |
varianceSum += (diff * diff) * e.getValue().count; | |
} | |
int denom = totalCount - 1; | |
// (For a "population standard deviation", i.e. if the sample values were the complete value population, | |
// the denominator would not have been subtracted by 1.) | |
double stdDev = Math.sqrt(varianceSum / denom); | |
return stdDev; | |
} | |
@Override | |
public String[] dumpData() { | |
String[] dump = new String[buckets.size()]; | |
int bucketP = 1; | |
int bucketW = getDecMagnitude(maxSampleValue); | |
bucketW = bucketW + ((bucketW-1) / 3) + 1 + bucketP; // take into account: a delimiter for every 3 digits, comma, and fraction digits | |
int counterW = getDecMagnitude(maxCounter); | |
counterW = counterW + ((counterW-1) / 3); // take into account: a delimiter for every 3 digits | |
String format = "%," + bucketW + "." + bucketP + "f: %," + counterW + "d"; | |
int b = 0; | |
for (Map.Entry<Double,Counter> e : buckets.entrySet()) { | |
dump[b++] = String.format(format, e.getKey(), e.getValue().count); | |
} | |
return dump; | |
} | |
@Override | |
public String toString() { | |
String str = String.format("A=%.1f xA=%.1f SD=%.1f Md=%.1f Mi=%.1f Ma=%.1f T=%.1f C=%d bc=%d", | |
getMean(), getApproxMean(), getStdDev(), getMedian(), | |
getMin(), getMax(), getSum(), getCount(), getBucketCount()); | |
return str; | |
} | |
/** Wrapper class to handle a mutable int in a collection. */ | |
private static final class Counter { | |
public int count = 0; | |
@Override | |
public String toString() { | |
return String.valueOf(count); | |
} | |
} | |
} | |
/** A simple BucketSampleCounter where each bucket represents the interval [-0.5;0.5) | |
* around an integer (i.e. the sample value rounded to the closest integer). | |
*/ | |
static class IntBucketSampleCounter extends BucketSampleCounter { | |
@Override | |
protected double getBucketMedian(double sampleValue) { | |
double median = Math.rint(sampleValue); | |
return median; | |
} | |
} | |
/** A BucketSampleCounter where each bucket represents a value interval with | |
* a given decimal precision. | |
* For example, with a decimal precision of 2 (the default), for each decimal | |
* order of magnitude (e.g. between 1.0 and 9.9, between 10 and 99 and so on) | |
* up to 90 intervals (buckets) are stored. | |
*/ | |
static class DecBucketSampleCounter extends BucketSampleCounter { | |
@SuppressWarnings("unused") | |
private final int minOrderOfMagnitude; | |
private final int decimalPrecision; | |
private final double minAbsValue; | |
/** Creates a DecBucketSampleCounter with a minimum precision of 0. */ | |
DecBucketSampleCounter() { | |
this(0); | |
} | |
/** Creates a DecBucketSampleCounter with the specified minimum order of magnitude. */ | |
DecBucketSampleCounter(int minOrderOfMagnitude) { | |
this(minOrderOfMagnitude, 2); | |
} | |
/** Creates a DecBucketSampleCounter with the specified minimum order of magnitude. */ | |
DecBucketSampleCounter(int minOrderOfMagnitude, int decimalPrecision) { | |
this.minOrderOfMagnitude = minOrderOfMagnitude; | |
this.decimalPrecision = decimalPrecision; | |
this.minAbsValue = Math.pow(10, minOrderOfMagnitude); | |
} | |
@Override | |
protected double getBucketMedian(double sampleValue) { | |
double median = roundDec(sampleValue, decimalPrecision); | |
if (Math.abs(median) < minAbsValue) | |
return 0; | |
else | |
return median; | |
} | |
} | |
/** Rounds a double value to the closest double value with the specified | |
* number of significant decimal digits. E.g: | |
* <UL> | |
* <LI>roundDec(111, 1) -> 100 | |
* <LI>roundDec(0.111, 1) -> 0.1 | |
* <LI>roundDec(111, 2) -> 110 | |
* <LI>roundDec(111, 3) -> 111 | |
* <LI>roundDec(111, 4) -> 111 | |
* <LI>roundDec(-111, 2) -> -110 | |
* </UL> | |
* | |
* @param value the value to round | |
* @param digits the number of significant decimal digits, must be equal to or greater than 1 | |
* @return the rounded value | |
*/ | |
public static final double roundDec(double value, int digits) { | |
assert digits > 0 : "digits less than 1: " + digits; | |
if (Math.abs(value) < Double.MIN_NORMAL) | |
return 0; // value is equal to or very close to zero | |
int mag = getDecMagnitude(value); | |
double precisionScale = Math.pow(10, mag-digits); | |
double result = Math.rint(value / precisionScale) * precisionScale; | |
return result; | |
} | |
/** Gets the decimal order of magnitude of a value. This represents the position | |
* of the most significant digit in the decimal representation of the value. | |
* (Note that the decimal exponent corresponding to the value's magnitude | |
* equals the result of this method minus 1.) | |
* The value must not be zero. | |
* The result is the same regardless of the sign of the passed value. | |
* E.g: | |
* <UL> | |
* <LI> 0,01 -> -1 | |
* <LI> 0,011-> -1 | |
* <LI> 0,09 -> -1 | |
* <LI> 0,1 -> 0 | |
* <LI> 0,11 -> 0 | |
* <LI> 0,9 -> 0 | |
* <LI> 1 -> 1 | |
* <LI> 2 -> 1 | |
* <LI> 9 -> 1 | |
* <LI> 10 -> 2 | |
* <LI> 11 -> 2 | |
* <LI> 99 -> 2 | |
* <LI> 100 -> 3 | |
* <LI> 101 -> 3 | |
* </UL> | |
* | |
* @param value a non-zero value | |
* @return | |
*/ | |
public static final int getDecMagnitude(double value) { | |
int mag = ((int) Math.floor(Math.log10(Math.abs(value)))) + 1; | |
return mag; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** A container that collects a set of data samples and provides statistical measures on them. | |
* <P> | |
* The general contract is that the methods shall not return any other value than 0 | |
* in case no samples' have been inserted into the set. | |
* Implementing classes may choose to throw IllegalStateException in such an illegal use case. | |
* <P> | |
* Implementing classes are not generally required to be thread-safe. | |
*/ | |
public interface SampleSet { | |
/** Puts a sample into this bucket sample counter. | |
* @param sampleValue the sample value to insert | |
*/ | |
public void putSample(double sampleValue); | |
/** Gets the total number of samples put into this sample set. | |
* @return the total number of samples put into this sample set | |
*/ | |
public int getCount(); | |
/** Gets the minimum sample value put into this sample set. */ | |
public double getMin(); | |
/** Gets the maximum sample value put into this sample set. */ | |
public double getMax(); | |
/** Gets the total sum of the sample values put into this sample set. | |
* @return the total sum of the sample values | |
*/ | |
public double getSum(); | |
/** Gets the mean (average) of this sample set. | |
* @return the mean (average) of this sample set | |
*/ | |
public double getMean(); | |
/** Gets the median of this sample set. | |
* Certain implementations may produce an approximate value. | |
* @return the median of this sample bucket set | |
*/ | |
public double getMedian(); | |
/** Gets the so-called sample standard deviation of this sample bucket set. | |
* Certain implementations may produce an approximate value. | |
* @return the sample standard deviation of this sample set | |
*/ | |
public double getStdDev(); | |
/** Returns a data dump of this sample set suitable for console output. | |
*/ | |
public String[] dumpData(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Interface of a SampleSet factory. */ | |
public interface SampleSetFactory { | |
/** Returns a newly created SampleSet instance. */ | |
public SampleSet makeSampleSet(); | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.PrintStream; | |
import java.util.ArrayList; | |
import java.util.LinkedHashMap; | |
import java.util.List; | |
import java.util.Map; | |
import org.apache.log4j.Level; | |
import org.apache.log4j.Logger; | |
/** The statistics compiler. | |
* The static methods provide convenient short-hands for instantiating statistics compilers. | |
* <P> | |
* Typically one instance will be created for a measurement area, and samples will be | |
* put to it for each of the area's metrics, using a unique label per metric. | |
* <P> | |
* Usage example: | |
* <P> | |
<code><pre> | |
public class IOPacker { | |
public static final StatCompiler STAT = StatCompiler.getStatCompiler(IOPacker.class); | |
... | |
public void pack() { | |
long startTime = System.nanoTime(); | |
... | |
STAT.putSample("Pack time [ms]", (System.nanoTime()-startTime)/1e6); | |
} | |
} | |
</pre></code> | |
* <P> | |
* The compiled statistics can be logged using the following simple statement, | |
* for example just before the program exits: | |
* <P> | |
<code> | |
StatCompiler.logAllStats(LOG, Level.INFO); | |
</code> | |
* <P> | |
* The static methods are thread-safe. | |
* <P> | |
* Instances of StatCompiler are not thread-safe. | |
* | |
*/ | |
public class StatCompiler { | |
/*--- class members ---*/ | |
/** Interface of a StatisticsFormatter. */ | |
public static interface StatisticsFormatter { | |
public String toString(SampleSet sampleSet); | |
} | |
/** The default StatisticsFormatter used. */ | |
private static final class DefaultStatFormatter implements StatisticsFormatter { | |
@Override | |
public String toString(SampleSet sampleSet) { | |
String str = String.format("sum: %,7.0f; avg: %,5.1f (sd +- %,.1f);\tcount: %,d; min<med<max: %,5.1f < %,5.1f < %,5.1f", | |
sampleSet.getSum(), sampleSet.getMean(), sampleSet.getStdDev(), sampleSet.getCount(), | |
sampleSet.getMin(), sampleSet.getMedian(), sampleSet.getMax()); | |
return str; | |
} | |
} | |
private static final StatisticsFormatter defaultStatFormatter = new DefaultStatFormatter(); | |
private static SampleSetFactory defaultSampleSetFactory = Samples.getDecBucketSampleSetFactory(-1, 2); | |
private static final Map<String,StatCompiler> instances = new LinkedHashMap<String,StatCompiler>(); | |
/** Sets the SampleSetFactory to be used by default by the getStatCompiler() methods. | |
* @param factory the SampleSetFactory to use | |
*/ | |
public static void setDefaultSampleSetFactory(SampleSetFactory factory) { | |
synchronized (instances) { | |
defaultSampleSetFactory = factory; | |
} | |
} | |
/** Gets a StatCompiler that shall be registered (associated) with the specified class. | |
* This method is thread-safe (though the returned StatCompiler instance is not). | |
* @param clazz the class to associate the StatCompiler with | |
* @return a new or previously existing StatCompiler | |
*/ | |
public static StatCompiler getStatCompiler(Class<?> clazz) { | |
return getStatCompiler(clazz.getSimpleName()); | |
} | |
/** Gets a StatCompiler that shall be registered (associated) with the specified name. | |
* This method is thread-safe (though the returned StatCompiler instance is not). | |
* @param name the name to associate the StatCompiler with | |
* @return a new or previously existing StatCompiler | |
*/ | |
public static StatCompiler getStatCompiler(String name) { | |
synchronized (instances) { | |
StatCompiler sc = instances.get(name); | |
if (sc == null) { | |
sc = new StatCompiler(name, defaultSampleSetFactory); | |
sc.setStatFormatter(defaultStatFormatter); | |
instances.put(name, sc); | |
} | |
return sc; | |
} | |
} | |
/** Outputs all the registered StatCompiler instances' statistics via the provider logger. | |
* @param outputLog the logger to use for the output | |
* @param logLevel the logging level to use for the output | |
*/ | |
public static void logAllStats(Logger outputLog, Level logLevel) { | |
List<StatCompiler> statCompilers; | |
synchronized (instances) { | |
statCompilers = new ArrayList<StatCompiler>(instances.values()); | |
} | |
for (StatCompiler sc : statCompilers) { | |
sc.logStats(outputLog, logLevel); | |
} | |
} | |
/*--- instance members ---*/ | |
private final String name; | |
private final SampleSetFactory sampleSetFactory; | |
private final Map<String,SampleSet> sampleSets = new LinkedHashMap<String,SampleSet>(); | |
private StatisticsFormatter statFormatter; | |
private boolean printDataDump = false; | |
/** Creates a new StatCompiler instance. | |
* @param name the name of this StatCompiler | |
* @param sampleSetFactory the factory it will use to create a sample set for each metric | |
*/ | |
public StatCompiler(String name, SampleSetFactory sampleSetFactory) { | |
this.name = name; | |
this.sampleSetFactory = sampleSetFactory; | |
} | |
/** Gets the name of this StatCompiler. */ | |
public String getName() { | |
return name; | |
} | |
/** Gets the SampleSetFactory this StatCompiler uses. */ | |
public SampleSetFactory getSampleSetFactory() { | |
return sampleSetFactory; | |
} | |
/** Gets the StatisticsFormatter this StatCompiler uses. */ | |
public StatisticsFormatter getStatFormatter() { | |
return statFormatter; | |
} | |
/** Sets the StatisticsFormatter this StatCompiler uses. */ | |
public void setStatFormatter(StatisticsFormatter statPrinter) { | |
this.statFormatter = statPrinter; | |
} | |
/** Returns true if print-data-dump is enabled for this StatCompiler. | |
* If enabled, the sample sets' detailed data will be output along with the | |
* compiled statistics. This is disabled by default. | |
*/ | |
public boolean isPrintDataDumpEnabled() { | |
return printDataDump; | |
} | |
/** Sets print-data-dump to be enabled or disabled for this StatCompiler. | |
* If enabled, the sample sets' detailed data will be output along with the | |
* compiled statistics. This is disabled by default. | |
*/ | |
public void setPrintDataDumpEnabled(boolean printDataDump) { | |
this.printDataDump = printDataDump; | |
} | |
/** Puts a new data sample to this statistics compiler. | |
* @param label the label of the sampled metric | |
* @param value the sample value | |
*/ | |
public void putSample(String label, double value) { | |
SampleSet sampleSet = sampleSets.get(label); | |
if (sampleSet == null) { | |
sampleSet = sampleSetFactory.makeSampleSet(); | |
sampleSets.put(label, sampleSet); | |
} | |
sampleSet.putSample(value); | |
} | |
/** Outputs the statistics of this StatCompiler to the provided Logger using the specified log level. | |
*/ | |
public void logStats(Logger outputLog, Level logLevel) { | |
String label = "[" + name + "] "; | |
for (Map.Entry<String,SampleSet> e : sampleSets.entrySet()) { | |
String rowHeader = label + e.getKey() + ": "; | |
outputLog.log(logLevel, rowHeader + formatStats(e.getValue())); | |
if (printDataDump) { | |
for (String s : e.getValue().dumpData()) | |
outputLog.log(logLevel, rowHeader + s); | |
} | |
} | |
} | |
/** Outputs the statistics of this StatCompiler to the provided stream. | |
* @param out the PrintStream to print the output to (e.g. System.out) | |
*/ | |
public void printStats(PrintStream out) { | |
String label = "[" + name + "] "; | |
for (Map.Entry<String,SampleSet> e : sampleSets.entrySet()) { | |
String rowHeader = label + e.getKey() + ": "; | |
out.println(rowHeader + formatStats(e.getValue())); | |
if (printDataDump) { | |
for (String s : e.getValue().dumpData()) | |
out.println(rowHeader + s); | |
} | |
} | |
} | |
/** Returns a string representation of the provided SampleSet suitable for console display. | |
* If a StatisticPrinter is set, it is used to produce the output. Otherwise the sample set's | |
* toString() method is used. | |
* This method may be overridden by subclasses to customize the output. | |
* @param sampleSet the sample set to print | |
* @return a console-friendly string representation of the provided sample set | |
*/ | |
public String formatStats(SampleSet sampleSet) { | |
if (statFormatter != null) | |
return statFormatter.toString(sampleSet); | |
else | |
return sampleSet.toString(); | |
} | |
@Override | |
public String toString() { | |
return getClass().getSimpleName() + "@" + name; | |
} | |
@Override | |
public int hashCode() { | |
final int prime = 31; | |
int result = 1; | |
result = prime * result + ((name == null) ? 0 : name.hashCode()); | |
return result; | |
} | |
@Override | |
public boolean equals(Object obj) { | |
if (this == obj) | |
return true; | |
if (obj == null) | |
return false; | |
if (getClass() != obj.getClass()) | |
return false; | |
StatCompiler other = (StatCompiler) obj; | |
if (name == null) { | |
if (other.name != null) | |
return false; | |
} | |
else if (!name.equals(other.name)) | |
return false; | |
return true; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment