public
Created

A small set of classes that enables efficient collection of data samples and provides statistical measures on them.

  • Download Gist
SampleSet.java
Java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
/**
 * A collector of numeric data samples that exposes statistical measures over them.
 * <P>
 * General contract: as long as no samples have been inserted into the set, the
 * query methods shall return 0 and no other value. Implementing classes may
 * instead choose to throw IllegalStateException for such illegal use.
 * <P>
 * Implementing classes are not generally required to be thread-safe.
 */
public interface SampleSet {

    /**
     * Adds one sample to this sample set.
     *
     * @param sampleValue the sample value to insert
     */
    void putSample(double sampleValue);

    /**
     * Tells how many samples have been put into this sample set.
     *
     * @return the total number of samples put into this sample set
     */
    int getCount();

    /**
     * Tells the smallest sample value put into this sample set.
     *
     * @return the minimum sample value
     */
    double getMin();

    /**
     * Tells the largest sample value put into this sample set.
     *
     * @return the maximum sample value
     */
    double getMax();

    /**
     * Tells the sum of all sample values put into this sample set.
     *
     * @return the total sum of the sample values
     */
    double getSum();

    /**
     * Tells the mean (average) of this sample set.
     *
     * @return the mean (average) of this sample set
     */
    double getMean();

    /**
     * Tells the median of this sample set.
     * Certain implementations may produce an approximate value.
     *
     * @return the median of this sample set
     */
    double getMedian();

    /**
     * Tells the so-called sample standard deviation of this sample set.
     * Certain implementations may produce an approximate value.
     *
     * @return the sample standard deviation of this sample set
     */
    double getStdDev();

    /**
     * Produces a data dump of this sample set suitable for console output.
     *
     * @return the lines of the data dump
     */
    String[] dumpData();
}
SampleSetFactory.java
Java
1 2 3 4 5
/**
 * A factory that produces SampleSet instances.
 */
public interface SampleSetFactory {

    /**
     * Creates a new SampleSet.
     *
     * @return a newly created SampleSet instance
     */
    SampleSet makeSampleSet();
}
Samples.java
Java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
 
 
/** Non-instantiable class containing SampleSet factories and implementations.
*/
public final class Samples {
 
/** Gets the IntBucketSampleSet factory singleton.
* @return the IntBucketSampleSet SampleSetFactory singleton
*/
public static SampleSetFactory getIntBucketSampleSetFactory() {
return intBucketSampleSetFactory;
}
 
/** Gets the DecBucketSampleSet factory singleton.
* @return the DecBucketSampleSet SampleSetFactory singleton
*/
public static SampleSetFactory getDecBucketSampleSetFactory(int minOrderOfMagnitude, int decimalPrecision) {
return new DecBucketSampleSetFactory(minOrderOfMagnitude, decimalPrecision);
}
 
 
 
/** the IntBucketSampleSet factory singleton */
private static final SampleSetFactory intBucketSampleSetFactory = new SampleSetFactory() {
@Override
public SampleSet makeSampleSet() {
return new IntBucketSampleCounter();
}
};
 
 
/** the DecBucketSampleSet factory singleton */
private static final class DecBucketSampleSetFactory implements SampleSetFactory {
private final int minOrderOfMagnitude;
private final int decimalPrecision;
 
private DecBucketSampleSetFactory(int minOrderOfMagnitude, int decimalPrecision) {
this.minOrderOfMagnitude = minOrderOfMagnitude;
this.decimalPrecision = decimalPrecision;
}
 
@Override
public SampleSet makeSampleSet() {
return new DecBucketSampleCounter(minOrderOfMagnitude, decimalPrecision);
}
};
 
 
 
/** General abstract implementation of SampleSet that uses buckets to store the samples.
* Each bucket represents a value interval and counts the number of samples
* that have fallen within that interval.
* With this approach the memory size grows with the order of magnitude between the smallest
* and the largest data point, but not with the number of data points.
* <P>
* Note: This class is not thread-safe.
*
* @author Christer Swahn
*/
static abstract class BucketSampleCounter implements SampleSet {
private final SortedMap<Double,Counter> buckets = new TreeMap<Double,Counter>();
private int totalCount = 0;
double minSampleValue = Double.MAX_VALUE;
double maxSampleValue = -Double.MAX_VALUE;
private double totalSum = 0;
private int maxCounter = 0;
private Double cachedApproxMean = null;
private Double cachedMedian = null;
private Double cachedStdDev = null;
 
 
protected BucketSampleCounter() {
}
 
 
/** Returns the number of sample buckets currently held. */
public int getBucketCount() {
return buckets.size();
}
 
/** Returns the count value of the highest bucket counter. */
protected int getMaxCounter() {
return maxCounter;
}
 
@Override
public int getCount() {
return totalCount;
}
 
@Override
public double getSum() {
return totalSum;
}
 
 
@Override
public double getMin() {
if (totalCount == 0)
return 0;
return minSampleValue;
}
 
@Override
public double getMax() {
if (totalCount == 0)
return 0;
return maxSampleValue;
}
 
 
 
/** Puts a sample into this bucket sample counter.
* @param sampleValue the sample value to insert
*/
@Override
public void putSample(double sampleValue) {
Double bucketMedian = getBucketMedian(sampleValue);
Counter counter = buckets.get(bucketMedian);
if (counter == null) {
counter = new Counter();
buckets.put(bucketMedian, counter);
}
counter.count++;
maxCounter = Math.max(maxCounter, counter.count);
totalCount++;
totalSum += sampleValue;
minSampleValue = Math.min(minSampleValue, sampleValue);
maxSampleValue = Math.max(maxSampleValue, sampleValue);
// clear cached calculated values:
cachedApproxMean = null;
cachedMedian = null;
cachedStdDev = null;
}
 
/** Gets the median value of the bucket that the specified sample value is put into.
* This is the middle value of the bucket's value range, e.g. if the bucket is for
* samples with values between 1 and 3, the bucket's median is 2.
* <P>
* This method must be implemented by concrete subclasses.
*
* @param sampleValue the sample value to assign a bucket
* @return the median value of the sample value's bucket
*/
protected abstract double getBucketMedian(double sampleValue);
 
 
 
/** Gets the approximate mean (average) of this sample bucket set.
* <P>
* Note that the returned value will probably differ from the true mean since this is
* an approximating sample set.
* @see #getMean()
*/
public double getApproxMean() {
if (cachedApproxMean == null)
cachedApproxMean = calcApproxMean();
return cachedApproxMean;
}
 
/** Gets the true mean (average) of this sample bucket set.
* The return value is equal to getSum()/getCount() if getCount()>0.
*/
@Override
public double getMean() {
if (totalCount == 0)
return 0;
return totalSum / totalCount;
}
 
/** Gets the median of this sample bucket set, which is an approximation of the true median. */
@Override
public double getMedian() {
if (cachedMedian == null)
cachedMedian = calcMedian();
return cachedMedian;
}
 
/** Gets the so-called sample standard deviation of this sample bucket set,
* which is an approximation of the true sample standard deviation. */
@Override
public double getStdDev() {
if (cachedStdDev == null)
cachedStdDev = calcStdDev();
return cachedStdDev;
}
 
 
/** Calculates the mean of this sample bucket set. */
private double calcApproxMean() {
if (totalCount == 0)
return 0;
double sum = 0;
for (Map.Entry<Double,Counter> e : buckets.entrySet()) {
sum += e.getKey() * e.getValue().count;
}
double mean = sum / totalCount;
return mean;
}
 
/** Calculates the median of this sample bucket set. */
private double calcMedian() {
if (totalCount == 0)
return 0;
int traversedSamplesCount = 0;
for (Map.Entry<Double,Counter> e : buckets.entrySet()) {
int count = e.getValue().count;
traversedSamplesCount += count;
if (traversedSamplesCount >= totalCount/2) {
double median = e.getKey();
return median;
}
}
assert false : "Program error finding median of " + this;
return 0; // shouldn't happen
}
 
/** Calculates the so-called sample standard deviation of this sample bucket set. */
private double calcStdDev() {
if (totalCount <= 1)
return 0;
double mean = getMean();
double varianceSum = 0;
for (Map.Entry<Double,Counter> e : buckets.entrySet()) {
double diff = e.getKey() - mean;
varianceSum += (diff * diff) * e.getValue().count;
}
int denom = totalCount - 1;
// (For a "population standard deviation", i.e. if the sample values were the complete value population,
// the denominator would not have been subtracted by 1.)
double stdDev = Math.sqrt(varianceSum / denom);
return stdDev;
}
 
 
 
@Override
public String[] dumpData() {
String[] dump = new String[buckets.size()];
int bucketP = 1;
int bucketW = getDecMagnitude(maxSampleValue);
bucketW = bucketW + ((bucketW-1) / 3) + 1 + bucketP; // take into account: a delimiter for every 3 digits, comma, and fraction digits
int counterW = getDecMagnitude(maxCounter);
counterW = counterW + ((counterW-1) / 3); // take into account: a delimiter for every 3 digits
String format = "%," + bucketW + "." + bucketP + "f: %," + counterW + "d";
int b = 0;
for (Map.Entry<Double,Counter> e : buckets.entrySet()) {
dump[b++] = String.format(format, e.getKey(), e.getValue().count);
}
return dump;
}
 
 
@Override
public String toString() {
String str = String.format("A=%.1f xA=%.1f SD=%.1f Md=%.1f Mi=%.1f Ma=%.1f T=%.1f C=%d bc=%d",
getMean(), getApproxMean(), getStdDev(), getMedian(),
getMin(), getMax(), getSum(), getCount(), getBucketCount());
return str;
}
 
 
/** Wrapper class to handle a mutable int in a collection. */
private static final class Counter {
public int count = 0;
 
@Override
public String toString() {
return String.valueOf(count);
}
}
}
 
 
/** A simple BucketSampleCounter where each bucket represents the interval [-0.5;0.5)
* around an integer (i.e. the sample value rounded to the closest integer).
*/
static class IntBucketSampleCounter extends BucketSampleCounter {
@Override
protected double getBucketMedian(double sampleValue) {
double median = Math.rint(sampleValue);
return median;
}
}
 
 
/** A BucketSampleCounter where each bucket represents a value interval with
* a given decimal precision.
* For example, with a decimal precision of 2 (the default), for each decimal
* order of magnitude (e.g. between 1.0 and 9.9, between 10 and 99 and so on)
* up to 90 intervals (buckets) are stored.
*/
static class DecBucketSampleCounter extends BucketSampleCounter {
@SuppressWarnings("unused")
private final int minOrderOfMagnitude;
private final int decimalPrecision;
private final double minAbsValue;
 
/** Creates a DecBucketSampleCounter with a minimum precision of 0. */
DecBucketSampleCounter() {
this(0);
}
 
/** Creates a DecBucketSampleCounter with the specified minimum order of magnitude. */
DecBucketSampleCounter(int minOrderOfMagnitude) {
this(minOrderOfMagnitude, 2);
}
 
/** Creates a DecBucketSampleCounter with the specified minimum order of magnitude. */
DecBucketSampleCounter(int minOrderOfMagnitude, int decimalPrecision) {
this.minOrderOfMagnitude = minOrderOfMagnitude;
this.decimalPrecision = decimalPrecision;
this.minAbsValue = Math.pow(10, minOrderOfMagnitude);
}
 
 
@Override
protected double getBucketMedian(double sampleValue) {
double median = roundDec(sampleValue, decimalPrecision);
if (Math.abs(median) < minAbsValue)
return 0;
else
return median;
}
}
 
 
 
/** Rounds a double value to the closest double value with the specified
* number of significant decimal digits. E.g:
* <UL>
* <LI>roundDec(111, 1) -> 100
* <LI>roundDec(0.111, 1) -> 0.1
* <LI>roundDec(111, 2) -> 110
* <LI>roundDec(111, 3) -> 111
* <LI>roundDec(111, 4) -> 111
* <LI>roundDec(-111, 2) -> -110
* </UL>
*
* @param value the value to round
* @param digits the number of significant decimal digits, must be equal to or greater than 1
* @return the rounded value
*/
public static final double roundDec(double value, int digits) {
assert digits > 0 : "digits less than 1: " + digits;
if (Math.abs(value) < Double.MIN_NORMAL)
return 0; // value is equal to or very close to zero
int mag = getDecMagnitude(value);
double precisionScale = Math.pow(10, mag-digits);
double result = Math.rint(value / precisionScale) * precisionScale;
return result;
}
 
 
/** Gets the decimal order of magnitude of a value. This represents the position
* of the most significant digit in the decimal representation of the value.
* (Note that the decimal exponent corresponding to the value's magnitude
* equals the result of this method minus 1.)
* The value must not be zero.
* The result is the same regardless of the sign of the passed value.
* E.g:
* <UL>
* <LI> 0,01 -> -1
* <LI> 0,011-> -1
* <LI> 0,09 -> -1
* <LI> 0,1 -> 0
* <LI> 0,11 -> 0
* <LI> 0,9 -> 0
* <LI> 1 -> 1
* <LI> 2 -> 1
* <LI> 9 -> 1
* <LI> 10 -> 2
* <LI> 11 -> 2
* <LI> 99 -> 2
* <LI> 100 -> 3
* <LI> 101 -> 3
* </UL>
*
* @param value a non-zero value
* @return
*/
public static final int getDecMagnitude(double value) {
int mag = ((int) Math.floor(Math.log10(Math.abs(value)))) + 1;
return mag;
}
}
StatCompiler.java
Java
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
 
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
 
/** The statistics compiler.
* The static methods provide convenient short-hands for instantiating statistics compilers.
* <P>
* Typically one instance will be created for a measurement area, and samples will be
* put to it for each of the area's metrics, using a unique label per metric.
* <P>
* Usage example:
* <P>
<code><pre>
public class IOPacker {
public static final StatCompiler STAT = StatCompiler.getStatCompiler(IOPacker.class);
...
public void pack() {
long startTime = System.nanoTime();
...
STAT.putSample("Pack time [ms]", (System.nanoTime()-startTime)/1e6);
}
}
</pre></code>
* <P>
* The compiled statistics can be logged using the following simple statement,
* for example just before the program exits:
* <P>
<code>
StatCompiler.logAllStats(LOG, Level.INFO);
</code>
* <P>
* The static methods are thread-safe.
* <P>
* Instances of StatCompiler are not thread-safe.
*
*/
public class StatCompiler {
 
/*--- class members ---*/
 
/** Interface of a StatisticsFormatter. */
public static interface StatisticsFormatter {
public String toString(SampleSet sampleSet);
}
 
 
/** The default StatisticsFormatter used. */
private static final class DefaultStatFormatter implements StatisticsFormatter {
@Override
public String toString(SampleSet sampleSet) {
String str = String.format("sum: %,7.0f; avg: %,5.1f (sd +- %,.1f);\tcount: %,d; min<med<max: %,5.1f < %,5.1f < %,5.1f",
sampleSet.getSum(), sampleSet.getMean(), sampleSet.getStdDev(), sampleSet.getCount(),
sampleSet.getMin(), sampleSet.getMedian(), sampleSet.getMax());
return str;
}
}
 
private static final StatisticsFormatter defaultStatFormatter = new DefaultStatFormatter();
private static SampleSetFactory defaultSampleSetFactory = Samples.getDecBucketSampleSetFactory(-1, 2);
private static final Map<String,StatCompiler> instances = new LinkedHashMap<String,StatCompiler>();
 
 
/** Sets the SampleSetFactory to be used by default by the getStatCompiler() methods.
* @param factory the SampleSetFactory to use
*/
public static void setDefaultSampleSetFactory(SampleSetFactory factory) {
synchronized (instances) {
defaultSampleSetFactory = factory;
}
}
 
 
/** Gets a StatCompiler that shall be registered (associated) with the specified class.
* This method is thread-safe (though the returned StatCompiler instance is not).
* @param clazz the class to associate the StatCompiler with
* @return a new or previously existing StatCompiler
*/
public static StatCompiler getStatCompiler(Class<?> clazz) {
return getStatCompiler(clazz.getSimpleName());
}
 
/** Gets a StatCompiler that shall be registered (associated) with the specified name.
* This method is thread-safe (though the returned StatCompiler instance is not).
* @param name the name to associate the StatCompiler with
* @return a new or previously existing StatCompiler
*/
public static StatCompiler getStatCompiler(String name) {
synchronized (instances) {
StatCompiler sc = instances.get(name);
if (sc == null) {
sc = new StatCompiler(name, defaultSampleSetFactory);
sc.setStatFormatter(defaultStatFormatter);
instances.put(name, sc);
}
return sc;
}
}
 
 
/** Outputs all the registered StatCompiler instances' statistics via the provider logger.
* @param outputLog the logger to use for the output
* @param logLevel the logging level to use for the output
*/
public static void logAllStats(Logger outputLog, Level logLevel) {
List<StatCompiler> statCompilers;
synchronized (instances) {
statCompilers = new ArrayList<StatCompiler>(instances.values());
}
for (StatCompiler sc : statCompilers) {
sc.logStats(outputLog, logLevel);
}
}
 
 
 
 
/*--- instance members ---*/
 
private final String name;
private final SampleSetFactory sampleSetFactory;
private final Map<String,SampleSet> sampleSets = new LinkedHashMap<String,SampleSet>();
private StatisticsFormatter statFormatter;
private boolean printDataDump = false;
 
/** Creates a new StatCompiler instance.
* @param name the name of this StatCompiler
* @param sampleSetFactory the factory it will use to create a sample set for each metric
*/
public StatCompiler(String name, SampleSetFactory sampleSetFactory) {
this.name = name;
this.sampleSetFactory = sampleSetFactory;
}
 
 
/** Gets the name of this StatCompiler. */
public String getName() {
return name;
}
 
/** Gets the SampleSetFactory this StatCompiler uses. */
public SampleSetFactory getSampleSetFactory() {
return sampleSetFactory;
}
 
 
/** Gets the StatisticsFormatter this StatCompiler uses. */
public StatisticsFormatter getStatFormatter() {
return statFormatter;
}
 
/** Sets the StatisticsFormatter this StatCompiler uses. */
public void setStatFormatter(StatisticsFormatter statPrinter) {
this.statFormatter = statPrinter;
}
 
 
/** Returns true if print-data-dump is enabled for this StatCompiler.
* If enabled, the sample sets' detailed data will be output along with the
* compiled statistics. This is disabled by default.
*/
public boolean isPrintDataDumpEnabled() {
return printDataDump;
}
 
/** Sets print-data-dump to be enabled or disabled for this StatCompiler.
* If enabled, the sample sets' detailed data will be output along with the
* compiled statistics. This is disabled by default.
*/
public void setPrintDataDumpEnabled(boolean printDataDump) {
this.printDataDump = printDataDump;
}
 
 
/** Puts a new data sample to this statistics compiler.
* @param label the label of the sampled metric
* @param value the sample value
*/
public void putSample(String label, double value) {
SampleSet sampleSet = sampleSets.get(label);
if (sampleSet == null) {
sampleSet = sampleSetFactory.makeSampleSet();
sampleSets.put(label, sampleSet);
}
sampleSet.putSample(value);
}
 
 
/** Outputs the statistics of this StatCompiler to the provided Logger using the specified log level.
*/
public void logStats(Logger outputLog, Level logLevel) {
String label = "[" + name + "] ";
for (Map.Entry<String,SampleSet> e : sampleSets.entrySet()) {
String rowHeader = label + e.getKey() + ": ";
outputLog.log(logLevel, rowHeader + formatStats(e.getValue()));
if (printDataDump) {
for (String s : e.getValue().dumpData())
outputLog.log(logLevel, rowHeader + s);
}
}
}
 
 
/** Outputs the statistics of this StatCompiler to the provided stream.
* @param out the PrintStream to print the output to (e.g. System.out)
*/
public void printStats(PrintStream out) {
String label = "[" + name + "] ";
for (Map.Entry<String,SampleSet> e : sampleSets.entrySet()) {
String rowHeader = label + e.getKey() + ": ";
out.println(rowHeader + formatStats(e.getValue()));
if (printDataDump) {
for (String s : e.getValue().dumpData())
out.println(rowHeader + s);
}
}
}
 
 
/** Returns a string representation of the provided SampleSet suitable for console display.
* If a StatisticPrinter is set, it is used to produce the output. Otherwise the sample set's
* toString() method is used.
* This method may be overridden by subclasses to customize the output.
* @param sampleSet the sample set to print
* @return a console-friendly string representation of the provided sample set
*/
public String formatStats(SampleSet sampleSet) {
if (statFormatter != null)
return statFormatter.toString(sampleSet);
else
return sampleSet.toString();
}
 
 
 
@Override
public String toString() {
return getClass().getSimpleName() + "@" + name;
}
 
 
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((name == null) ? 0 : name.hashCode());
return result;
}
 
@Override
public boolean equals(Object obj) {
if (this == obj)
return true;
if (obj == null)
return false;
if (getClass() != obj.getClass())
return false;
StatCompiler other = (StatCompiler) obj;
if (name == null) {
if (other.name != null)
return false;
}
else if (!name.equals(other.name))
return false;
return true;
}
}

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.