Created
September 8, 2013 20:51
-
-
Save sushain97/6488296 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
private static Pair<List<Integer>,List<Integer>> calculateStats(List<Integer> list) | |
{ | |
double[] data = new double[list.size()]; | |
for(int i = 0; i < list.size(); i++) | |
data[i] = list.get(i); | |
DescriptiveStatistics dStats = new DescriptiveStatistics(data); | |
List<Integer> summary = new ArrayList<Integer>(5); | |
summary.add((int) dStats.getMin()); //Minimum | |
summary.add((int) dStats.getPercentile(25)); //Lower Quartile (Q1) | |
summary.add((int) dStats.getPercentile(50)); //Middle Quartile (Median - Q2) | |
summary.add((int) dStats.getPercentile(75)); //High Quartile (Q3) | |
summary.add((int) dStats.getMax()); //Maxiumum | |
List<Integer> outliers = new ArrayList<Integer>(); | |
if(list.size() > 5 && dStats.getStandardDeviation() > 0) //Only remove outliers if relatively normal | |
{ | |
double mean = dStats.getMean(); | |
double stDev = dStats.getStandardDeviation(); | |
NormalDistribution normalDistribution = new NormalDistribution(mean, stDev); | |
Iterator<Integer> listIterator = list.iterator(); | |
double significanceLevel = .50 / list.size(); | |
while(listIterator.hasNext()) | |
{ | |
int num = listIterator.next(); | |
double pValue = normalDistribution.cumulativeProbability(num); | |
if(pValue < significanceLevel) //Chauvenet's Criterion for Outliers | |
{ | |
outliers.add(num); | |
listIterator.remove(); | |
} | |
} | |
if(list.size() != dStats.getN()) //If and only if outliers have been removed | |
{ | |
double[] significantData = new double[list.size()]; | |
for(int i = 0; i < list.size(); i++) | |
significantData[i] = list.get(i); | |
dStats = new DescriptiveStatistics(significantData); | |
summary.set(0, (int) dStats.getMin()); | |
summary.set(4, (int) dStats.getMax()); | |
} | |
} | |
return new Pair<List<Integer>,List<Integer>>(summary, outliers); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment