Skip to content

Instantly share code, notes, and snippets.

@nicolasembleton
Last active July 28, 2021 12:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nicolasembleton/af8e8c09928c6911fe188e1a302c3bdf to your computer and use it in GitHub Desktop.
Save nicolasembleton/af8e8c09928c6911fe188e1a302c3bdf to your computer and use it in GitHub Desktop.
Benchmark of the multiple ways to find min and max in a large List in Dart
import 'dart:math';
import 'package:stats/stats.dart';
class Benchmark {
Benchmark({this.UPPER_BOUND = 100000, this.withSlow = false}) {
reset();
}
int UPPER_BOUND;
bool withSlow;
Map<String, Function> testsFunctions = {
'smallestUsingForLoopVar': findMinUsingForLoopVar,
'biggestUsingForLoopVar': findMaxUsingForLoopVar,
'smallestUsingForLoopGenericType': findMinUsingForLoopGenericType,
'biggestUsingForLoopGenericType': findMaxUsingForLoopGenericType,
'smallestUsingReverseForLoopVar': findMinUsingReverseForLoopVar,
'biggestUsingReverseForLoopVar': findMaxUsingReverseForLoopVar,
'smallestUsingReverseForLoopGenericType': findMinUsingReverseForLoopGenericType,
'biggestUsingReverseForLoopGenericType': findMaxUsingReverseForLoopGenericType,
};
Map<String, Function> slowTestsFunctions = {
'smallestUsingForEachLoop': findMinUsingForEachLoop,
'biggestUsingForEachLoop': findMaxUsingForEachLoop,
'smallestUsingReduce': findMinUsingReduce,
'biggestUsingReduce': findMaxUsingReduce,
'smallestUsingFold': findMinUsingFold,
'biggestUsingFold': findMaxUsingFold,
};
Map<String, Function> get tests {
if(withSlow) return {}..addAll(testsFunctions)..addAll(slowTestsFunctions);
return testsFunctions;
}
late Map<String, List<double>> usingForLoopData;
List<T> generateList<T extends num>({String type = 'int'}) {
var el = <T>[];
var rng = Random();
for(var i=0; i<UPPER_BOUND;i++) {
if(type == 'double') {
// double
el.add((rng.nextDouble() * UPPER_BOUND) as T);
} else {
// int
el.add(rng.nextInt(UPPER_BOUND) as T);
}
}
return el;
}
void addMeasurementToList({required String testKey, required double elapsed, bool andPrint = false}) {
assert(usingForLoopData.containsKey(testKey), true);
usingForLoopData[testKey]!.add(elapsed);
if(andPrint) {
print('$testKey, elapsed: $elapsed');
}
}
Stats getStatsForKey(key) {
return Stats.fromData(usingForLoopData[key] as List<num>).withPrecision(3);
}
Map<String, dynamic> getStats() {
return { for (var key in tests.keys) key : getStatsForKey(key)};
}
// Reset counters
void reset() {
usingForLoopData = { for (var value in tests.keys) value : [] };
}
}
void main() {
print('Benchmarking min / max finders');
// Good number for testing: 10000000
// Reasonable number for good quality output: 388805700
// rng.nextInt(429496729);
// Biggest possible number: 4294967296
var UPPER_BOUND = 10000000;
var BENCHMARKING_ROUNDS = 250;
var benchmark = Benchmark(UPPER_BOUND: UPPER_BOUND, withSlow: false);
var stopwatch = Stopwatch()..start();
print('UPPER_BOUND: $UPPER_BOUND');
print('BENCHMARKING_ROUNDS: $BENCHMARKING_ROUNDS');
print('**** INT suite ****');
var listInt = benchmark.generateList(); // List<int>
print('List<int> created, elapsed: ${stopwatch.elapsedMilliseconds/1000}');
stopwatch.reset(); // reset the watch
benchmark.tests.keys.forEach((element) {
// Run tests
for(var i=0;i<BENCHMARKING_ROUNDS;i++) {
benchmark.tests[element]!.call(listInt);
var elapsed = stopwatch.elapsedMilliseconds/1000;
benchmark.addMeasurementToList(testKey: element, elapsed: elapsed, andPrint: false);
stopwatch.reset();
}
print('$element: ${benchmark.getStatsForKey(element)}');
});
print('**** DOUBLE suite ****');
benchmark.reset();
var listDouble = benchmark.generateList(type: 'double'); // List<double>
print('List<double> created, elapsed: ${stopwatch.elapsedMilliseconds/1000}');
stopwatch.reset();
benchmark.tests.keys.forEach((element) {
// Run tests
for(var i=0;i<BENCHMARKING_ROUNDS;i++) {
benchmark.tests[element]!.call(listDouble);
var elapsed = stopwatch.elapsedMilliseconds/1000;
benchmark.addMeasurementToList(testKey: element, elapsed: elapsed, andPrint: false);
stopwatch.reset();
}
print('$element: ${benchmark.getStatsForKey(element)}');
});
// Unsurprisingly, sort is VERY slow. 20x forEach, which is itself very slow.
// Not running the tests for sanity sake, but feel free to if you want to see.
//List el2_min = List.from(el);
//List el2_max = List.from(el);
//print('Cloning the list for modifying methods: elapsed: ${stopwatch.elapsedMilliseconds/1000}');
//stopwatch.reset();
//int smallestUsingSort = findMinUsingSort(el2_min);
//print('Find min using sort ($smallestUsingSort), elapsed: ${stopwatch.elapsedMilliseconds/1000}');
//stopwatch.reset();
//int biggestUsingSort = findMaxUsingSort(el2_max);
//print('Find max using sort ($biggestUsingSort), elapsed: ${stopwatch.elapsedMilliseconds/1000}');
//stopwatch.reset();
}
num findMinUsingForLoopVar(List el) {
var smallest = el.first;
for (var i = 0; i < el.length; i++) {
if (el[i] < smallest) {
smallest = el[i];
}
}
return smallest;
}
num findMaxUsingForLoopVar(List el) {
var biggest = el.first;
for (var i = 0; i < el.length; i++) {
if (el[i] > biggest) {
biggest = el[i];
}
}
return biggest;
}
T findMinUsingForLoopGenericType<T extends num>(List el) {
var smallest = el.first as T;
for (var i = 0; i < el.length; i++) {
if (el[i] < smallest) {
smallest = el[i];
}
}
return smallest;
}
T findMaxUsingForLoopGenericType<T extends num>(List el) {
var biggest = el.first;
for (var i = 0; i < el.length; i++) {
if (el[i] > biggest) {
biggest = el[i];
}
}
return biggest;
}
num findMinUsingReverseForLoopVar(List el) {
var smallest = el.first;
for (var i = el.length-1; i >= 0; i--) {
if (el[i] < smallest) {
smallest = el[i];
}
}
return smallest;
}
num findMaxUsingReverseForLoopVar(List el) {
var biggest = el.first;
for (var i = el.length-1; i >= 0; i--) {
if (el[i] > biggest) {
biggest = el[i];
}
}
return biggest;
}
T findMinUsingReverseForLoopGenericType<T extends num>(List el) {
var smallest = el.first as T;
for (var i = el.length-1; i >= 0; i--) {
if (el[i] < smallest) {
smallest = el[i];
}
}
return smallest;
}
T findMaxUsingReverseForLoopGenericType<T extends num>(List el) {
var biggest = el.first as T;
for (var i = el.length-1; i >= 0; i--) {
if (el[i] > biggest) {
biggest = el[i];
}
}
return biggest;
}
num findMinUsingSort(List el) {
el.sort();
return el.first;
}
num findMaxUsingSort(List el) {
el.sort();
return el.last;
}
num findMinUsingForEachLoop(List el) {
var smallest = el.first;
el.forEach((val) => {
if(val < smallest) {smallest = val}
});
return smallest;
}
num findMaxUsingForEachLoop(List el) {
var biggest = el.first;
el.forEach((val) => {
if(val > biggest) {biggest = val}
});
return biggest;
}
num findMinUsingFold(List el) {
return (el as List<num>).fold(el.first, min);
}
num findMaxUsingFold(List el) {
return (el as List<num>).fold(el.first, max);
}
num findMinUsingReduce(List el) {
return (el as List<num>).reduce(min);
}
num findMaxUsingReduce(List el) {
return (el as List<num>).reduce(max);
}
@nicolasembleton
Copy link
Author

Simply run with dart min_max_benchmark.dart

Tested with: Dart SDK version: 2.13.4 (stable) (Wed Jun 23 13:08:41 2021 +0200) on "macos_x64"

Results

Benchmarking min / max finders
UPPER_BOUND: 429496729
List created, elapsed: 14.776
Find min using for loops (0), elapsed: 0.751
Find max using for loops (429496728), elapsed: 0.757
Find min using reverse for loops (0), elapsed: 0.782
Find max using reverse for loops (429496728), elapsed: 0.661
Find min using forEach loops (0), elapsed: 7.304
Find max using forEach loops (429496728), elapsed: 6.504
Find min using reduce (0), elapsed: 2.904
Find max using reduce (429496728), elapsed: 2.802
Find min using fold (0), elapsed: 2.953
Find max using fold (429496728), elapsed: 2.652

Verdict

Generally speaking, ascending for loop is fastest for min, descending (reverse) for loop is fastest for max (most likely having to do with bell curve number distribution, especially since the dataset is random). But overall the different is small, so reverse for loop has a slight overall advantage, so depending on your need, on what you do most (min or max search) and the nature of your dataset, you may pick accordingly.

To be really accurate, we would need to run this test a certain amount of times and compare the data trends. I'll see to that someday.

Notes

  • Numbers are randomly generated and set. To try with a cryptographically secure generate, replace new Random() by new Random.secure() (generation is unsurprisingly MUCH slower -- see benchmark output)
  • I passed on the sort method, it was 20x worse than forEach, which is itself not great.
  • I did not try to not have dart:math as a dependency. Not sure this would have a huge speed impact.
  • If you find anything that could be done better, feel free to point out. It was coded with VIM so there may be some styling oddities.

Thanks to the following resources for listing:

@nicolasembleton
Copy link
Author

Tried a few more things playing with Types. Some more results which appear to be consistent over multiple runs, but need more data to confirm:

Benchmarking min / max finders
UPPER_BOUND: 388805700
**** INT suite ****
List<int> created, elapsed: 12.461
Find min using for loops with typed control value (2), elapsed: 0.75
Find max using for loops with typed control value (388805699), elapsed: 0.674
Find min using for loops with var control value (2, elapsed: 0.621
Find max using for loops with var control value (388805699), elapsed: 0.623
Find min using for loops with Generic Type (2), elapsed: 0.713
Find max using for loops with Generic Type (388805699), elapsed: 0.612
Find min using reverse for loops with typed control value (2), elapsed: 0.727
Find max using reverse for loops with typed control value (388805699), elapsed: 0.74
Find min using reverse for loops with var control value (2), elapsed: 0.62
Find max using reverse for loops with var control value (388805699), elapsed: 0.614
Find min using reverse for loops with Generic Type (2), elapsed: 0.704
Find max using reverse for loops with Generic Type (388805699), elapsed: 0.679
Find min using forEach loops (2), elapsed: 5.089
Find max using forEach loops (388805699), elapsed: 4.606
Find min using reduce (2), elapsed: 2.167
Find max using reduce (388805699), elapsed: 2.277
Find min using fold (2), elapsed: 2.21
Find max using fold (388805699), elapsed: 2.114

Interestingly, the generic type functions failed when working with doubles... Can't figure out why yet.

@nicolasembleton
Copy link
Author

Refactored and batched each test for much better statistical accuracy on int min / max. Interesting outcome with 10.000.000 values and 1000 rounds of tests each (removed the slowest ones):

Benchmarking min / max finders
List<int> created, elapsed: 0.372
UPPER_BOUND: 10000000
BENCHMARKING_ROUNDS: 1000
**** INT suite ****
smallestUsingForLoop: {count: 1000, average: 0.0173, min: 0.014, max: 0.035, median: 0.017, standardDeviation: 0.00123}
biggestUsingForLoop: {count: 1000, average: 0.0169, min: 0.014, max: 0.024, median: 0.017, standardDeviation: 0.0011}
smallestUsingForLoopVar: {count: 1000, average: 0.0147, min: 0.013, max: 0.018, median: 0.015, standardDeviation: 0.000718}
biggestUsingForLoopVar: {count: 1000, average: 0.0148, min: 0.013, max: 0.018, median: 0.015, standardDeviation: 0.000691}
smallestUsingForLoopGenericType: {count: 1000, average: 0.0173, min: 0.014, max: 0.03, median: 0.017, standardDeviation: 0.00159}
biggestUsingForLoopGenericType: {count: 1000, average: 0.0125, min: 0.01, max: 0.015, median: 0.012, standardDeviation: 0.000632}
smallestUsingReverseForLoop: {count: 1000, average: 0.0173, min: 0.015, max: 0.028, median: 0.017, standardDeviation: 0.00113}
biggestUsingReverseForLoop: {count: 1000, average: 0.0173, min: 0.015, max: 0.022, median: 0.017, standardDeviation: 0.00119}
smallestUsingReverseForLoopVar: {count: 1000, average: 0.0152, min: 0.013, max: 0.024, median: 0.015, standardDeviation: 0.000777}
biggestUsingReverseForLoopVar: {count: 1000, average: 0.0104, min: 0.008, max: 0.016, median: 0.01, standardDeviation: 0.000622}
smallestUsingReverseForLoopGenericType: {count: 1000, average: 0.0169, min: 0.015, max: 0.028, median: 0.017, standardDeviation: 0.00123}
biggestUsingReverseForLoopGenericType: {count: 1000, average: 0.0171, min: 0.015, max: 0.022, median: 0.017, standardDeviation: 0.00114} 

@nicolasembleton
Copy link
Author

Final tests on 50M integers and 2500 iterations for each function:

Benchmarking min / max finders
List<int> created, elapsed: 1.767
UPPER_BOUND: 50000000
BENCHMARKING_ROUNDS: 2500
**** INT suite ****
smallestUsingForLoop: {count: 2500, average: 0.0867, min: 0.075, max: 0.14, median: 0.086, standardDeviation: 0.00385}
biggestUsingForLoop: {count: 2500, average: 0.0866, min: 0.075, max: 0.114, median: 0.086, standardDeviation: 0.00344}
smallestUsingForLoopVar: {count: 2500, average: 0.0528, min: 0.049, max: 0.083, median: 0.053, standardDeviation: 0.00166}
biggestUsingForLoopVar: {count: 2500, average: 0.0639, min: 0.054, max: 0.079, median: 0.064, standardDeviation: 0.00187}
smallestUsingForLoopGenericType: {count: 2500, average: 0.0843, min: 0.074, max: 0.118, median: 0.084, standardDeviation: 0.00479}
biggestUsingForLoopGenericType: {count: 2500, average: 0.0635, min: 0.053, max: 0.085, median: 0.063, standardDeviation: 0.002}
smallestUsingReverseForLoop: {count: 2500, average: 0.0848, min: 0.075, max: 0.108, median: 0.085, standardDeviation: 0.00464}
biggestUsingReverseForLoop: {count: 2500, average: 0.0873, min: 0.075, max: 0.145, median: 0.086, standardDeviation: 0.00442}
smallestUsingReverseForLoopVar: {count: 2500, average: 0.0544, min: 0.049, max: 0.067, median: 0.054, standardDeviation: 0.00178}
biggestUsingReverseForLoopVar: {count: 2500, average: 0.0762, min: 0.065, max: 0.093, median: 0.076, standardDeviation: 0.00262}
smallestUsingReverseForLoopGenericType: {count: 2500, average: 0.0878, min: 0.076, max: 0.115, median: 0.087, standardDeviation: 0.00419}
biggestUsingReverseForLoopGenericType: {count: 2500, average: 0.0872, min: 0.078, max: 0.124, median: 0.086, standardDeviation: 0.00354}

@nicolasembleton
Copy link
Author

nicolasembleton commented Jul 28, 2021

Ok I'm happy with where it is now. More refactor, added the slow tests as an option if you want to run them, made all functions generic to any num type and added the benchmark suite for doubles. Good to go.

Final results (most likely) without slow methods:

Benchmarking min / max finders
UPPER_BOUND: 10000000
BENCHMARKING_ROUNDS: 250
**** INT suite ****
List<int> created, elapsed: 0.384
smallestUsingForLoopVar: {count: 250, average: 0.0153, min: 0.013, max: 0.038, median: 0.015, standardDeviation: 0.0019}
biggestUsingForLoopVar: {count: 250, average: 0.0149, min: 0.013, max: 0.024, median: 0.015, standardDeviation: 0.000884}
smallestUsingForLoopGenericType: {count: 250, average: 0.0163, min: 0.014, max: 0.021, median: 0.016, standardDeviation: 0.0011}
biggestUsingForLoopGenericType: {count: 250, average: 0.014, min: 0.012, max: 0.025, median: 0.014, standardDeviation: 0.00176}
smallestUsingReverseForLoopVar: {count: 250, average: 0.0117, min: 0.01, max: 0.016, median: 0.012, standardDeviation: 0.000761}
biggestUsingReverseForLoopVar: {count: 250, average: 0.0162, min: 0.013, max: 0.019, median: 0.016, standardDeviation: 0.00106}
smallestUsingReverseForLoopGenericType: {count: 250, average: 0.0183, min: 0.015, max: 0.021, median: 0.018, standardDeviation: 0.00129}
biggestUsingReverseForLoopGenericType: {count: 250, average: 0.0187, min: 0.015, max: 0.024, median: 0.019, standardDeviation: 0.00139}
**** DOUBLE suite ****
List<double> created, elapsed: 1.335
smallestUsingForLoopVar: {count: 250, average: 0.0705, min: 0.061, max: 0.103, median: 0.069, standardDeviation: 0.00763}
biggestUsingForLoopVar: {count: 250, average: 0.0576, min: 0.049, max: 0.083, median: 0.057, standardDeviation: 0.00385}
smallestUsingForLoopGenericType: {count: 250, average: 0.0511, min: 0.044, max: 0.066, median: 0.051, standardDeviation: 0.00326}
biggestUsingForLoopGenericType: {count: 250, average: 0.057, min: 0.051, max: 0.069, median: 0.057, standardDeviation: 0.00314}
smallestUsingReverseForLoopVar: {count: 250, average: 0.0632, min: 0.058, max: 0.084, median: 0.062, standardDeviation: 0.00432}
biggestUsingReverseForLoopVar: {count: 250, average: 0.0555, min: 0.048, max: 0.067, median: 0.055, standardDeviation: 0.00239}
smallestUsingReverseForLoopGenericType: {count: 250, average: 0.0567, min: 0.05, max: 0.073, median: 0.056, standardDeviation: 0.00327}
biggestUsingReverseForLoopGenericType: {count: 250, average: 0.0412, min: 0.036, max: 0.062, median: 0.041, standardDeviation: 0.00315}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment