Skip to content

Instantly share code, notes, and snippets.

@si14
Last active August 29, 2015 14:05
Show Gist options
  • Save si14/9902fca946a25e01d8a8 to your computer and use it in GitHub Desktop.
Save si14/9902fca946a25e01d8a8 to your computer and use it in GitHub Desktop.
import org.apache.commons.math3.util.FastMath;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.util.Random;
import java.util.concurrent.*;
import java.util.stream.DoubleStream;
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@State(Scope.Benchmark)
public class FindMaxBenchmark {
@Param({"10000000"})
int arrayLen;
@Param({"2", "4", "8", "32", "256"})
int pieces;
double[] values;
double trueMaxValue;
ExecutorService pool;
double maxValue;
@Setup
public void prepare() {
values = new double[arrayLen];
final Random gen = new Random(42);
for (int i = 0; i < arrayLen; i++) {
values[i] = gen.nextDouble();
}
trueMaxValue = DoubleStream.of(values).max().getAsDouble();
pool = Executors.newFixedThreadPool(ForkJoinPool.getCommonPoolParallelism());
}
@TearDown
public void check() {
assert maxValue == trueMaxValue : "max should be right";
pool.shutdown();
}
public static class RecursiveFindMax extends RecursiveTask<Double> {
final int lo;
final int hi;
final double[] array;
final int threshold;
final int pieces;
RecursiveFindMax(final double[] array, final int pieces) {
this(array, 0, array.length, pieces);
}
RecursiveFindMax(final double[] array, final int lo, final int hi, final int pieces) {
this.array = array;
this.lo = lo;
this.hi = hi;
this.pieces = pieces;
threshold = array.length / pieces;
}
@Override
protected Double compute() {
if (hi - lo <= threshold) {
double max = FastMath.exp(array[lo]);
for (int i = lo + 1; i < hi; i++) {
// final double exp = FastMath.exp(array[i]);
final double exp = array[i];
if (exp > max) {
max = exp;
}
}
return max;
} else {
final int mid = lo + (hi - lo) / 2;
final RecursiveFindMax left = new RecursiveFindMax(array, lo, mid, pieces);
final RecursiveFindMax right = new RecursiveFindMax(array, mid, hi, pieces);
right.fork();
final double leftAns = left.compute();
final double rightAns = right.join();
return Math.max(leftAns, rightAns);
}
}
public static double apply(final double[] array, final int pieces) {
return ForkJoinPool.commonPool().invoke(new RecursiveFindMax(array, pieces));
}
}
double findMax(final double[] array) {
double max = FastMath.exp(array[0]);
for (final double x : array) {
// final double exp = FastMath.exp(x);
final double exp = x;
if (exp > max) {
max = exp;
}
}
return max;
}
double findMaxFutures(final double[] array, final int pieces, final ExecutorService pool) {
final int blockSize = array.length / pieces;
final FutureTask<Double>[] futures = new FutureTask[pieces];
int lo = 0;
int hi = blockSize;
for (int i = 0; i < pieces; i++) {
final int finalLo = lo;
final int finalHi = hi;
futures[i] = new FutureTask<>(() -> {
double localMax = FastMath.exp(array[finalLo]);
for (int j = finalLo; j < finalHi; j++) {
// final double exp = FastMath.exp(array[j]);
final double exp = array[j];
if (exp > localMax) {
localMax = exp;
}
}
return localMax;
});
pool.execute(futures[i]);
lo = hi;
hi += blockSize;
}
double max = array[0];
for (int i = 0; i < pieces; i++) {
try {
final double val = futures[i].get();
if (val > max) {
max = val;
}
} catch (InterruptedException|ExecutionException e) {
throw new RuntimeException(e);
}
}
return max;
}
public static class CountedFindMax extends CountedCompleter<Double> {
final int lo;
final int hi;
final double[] array;
final int blockSize;
double max;
CountedFindMax[] siblings;
CountedFindMax(final double[] array, final int blockSize, final int maxSiblings) {
this(null, array, 0, array.length, blockSize, maxSiblings);
}
CountedFindMax(final CountedCompleter<?> parent,
final double[] array, final int lo, final int hi,
final int blockSize, final int maxSiblings) {
super(parent);
this.array = array;
this.lo = lo;
this.hi = hi;
this.blockSize = blockSize;
this.siblings = new CountedFindMax[maxSiblings];
}
@Override
public void compute() {
final int l = lo;
int h = hi;
int j = 0;
while (h - l > blockSize) {
final int mid = (l + h + 1) / 2;
addToPendingCount(1);
siblings[j++] = (CountedFindMax) new CountedFindMax(this,
array, mid, h, blockSize,
siblings.length - j).fork();
h = mid;
}
if (h > l) {
max = FastMath.exp(array[l]);
for (int i = l + 1; i < h; i++) {
//final double exp = FastMath.exp(array[i]);
final double exp = array[i];
if (exp > max) {
max = exp;
}
}
}
tryComplete();
}
@Override
public void onCompletion(final CountedCompleter<?> caller) {
if (caller != this) {
for (final CountedFindMax sibling : siblings) {
if (sibling != null) { // this shouldn't be needed, but somehow I underfill siblings
if (sibling.max > this.max) {
this.max = sibling.max;
}
}
}
}
}
@Override
public Double getRawResult() {
return max;
}
public static double apply(final double[] array, final int pieces) {
final int blockSize = (array.length + pieces - 1) / pieces;
final int siblingCount = new Double(Math.floor(Math.log(1. / pieces) / Math.log(0.5))).intValue();
return new CountedFindMax(array, blockSize, siblingCount).invoke();
}
}
@Benchmark
public void simpleMax() {
maxValue = findMax(values);
}
@Benchmark
public void fjRecursiveMax() {
maxValue = RecursiveFindMax.apply(values, pieces);
}
@Benchmark
public void futuresMax() {
maxValue = findMaxFutures(values, pieces, pool);
}
@Benchmark
public void fjCountedMax() {
maxValue = CountedFindMax.apply(values, pieces);
}
public static void main(final String[] args) throws RunnerException {
final Options opts = new OptionsBuilder()
.include(".*" + FindMaxBenchmark.class.getSimpleName() + ".*")
.forks(1)
.build();
new Runner(opts).run();
}
}
Benchmark (arrayLen) (pieces) Mode Samples Score Score error Units
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 2 thrpt 5 152.022 26.515 ops/s
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 4 thrpt 5 161.961 13.154 ops/s
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 8 thrpt 5 167.746 4.560 ops/s
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 32 thrpt 5 168.868 3.247 ops/s
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 256 thrpt 5 170.415 4.082 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 2 thrpt 5 161.938 6.353 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 4 thrpt 5 165.995 6.204 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 8 thrpt 5 160.432 13.595 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 32 thrpt 5 156.904 22.049 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 256 thrpt 5 152.807 33.084 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 2 thrpt 5 140.285 23.045 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 4 thrpt 5 156.551 13.103 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 8 thrpt 5 159.055 13.238 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 32 thrpt 5 156.826 27.912 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 256 thrpt 5 152.148 33.182 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 2 thrpt 5 110.729 22.986 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 4 thrpt 5 110.015 20.115 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 8 thrpt 5 94.124 14.482 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 32 thrpt 5 107.391 12.271 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 256 thrpt 5 107.299 29.436 ops/s
import org.apache.commons.math3.util.FastMath;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.util.Random;
import java.util.concurrent.*;
import java.util.stream.DoubleStream;
/**
* Playground for parallel array operations
* @author Dmitry Groshev
* @date 12/08/2014
*/
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 5, time = 1)
@State(Scope.Benchmark)
public class FindMaxBenchmark {
//@Param({"100", "1000", "10000", "100000", "1000000", "10000000"})
//@Param({"1000000", "10000000"})
@Param({"10000000"})
int arrayLen;
@Param({"2", "4", "8", "32", "256"})
int pieces;
double[] values;
double trueMaxValue;
ExecutorService pool;
double maxValue;
@Setup
public void prepare() {
values = new double[arrayLen];
final Random gen = new Random(42);
for (int i = 0; i < arrayLen; i++) {
values[i] = gen.nextDouble();
}
trueMaxValue = DoubleStream.of(values).max().getAsDouble();
pool = Executors.newFixedThreadPool(ForkJoinPool.getCommonPoolParallelism());
}
@TearDown
public void check() {
assert maxValue == trueMaxValue : "max should be right";
pool.shutdown();
}
public static class RecursiveFindMax extends RecursiveTask<Double> {
final int lo;
final int hi;
final double[] array;
final int threshold;
final int pieces;
RecursiveFindMax(final double[] array, final int pieces) {
this(array, 0, array.length, pieces);
}
RecursiveFindMax(final double[] array, final int lo, final int hi, final int pieces) {
this.array = array;
this.lo = lo;
this.hi = hi;
this.pieces = pieces;
threshold = array.length / pieces;
}
@Override
protected Double compute() {
if (hi - lo <= threshold) {
double max = FastMath.exp(array[lo]);
for (int i = lo + 1; i < hi; i++) {
final double exp = FastMath.exp(array[i]);
if (exp > max) {
max = exp;
}
}
return max;
} else {
final int mid = lo + (hi - lo) / 2;
final RecursiveFindMax left = new RecursiveFindMax(array, lo, mid, pieces);
final RecursiveFindMax right = new RecursiveFindMax(array, mid, hi, pieces);
right.fork();
final double leftAns = left.compute();
final double rightAns = right.join();
return Math.max(leftAns, rightAns);
}
}
public static double apply(final double[] array, final int pieces) {
return ForkJoinPool.commonPool().invoke(new RecursiveFindMax(array, pieces));
}
}
double findMax(final double[] array) {
double max = FastMath.exp(array[0]);
for (final double x : array) {
final double exp = FastMath.exp(x);
if (exp > max) {
max = exp;
}
}
return max;
}
double findMaxFutures(final double[] array, final int pieces, final ExecutorService pool) {
final int blockSize = array.length / pieces;
final FutureTask<Double>[] futures = new FutureTask[pieces];
int lo = 0;
int hi = blockSize;
for (int i = 0; i < pieces; i++) {
final int finalLo = lo;
final int finalHi = hi;
futures[i] = new FutureTask<>(() -> {
double localMax = FastMath.exp(array[finalLo]);
for (int j = finalLo; j < finalHi; j++) {
final double exp = FastMath.exp(array[j]);
if (exp > localMax) {
localMax = exp;
}
}
return localMax;
});
pool.execute(futures[i]);
lo = hi;
hi += blockSize;
}
double max = array[0];
for (int i = 0; i < pieces; i++) {
try {
final double val = futures[i].get();
if (val > max) {
max = val;
}
} catch (InterruptedException|ExecutionException e) {
throw new RuntimeException(e);
}
}
return max;
}
public static class CountedFindMax extends CountedCompleter<Double> {
final int lo;
final int hi;
final double[] array;
final int blockSize;
double max;
CountedFindMax[] siblings;
CountedFindMax(final double[] array, final int blockSize, final int maxSiblings) {
this(null, array, 0, array.length, blockSize, maxSiblings);
}
CountedFindMax(final CountedCompleter<?> parent,
final double[] array, final int lo, final int hi,
final int blockSize, final int maxSiblings) {
super(parent);
this.array = array;
this.lo = lo;
this.hi = hi;
this.blockSize = blockSize;
this.siblings = new CountedFindMax[maxSiblings];
}
@Override
public void compute() {
final int l = lo;
int h = hi;
int j = 0;
while (h - l > blockSize) {
final int mid = (l + h + 1) / 2;
addToPendingCount(1);
siblings[j++] = (CountedFindMax) new CountedFindMax(this,
array, mid, h, blockSize,
siblings.length - 1).fork();
h = mid;
}
if (h > l) {
max = FastMath.exp(array[l]);
for (int i = l + 1; i < h; i++) {
final double exp = FastMath.exp(array[i]);
if (exp > max) {
max = exp;
}
}
}
tryComplete();
}
@Override
public void onCompletion(final CountedCompleter<?> caller) {
if (caller != this) {
for (final CountedFindMax sibling : siblings) {
if (sibling != null) { // this shouldn't be needed, but somehow I underfill siblings
if (sibling.max > this.max) {
this.max = sibling.max;
}
}
}
}
}
@Override
public Double getRawResult() {
return max;
}
public static double apply(final double[] array, final int pieces) {
final int blockSize = (array.length + pieces - 1) / pieces;
final int siblingCount = new Double(Math.floor(Math.log(1. / pieces) / Math.log(0.5))).intValue();
return new CountedFindMax(array, blockSize, siblingCount).invoke();
}
}
@Benchmark
public void simpleMax() {
maxValue = findMax(values);
}
@Benchmark
public void fjRecursiveMax() {
maxValue = RecursiveFindMax.apply(values, pieces);
}
@Benchmark
public void futuresMax() {
maxValue = findMaxFutures(values, pieces, pool);
}
@Benchmark
public void fjCountedMax() {
maxValue = CountedFindMax.apply(values, pieces);
}
public static void main(final String[] args) throws RunnerException {
final Options opts = new OptionsBuilder()
.include(".*" + FindMaxBenchmark.class.getSimpleName() + ".*")
.forks(1)
.build();
new Runner(opts).run();
}
}
Benchmark (arrayLen) (pieces) Mode Samples Score Score error Units
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 2 thrpt 5 5.827 0.357 ops/s
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 4 thrpt 5 13.045 1.223 ops/s
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 8 thrpt 5 21.936 6.818 ops/s
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 32 thrpt 5 23.270 2.956 ops/s
o.j.b.FindMaxBenchmark.fjCountedMax 10000000 256 thrpt 5 24.338 1.101 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 2 thrpt 5 8.914 2.990 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 4 thrpt 5 13.291 4.091 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 8 thrpt 5 20.789 8.258 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 32 thrpt 5 20.440 2.159 ops/s
o.j.b.FindMaxBenchmark.fjRecursiveMax 10000000 256 thrpt 5 19.389 8.121 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 2 thrpt 5 9.878 0.899 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 4 thrpt 5 13.257 2.340 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 8 thrpt 5 19.084 2.034 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 32 thrpt 5 22.057 1.652 ops/s
o.j.b.FindMaxBenchmark.futuresMax 10000000 256 thrpt 5 22.638 4.262 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 2 thrpt 5 5.496 0.211 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 4 thrpt 5 5.136 0.438 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 8 thrpt 5 5.013 0.535 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 32 thrpt 5 5.058 1.601 ops/s
o.j.b.FindMaxBenchmark.simpleMax 10000000 256 thrpt 5 5.455 0.612 ops/s
@ya-pulser
Copy link

Меня сильно смущает твой left.compute. Можешь поправить первый вариант и прогнать? Должно начать чуть дольше вызывать, но, имхо, чуть больше шансов на steal. Плюс поменял константы ближе к 64k L1 кешей.

18c18
-< @param({"10000000"})
-> @param({"16777216"})
21c21
-< @param({"2", "4", "8", "32", "256"})
-> @param({"2", "4", "8", "32", "256", "65536", "131072"})
82,83c82,83
-< right.fork();
-< final double leftAns = left.compute();
-> invokeAll(left,rigth);
-> final double leftAns = left.join();

@shipilev
Copy link

2x2 i5-2520M, JDK 8 GA:

Benchmark                              (arrayLen)  (pieces)   Mode  Samples    Score  Score error  Units
o.o.FindMaxBenchmark.fjCountedMax        10000000         2  thrpt       15  132.858       29.619  ops/s
o.o.FindMaxBenchmark.fjCountedMax        10000000         4  thrpt       15  176.044        8.036  ops/s
o.o.FindMaxBenchmark.fjCountedMax        10000000         8  thrpt       15  174.449        8.615  ops/s
o.o.FindMaxBenchmark.fjCountedMax        10000000        32  thrpt       15  161.985       16.314  ops/s
o.o.FindMaxBenchmark.fjCountedMax        10000000       256  thrpt       15  176.822       10.759  ops/s
o.o.FindMaxBenchmark.fjRecursiveMax      10000000         2  thrpt       15  115.386        9.911  ops/s
o.o.FindMaxBenchmark.fjRecursiveMax      10000000         4  thrpt       15  155.198       19.247  ops/s
o.o.FindMaxBenchmark.fjRecursiveMax      10000000         8  thrpt       15  142.627       25.476  ops/s
o.o.FindMaxBenchmark.fjRecursiveMax      10000000        32  thrpt       15  171.528        3.865  ops/s
o.o.FindMaxBenchmark.fjRecursiveMax      10000000       256  thrpt       15  169.140        2.069  ops/s
o.o.FindMaxBenchmark.futuresMax          10000000         2  thrpt       15  130.649       10.201  ops/s
o.o.FindMaxBenchmark.futuresMax          10000000         4  thrpt       15  143.422        4.727  ops/s
o.o.FindMaxBenchmark.futuresMax          10000000         8  thrpt       15  143.518        7.510  ops/s
o.o.FindMaxBenchmark.futuresMax          10000000        32  thrpt       15  151.627       11.779  ops/s
o.o.FindMaxBenchmark.futuresMax          10000000       256  thrpt       15  138.823       13.500  ops/s
o.o.FindMaxBenchmark.simpleMax           10000000         2  thrpt       15   79.631        3.943  ops/s
o.o.FindMaxBenchmark.simpleMax           10000000         4  thrpt       15   79.769        2.952  ops/s
o.o.FindMaxBenchmark.simpleMax           10000000         8  thrpt       15   78.356        2.192  ops/s
o.o.FindMaxBenchmark.simpleMax           10000000        32  thrpt       15   79.745        4.774  ops/s
o.o.FindMaxBenchmark.simpleMax           10000000       256  thrpt       15   76.996        2.722  ops/s

Ну вот, почти в два раза быстрее, без всяких изменений. Во всех случаях мы упёрлись именно в вычисление максимума, паразитных проблем в профиле нет. Цикл развернулся на разное количество итераций кое-где.

Частоту проца-то не забыл зафиксировать? А то ведь simpleMax отлично с turboboost-ами дружит.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment