// Skip to content

// Instantly share code, notes, and snippets.

// Embed
// What would you like to do?
package jmh;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.concurrent.TimeUnit;
import jdk.incubator.foreign.CLinker;
import jdk.incubator.foreign.MemoryAccess;
import jdk.incubator.foreign.MemoryAddress;
import jdk.incubator.foreign.MemorySegment;
import jdk.incubator.foreign.ResourceScope;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.VectorSpecies;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
/**
 * JMH benchmark that copies {@code size} bytes from a source to a destination using
 * {@link ByteVector} loads and stores, with a scalar byte-by-byte loop as a baseline
 * for most storage kinds.
 *
 * <p>The storage kinds exercised are: plain {@code byte[]} arrays, heap
 * {@link ByteBuffer}s, direct {@link ByteBuffer}s, {@link ByteBuffer} views of native
 * {@link MemorySegment}s bound to an implicit scope, and {@link ByteBuffer} views of
 * raw native memory wrapped in a per-invocation confined scope.
 *
 * <p>Every loop runs up to {@code SPECIES.loopBound(srcArray.length)}, so the vector
 * and scalar variants cover the same number of bytes; any tail beyond the last full
 * vector is not copied.
 *
 * <p>Only {@code srcArray} is given a byte pattern in {@link #setup()}; the buffers,
 * segments and raw addresses are not written to before the benchmarks run.
 */
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(org.openjdk.jmh.annotations.Scope.Thread)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 1, jvmArgsAppend = {
        "--add-modules=jdk.incubator.foreign,jdk.incubator.vector",
        "-Dforeign.restricted=permit",
        "--enable-native-access", "ALL-UNNAMED"})
public class TestLoadStoreBytes {

    /** Preferred byte-vector shape for the running platform; fixes the vector stride. */
    private static final VectorSpecies<Byte> SPECIES = VectorSpecies.ofPreferred(byte.class);

    /** Number of bytes in every source and destination store. */
    @Param("1024")
    private int size;

    // Plain Java arrays.
    private byte[] srcArray;
    private byte[] dstArray;

    // Heap-backed byte buffers.
    private ByteBuffer srcBufferHeap;
    private ByteBuffer dstBufferHeap;

    // Direct byte buffers.
    private ByteBuffer srcBufferNative;
    private ByteBuffer dstBufferNative;

    // Native segments sharing one implicit scope, plus their ByteBuffer views.
    private ResourceScope implicitScope;
    private MemorySegment srcSegmentImplicit;
    private MemorySegment dstSegmentImplicit;
    private ByteBuffer srcBufferSegmentImplicit;
    private ByteBuffer dstBufferSegmentImplicit;

    // Raw native blocks from CLinker.allocateMemory; released in tearDown().
    private MemoryAddress srcAddress;
    private MemoryAddress dstAddress;

    /**
     * Allocates every source/destination pair at {@code size} bytes.
     *
     * <p>{@code dstArray} is cloned from {@code srcArray} before the latter is filled,
     * so it starts out zeroed. The native segments are aligned to the vector byte size.
     */
    @Setup
    public void setup() {
        srcArray = new byte[size];
        dstArray = srcArray.clone();
        for (int i = 0; i < srcArray.length; i++) {
            srcArray[i] = (byte) i;
        }

        srcBufferHeap = ByteBuffer.allocate(size);
        dstBufferHeap = ByteBuffer.allocate(size);

        srcBufferNative = ByteBuffer.allocateDirect(size);
        dstBufferNative = ByteBuffer.allocateDirect(size);

        implicitScope = ResourceScope.newImplicitScope();
        srcSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
        srcBufferSegmentImplicit = srcSegmentImplicit.asByteBuffer();
        dstSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
        dstBufferSegmentImplicit = dstSegmentImplicit.asByteBuffer();

        srcAddress = CLinker.allocateMemory(size);
        dstAddress = CLinker.allocateMemory(size);
    }

    /**
     * Frees the raw native blocks obtained in {@link #setup()}.
     *
     * <p>Memory returned by {@code CLinker.allocateMemory} is not attached to any
     * {@link ResourceScope}, so without this explicit release it would leak once per
     * trial and benchmark thread.
     */
    @TearDown
    public void tearDown() {
        CLinker.freeMemory(srcAddress);
        CLinker.freeMemory(dstAddress);
    }

    /** Vector copy between two {@code byte[]} arrays. */
    @Benchmark
    public void array() {
        for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
            var v = ByteVector.fromArray(SPECIES, srcArray, i);
            v.intoArray(dstArray, i);
        }
    }

    /** Scalar byte-by-byte copy between two {@code byte[]} arrays. */
    @Benchmark
    public void arrayScalar() {
        for (int i = 0; i < SPECIES.loopBound(srcArray.length); i ++) {
            var v = srcArray[i];
            dstArray[i] = v;
        }
    }

    /** Vector copy between two heap {@link ByteBuffer}s, native byte order. */
    @Benchmark
    public void bufferHeap() {
        for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
            var v = ByteVector.fromByteBuffer(SPECIES, srcBufferHeap, i, ByteOrder.nativeOrder());
            v.intoByteBuffer(dstBufferHeap, i, ByteOrder.nativeOrder());
        }
    }

    /** Scalar copy between two heap {@link ByteBuffer}s using absolute get/put. */
    @Benchmark
    public void bufferHeapScalar() {
        for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) {
            var v = srcBufferHeap.get(i);
            dstBufferHeap.put(i, v);
        }
    }

    /** Vector copy between two direct {@link ByteBuffer}s, native byte order. */
    @Benchmark
    public void bufferNative() {
        for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
            var v = ByteVector.fromByteBuffer(SPECIES, srcBufferNative, i, ByteOrder.nativeOrder());
            v.intoByteBuffer(dstBufferNative, i, ByteOrder.nativeOrder());
        }
    }

    /** Scalar copy between two direct {@link ByteBuffer}s using absolute get/put. */
    @Benchmark
    public void bufferNativeScalar() {
        for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) {
            var v = srcBufferNative.get(i);
            dstBufferNative.put(i, v);
        }
    }

    /** Vector copy between {@link ByteBuffer} views of the implicit-scope native segments. */
    @Benchmark
    public void bufferSegmentImplicit() {
        for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
            var v = ByteVector.fromByteBuffer(SPECIES, srcBufferSegmentImplicit, i, ByteOrder.nativeOrder());
            v.intoByteBuffer(dstBufferSegmentImplicit, i, ByteOrder.nativeOrder());
        }
    }

    /** Scalar copy between the implicit-scope native segments via {@link MemoryAccess}. */
    @Benchmark
    public void segmentImplicitScalar() {
        for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) {
            var v = MemoryAccess.getByteAtOffset(srcSegmentImplicit, i);
            MemoryAccess.setByteAtOffset(dstSegmentImplicit, i, v);
        }
    }

    /**
     * Vector copy between {@link ByteBuffer} views of the raw native blocks.
     *
     * <p>Each invocation opens a fresh confined scope, wraps both addresses as
     * {@code size}-byte segments in it, and closes the scope on exit, so the scope
     * creation, wrapping and close are all part of the measured time.
     */
    @Benchmark
    public void bufferSegmentConfined() {
        try (final var scope = ResourceScope.newConfinedScope()) {
            final var srcBufferSegmentConfined = srcAddress.asSegment(size, scope).asByteBuffer();
            final var dstBufferSegmentConfined = dstAddress.asSegment(size, scope).asByteBuffer();
            for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
                var v = ByteVector.fromByteBuffer(SPECIES, srcBufferSegmentConfined, i, ByteOrder.nativeOrder());
                v.intoByteBuffer(dstBufferSegmentConfined, i, ByteOrder.nativeOrder());
            }
        }
    }
}
/*
git@github.com:openjdk/jdk.git
cd20c01942dd8559a31e51ef2a595c6eba44b8ad refs/remotes/origin/HEAD
# VM options: -XX:-TieredCompilation -XX:ObjectAlignmentInBytes=32 --add-modules=jdk.incubator.foreign,jdk.incubator.vector -Dforeign.restricted=permit --enable-native-access ALL-UNNAMED
Benchmark (size) Mode Cnt Score Error Units
TestLoadStoreBytes.array 1024 avgt 10 13.767 ± 0.215 ns/op
TestLoadStoreBytes.array 16384 avgt 10 279.825 ± 5.764 ns/op
TestLoadStoreBytes.arrayScalar 1024 avgt 10 23.255 ± 0.684 ns/op
TestLoadStoreBytes.arrayScalar 16384 avgt 10 195.704 ± 6.330 ns/op
TestLoadStoreBytes.bufferHeap 1024 avgt 10 78.539 ± 0.431 ns/op
TestLoadStoreBytes.bufferHeap 16384 avgt 10 1303.619 ± 31.830 ns/op
TestLoadStoreBytes.bufferHeapScalar 1024 avgt 10 268.601 ± 8.998 ns/op
TestLoadStoreBytes.bufferHeapScalar 16384 avgt 10 4217.291 ± 28.522 ns/op
TestLoadStoreBytes.bufferNative 1024 avgt 10 73.032 ± 0.611 ns/op
TestLoadStoreBytes.bufferNative 16384 avgt 10 1282.294 ± 13.322 ns/op
TestLoadStoreBytes.bufferNativeScalar 1024 avgt 10 257.354 ± 13.796 ns/op
TestLoadStoreBytes.bufferNativeScalar 16384 avgt 10 4064.541 ± 53.939 ns/op
TestLoadStoreBytes.bufferSegmentConfined 1024 avgt 10 439.680 ± 8.065 ns/op
TestLoadStoreBytes.bufferSegmentConfined 16384 avgt 10 1388.378 ± 16.244 ns/op
TestLoadStoreBytes.bufferSegmentImplicit 1024 avgt 10 77.520 ± 0.753 ns/op
TestLoadStoreBytes.bufferSegmentImplicit 16384 avgt 10 1266.489 ± 11.080 ns/op
TestLoadStoreBytes.segmentImplicitScalar 1024 avgt 10 749.856 ± 8.769 ns/op
TestLoadStoreBytes.segmentImplicitScalar 16384 avgt 10 11846.255 ± 130.863 ns/op
# VM options: -XX:+TieredCompilation -XX:ObjectAlignmentInBytes=32 --add-modules=jdk.incubator.foreign,jdk.incubator.vector -Dforeign.restricted=permit --enable-native-access ALL-UNNAMED
Benchmark (size) Mode Cnt Score Error Units
TestLoadStoreBytes.array 1024 avgt 10 13.728 ± 0.818 ns/op
TestLoadStoreBytes.array 16384 avgt 10 260.386 ± 11.354 ns/op
TestLoadStoreBytes.arrayScalar 1024 avgt 10 23.388 ± 0.703 ns/op
TestLoadStoreBytes.arrayScalar 16384 avgt 10 190.860 ± 11.174 ns/op
TestLoadStoreBytes.bufferHeap 1024 avgt 10 71.069 ± 0.883 ns/op
TestLoadStoreBytes.bufferHeap 16384 avgt 10 1282.913 ± 11.387 ns/op
TestLoadStoreBytes.bufferHeapScalar 1024 avgt 10 266.658 ± 6.184 ns/op
TestLoadStoreBytes.bufferHeapScalar 16384 avgt 10 4165.673 ± 68.071 ns/op
TestLoadStoreBytes.bufferNative 1024 avgt 10 70.646 ± 1.210 ns/op
TestLoadStoreBytes.bufferNative 16384 avgt 10 1239.830 ± 12.737 ns/op
TestLoadStoreBytes.bufferNativeScalar 1024 avgt 10 251.330 ± 2.522 ns/op
TestLoadStoreBytes.bufferNativeScalar 16384 avgt 10 4042.816 ± 50.017 ns/op
TestLoadStoreBytes.bufferSegmentConfined 1024 avgt 10 50.773 ± 0.413 ns/op
TestLoadStoreBytes.bufferSegmentConfined 16384 avgt 10 535.652 ± 3.546 ns/op
TestLoadStoreBytes.bufferSegmentImplicit 1024 avgt 10 70.323 ± 0.748 ns/op
TestLoadStoreBytes.bufferSegmentImplicit 16384 avgt 10 1265.497 ± 9.701 ns/op
TestLoadStoreBytes.segmentImplicitScalar 1024 avgt 10 747.202 ± 9.150 ns/op
TestLoadStoreBytes.segmentImplicitScalar 16384 avgt 10 11903.696 ± 353.461 ns/op
----
With patch applied:
diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp
index c36b2a22caf..bb1cc6f723e 100644
--- a/src/hotspot/share/opto/vectorIntrinsics.cpp
+++ b/src/hotspot/share/opto/vectorIntrinsics.cpp
@@ -827,7 +827,7 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
if (can_access_non_heap) {
- insert_mem_bar(Op_MemBarCPUOrder);
+// insert_mem_bar(Op_MemBarCPUOrder);
}
if (is_store) {
@@ -874,7 +874,7 @@ bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
old_map->destruct(&_gvn);
if (can_access_non_heap) {
- insert_mem_bar(Op_MemBarCPUOrder);
+// insert_mem_bar(Op_MemBarCPUOrder);
}
C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
# VM options: -XX:-TieredCompilation -XX:ObjectAlignmentInBytes=32 --add-modules=jdk.incubator.foreign,jdk.incubator.vector -Dforeign.restricted=permit --enable-native-access ALL-UNNAMED
Benchmark (size) Mode Cnt Score Error Units
TestLoadStoreBytes.array 1024 avgt 10 13.713 ± 0.135 ns/op
TestLoadStoreBytes.array 16384 avgt 10 283.524 ± 4.238 ns/op
TestLoadStoreBytes.arrayScalar 1024 avgt 10 23.123 ± 0.283 ns/op
TestLoadStoreBytes.arrayScalar 16384 avgt 10 200.712 ± 8.645 ns/op
TestLoadStoreBytes.bufferHeap 1024 avgt 10 25.664 ± 0.316 ns/op
TestLoadStoreBytes.bufferHeap 16384 avgt 10 385.964 ± 12.450 ns/op
TestLoadStoreBytes.bufferHeapScalar 1024 avgt 10 265.677 ± 3.302 ns/op
TestLoadStoreBytes.bufferHeapScalar 16384 avgt 10 4163.773 ± 72.358 ns/op
TestLoadStoreBytes.bufferNative 1024 avgt 10 25.556 ± 0.390 ns/op
TestLoadStoreBytes.bufferNative 16384 avgt 10 349.823 ± 4.298 ns/op
TestLoadStoreBytes.bufferNativeScalar 1024 avgt 10 252.061 ± 3.130 ns/op
TestLoadStoreBytes.bufferNativeScalar 16384 avgt 10 4062.956 ± 70.301 ns/op
TestLoadStoreBytes.bufferSegmentConfined 1024 avgt 10 63.998 ± 0.425 ns/op
TestLoadStoreBytes.bufferSegmentConfined 16384 avgt 10 806.393 ± 10.080 ns/op
TestLoadStoreBytes.bufferSegmentImplicit 1024 avgt 10 25.539 ± 0.268 ns/op
TestLoadStoreBytes.bufferSegmentImplicit 16384 avgt 10 349.869 ± 4.381 ns/op
TestLoadStoreBytes.segmentImplicitScalar 1024 avgt 10 754.664 ± 21.680 ns/op
TestLoadStoreBytes.segmentImplicitScalar 16384 avgt 10 11887.237 ± 189.345 ns/op
# VM options: -XX:+TieredCompilation -XX:ObjectAlignmentInBytes=32 --add-modules=jdk.incubator.foreign,jdk.incubator.vector -Dforeign.restricted=permit --enable-native-access ALL-UNNAMED
Benchmark (size) Mode Cnt Score Error Units
TestLoadStoreBytes.array 1024 avgt 10 14.034 ± 0.176 ns/op
TestLoadStoreBytes.array 16384 avgt 10 252.624 ± 10.333 ns/op
TestLoadStoreBytes.arrayAdd 1024 avgt 10 15.911 ± 0.523 ns/op
TestLoadStoreBytes.arrayAdd 16384 avgt 10 271.969 ± 5.597 ns/op
TestLoadStoreBytes.arrayScalar 1024 avgt 10 23.344 ± 0.535 ns/op
TestLoadStoreBytes.arrayScalar 16384 avgt 10 195.957 ± 10.636 ns/op
TestLoadStoreBytes.bufferHeap 1024 avgt 10 25.644 ± 0.281 ns/op
TestLoadStoreBytes.bufferHeap 16384 avgt 10 388.989 ± 4.021 ns/op
TestLoadStoreBytes.bufferHeapScalar 1024 avgt 10 266.135 ± 3.611 ns/op
TestLoadStoreBytes.bufferHeapScalar 16384 avgt 10 4158.670 ± 63.424 ns/op
TestLoadStoreBytes.bufferNative 1024 avgt 10 25.678 ± 0.332 ns/op
TestLoadStoreBytes.bufferNative 16384 avgt 10 349.329 ± 3.850 ns/op
TestLoadStoreBytes.bufferNativeScalar 1024 avgt 10 251.668 ± 3.311 ns/op
TestLoadStoreBytes.bufferNativeScalar 16384 avgt 10 4045.838 ± 129.280 ns/op
TestLoadStoreBytes.bufferSegmentConfined 1024 avgt 10 34.934 ± 0.257 ns/op
TestLoadStoreBytes.bufferSegmentConfined 16384 avgt 10 286.946 ± 6.120 ns/op
TestLoadStoreBytes.bufferSegmentImplicit 1024 avgt 10 25.681 ± 0.501 ns/op
TestLoadStoreBytes.bufferSegmentImplicit 16384 avgt 10 351.099 ± 10.846 ns/op
TestLoadStoreBytes.segmentImplicitScalar 1024 avgt 10 750.972 ± 9.326 ns/op
TestLoadStoreBytes.segmentImplicitScalar 16384 avgt 10 11864.784 ± 135.626 ns/op
*/
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment