Skip to content

Instantly share code, notes, and snippets.

@overheadhunter
Created July 15, 2021 10:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save overheadhunter/86e7baae7dfe47c49ff364590a4f3ea6 to your computer and use it in GitHub Desktop.
Save overheadhunter/86e7baae7dfe47c49ff364590a4f3ea6 to your computer and use it in GitHub Desktop.
Benchmark of different strlen() variations
import jdk.incubator.foreign.MemoryAccess;
import jdk.incubator.foreign.MemorySegment;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import java.io.IOException;
import java.nio.ByteOrder;
import java.util.concurrent.TimeUnit;
public class BenchmarkTest {
/**
 * Sanity-checks every strlen variant against the shared test segment and then hands control to
 * the JMH command-line runner.
 *
 * <p>The checks are explicit rather than {@code assert} statements: assertions are disabled
 * unless the JVM is started with {@code -ea}, so an {@code assert}-based check would normally be
 * skipped and a broken variant would still be benchmarked.
 *
 * @param args command-line arguments, forwarded unchanged to JMH
 * @throws IOException propagated from the JMH runner
 * @throws IllegalStateException if any variant does not report the expected length of 239
 */
public static void main(String[] args) throws IOException {
    requireExpectedLength("strlenBase", strlenBase(segment, 0));
    requireExpectedLength("strlenUnroll4", strlenUnroll4(segment, 0));
    requireExpectedLength("strlenUnroll8", strlenUnroll8(segment, 0));
    requireExpectedLength("strlenUnroll4b", strlenUnroll4b(segment, 0));
    org.openjdk.jmh.Main.main(args);
}

/** Fails fast when a strlen variant reports anything other than the known length of 239. */
private static void requireExpectedLength(String variant, int actual) {
    if (actual != 239) {
        throw new IllegalStateException(variant + " returned " + actual + ", expected 239");
    }
}
/**
 * Shared benchmark input: a heap-array-backed segment of 259 bytes.
 *
 * <p>The first 239 bytes are non-zero, the byte at index 239 is the only zero (the terminator),
 * and 19 further non-zero bytes follow it. Every strlen variant started at offset 0 is therefore
 * expected to return 239. Because 239 is the last byte of a 4-byte and of an 8-byte group, the
 * unrolled variants find the terminator inside their unrolled loop for this input.
 */
private static final MemorySegment segment = MemorySegment.ofArray(new byte[]{
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, // 240th byte -> strlen 239
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
0x10, 0x11, 0x12
});
/**
 * Measures the average time, in microseconds, of one {@code strlenBase} call on the shared test
 * segment starting at offset 0 (one fork, two warmup iterations).
 *
 * @param blackhole receives the computed length so the call is not eliminated as dead code
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 2)
@Fork(1)
public void benchmarkStrlenBase(Blackhole blackhole) {
    final int length = strlenBase(segment, 0);
    // Only evaluated when the JVM runs with assertions enabled (-ea).
    assert length == 239;
    blackhole.consume(length);
}
/**
 * Reference strlen: scans one byte at a time from {@code start} until a zero byte is read.
 *
 * @param segment memory segment holding the string
 * @param start byte offset within {@code segment} of the first character
 * @return number of bytes before the first zero byte, counted from {@code start}
 * @throws IllegalArgumentException if the {@code int} counter wraps around before a zero byte is
 *     found
 */
private static int strlenBase(MemorySegment segment, long start) {
    int length = 0;
    // The counter turns negative once it overflows; that ends the scan.
    while (length >= 0) {
        if (MemoryAccess.getByteAtOffset(segment, start + length) == 0) {
            return length;
        }
        length++;
    }
    throw new IllegalArgumentException("String too large");
}
/**
 * Measures the average time, in microseconds, of one {@code strlenUnroll4} call on the shared
 * test segment starting at offset 0 (one fork, two warmup iterations).
 *
 * @param blackhole receives the computed length so the call is not eliminated as dead code
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 2)
@Fork(1)
public void benchmarkStrlenUnroll4(Blackhole blackhole) {
    final int length = strlenUnroll4(segment, 0);
    // Only evaluated when the JVM runs with assertions enabled (-ea).
    assert length == 239;
    blackhole.consume(length);
}
/**
 * Measures the average time, in microseconds, of one {@code strlenUnroll4b} call on the shared
 * test segment starting at offset 0 (one fork, two warmup iterations).
 *
 * @param blackhole receives the computed length so the call is not eliminated as dead code
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 2)
@Fork(1)
public void benchmarkStrlenUnroll4b(Blackhole blackhole) {
    final int length = strlenUnroll4b(segment, 0);
    // Only evaluated when the JVM runs with assertions enabled (-ea).
    assert length == 239;
    blackhole.consume(length);
}
/**
 * strlen variant that reads four individual bytes per loop iteration, then finishes the
 * remaining (fewer than four) bytes one at a time.
 *
 * @param segment memory segment holding the string
 * @param start byte offset within {@code segment} of the first character
 * @return number of bytes before the first zero byte, counted from {@code start}
 * @throws IllegalArgumentException if no zero byte is found between {@code start} and the end of
 *     the segment, or within the first {@code Integer.MAX_VALUE} bytes
 */
private static int strlenUnroll4(MemorySegment segment, long start) {
    // Only the bytes between `start` and the end of the segment may be scanned. Capping at
    // Integer.MAX_VALUE keeps the int counter from overflowing on very large segments.
    final long limit = Math.min(segment.byteSize() - start, Integer.MAX_VALUE);
    int offset;
    for (offset = 0; offset < limit - 3; offset += 4) {
        byte b0 = MemoryAccess.getByteAtOffset(segment, start + offset);
        byte b1 = MemoryAccess.getByteAtOffset(segment, start + offset + 1);
        byte b2 = MemoryAccess.getByteAtOffset(segment, start + offset + 2);
        byte b3 = MemoryAccess.getByteAtOffset(segment, start + offset + 3);
        // Single combined test on the common path; the position is resolved only on a hit.
        if (b0 == 0 || b1 == 0 || b2 == 0 || b3 == 0) {
            if (b0 == 0) {
                return offset;
            } else if (b1 == 0) {
                return offset + 1;
            } else if (b2 == 0) {
                return offset + 2;
            } else {
                return offset + 3;
            }
        }
    }
    // Tail: the counter must advance here, otherwise a non-zero tail byte loops forever.
    for (; offset < limit; offset++) {
        if (MemoryAccess.getByteAtOffset(segment, start + offset) == 0) {
            return offset;
        }
    }
    throw new IllegalArgumentException("String too large");
}
/**
 * strlen variant that loads four bytes at once as a big-endian {@code int} and inspects its
 * bytes, then finishes the remaining (fewer than four) bytes one at a time.
 *
 * @param segment memory segment holding the string
 * @param start byte offset within {@code segment} of the first character
 * @return number of bytes before the first zero byte, counted from {@code start}
 * @throws IllegalArgumentException if no zero byte is found between {@code start} and the end of
 *     the segment, or within the first {@code Integer.MAX_VALUE} bytes
 */
private static int strlenUnroll4b(MemorySegment segment, long start) {
    // Only the bytes between `start` and the end of the segment may be scanned. Capping at
    // Integer.MAX_VALUE keeps the int counter from overflowing on very large segments.
    final long limit = Math.min(segment.byteSize() - start, Integer.MAX_VALUE);
    int offset;
    for (offset = 0; offset < limit - 3; offset += 4) {
        int word = MemoryAccess.getIntAtOffset(segment, start + offset, ByteOrder.BIG_ENDIAN);
        // Big-endian load: the most significant byte is the one at the lowest address.
        byte b0 = (byte) (word >>> 24);
        byte b1 = (byte) (word >>> 16);
        byte b2 = (byte) (word >>> 8);
        byte b3 = (byte) word;
        // Single combined test on the common path; the position is resolved only on a hit.
        if (b0 == 0 || b1 == 0 || b2 == 0 || b3 == 0) {
            if (b0 == 0) {
                return offset;
            } else if (b1 == 0) {
                return offset + 1;
            } else if (b2 == 0) {
                return offset + 2;
            } else {
                return offset + 3;
            }
        }
    }
    // Tail: the counter must advance here, otherwise a non-zero tail byte loops forever.
    for (; offset < limit; offset++) {
        if (MemoryAccess.getByteAtOffset(segment, start + offset) == 0) {
            return offset;
        }
    }
    throw new IllegalArgumentException("String too large");
}
/**
 * Measures the average time, in microseconds, of one {@code strlenUnroll8} call on the shared
 * test segment starting at offset 0 (one fork, two warmup iterations).
 *
 * @param blackhole receives the computed length so the call is not eliminated as dead code
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 2)
@Fork(1)
public void benchmarkStrlenUnroll8(Blackhole blackhole) {
    final int length = strlenUnroll8(segment, 0);
    // Only evaluated when the JVM runs with assertions enabled (-ea).
    assert length == 239;
    blackhole.consume(length);
}
/**
 * strlen variant that reads eight individual bytes per loop iteration, then finishes the
 * remaining (fewer than eight) bytes one at a time.
 *
 * @param segment memory segment holding the string
 * @param start byte offset within {@code segment} of the first character
 * @return number of bytes before the first zero byte, counted from {@code start}
 * @throws IllegalArgumentException if no zero byte is found between {@code start} and the end of
 *     the segment, or within the first {@code Integer.MAX_VALUE} bytes
 */
private static int strlenUnroll8(MemorySegment segment, long start) {
    // Only the bytes between `start` and the end of the segment may be scanned. Capping at
    // Integer.MAX_VALUE keeps the int counter from overflowing on very large segments.
    final long limit = Math.min(segment.byteSize() - start, Integer.MAX_VALUE);
    int offset;
    for (offset = 0; offset < limit - 7; offset += 8) {
        byte b0 = MemoryAccess.getByteAtOffset(segment, start + offset);
        byte b1 = MemoryAccess.getByteAtOffset(segment, start + offset + 1);
        byte b2 = MemoryAccess.getByteAtOffset(segment, start + offset + 2);
        byte b3 = MemoryAccess.getByteAtOffset(segment, start + offset + 3);
        byte b4 = MemoryAccess.getByteAtOffset(segment, start + offset + 4);
        byte b5 = MemoryAccess.getByteAtOffset(segment, start + offset + 5);
        byte b6 = MemoryAccess.getByteAtOffset(segment, start + offset + 6);
        byte b7 = MemoryAccess.getByteAtOffset(segment, start + offset + 7);
        // Single combined test on the common path; the position is resolved only on a hit.
        if (b0 == 0 || b1 == 0 || b2 == 0 || b3 == 0 || b4 == 0 || b5 == 0 || b6 == 0 || b7 == 0) {
            if (b0 == 0) {
                return offset;
            } else if (b1 == 0) {
                return offset + 1;
            } else if (b2 == 0) {
                return offset + 2;
            } else if (b3 == 0) {
                return offset + 3;
            } else if (b4 == 0) {
                return offset + 4;
            } else if (b5 == 0) {
                return offset + 5;
            } else if (b6 == 0) {
                return offset + 6;
            } else {
                return offset + 7;
            }
        }
    }
    // Tail: the counter must advance here, otherwise a non-zero tail byte loops forever.
    for (; offset < limit; offset++) {
        if (MemoryAccess.getByteAtOffset(segment, start + offset) == 0) {
            return offset;
        }
    }
    throw new IllegalArgumentException("String too large");
}
}
@overheadhunter
Copy link
Author

overheadhunter commented Jul 15, 2021

Results using the JDK from the foreign-jextract branch at commit 8e31e1b6784:

Benchmark                              Mode  Cnt  Score   Error  Units
BenchmarkTest.benchmarkStrlenBase      avgt    5  0,077 ± 0,009  us/op
BenchmarkTest.benchmarkStrlenUnroll4   avgt    5  0,079 ± 0,011  us/op
BenchmarkTest.benchmarkStrlenUnroll4b  avgt    5  0,102 ± 0,009  us/op
BenchmarkTest.benchmarkStrlenUnroll8   avgt    5  0,243 ± 0,021  us/op

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment