Skip to content

Instantly share code, notes, and snippets.

@franz1981
Created January 12, 2024 15:54
Show Gist options
  • Save franz1981/9ddbc54e6cad02877f79f92951a4fb79 to your computer and use it in GitHub Desktop.
Save franz1981/9ddbc54e6cad02877f79f92951a4fb79 to your computer and use it in GitHub Desktop.
package red.hat.puzzles.http;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.CompilerControl;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.net.Inet4Address;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmark comparing three ways of building the dotted-decimal host name
 * (for example {@code "127.0.0.1"}) that is handed to
 * {@link InetAddress#getByAddress(String, byte[])} together with the raw
 * 4-byte address:
 * <ul>
 *   <li>{@link #getInet4AddressIndify} - plain {@code +} string concatenation;</li>
 *   <li>{@link #getInet4AddressStringBuilder} - an exactly pre-sized {@link StringBuilder};</li>
 *   <li>{@link #getInet4Address} - manual ASCII encoding into a {@code byte[]}.</li>
 * </ul>
 * The benchmark state ({@code ips}, {@code next}) is shared at {@code Scope.Benchmark}
 * and is not synchronized, so it assumes a single benchmark thread.
 */
@State(Scope.Benchmark)
@Fork(value = 2)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Measurement(iterations = 10, time = 100, timeUnit = TimeUnit.MILLISECONDS)
@Warmup(iterations = 10, time = 1)
public class Inet4Benchmark {

    /** Sample addresses, one {@code int[4]} of octets per entry. */
    private int[][] ips;

    /** Index of the sample returned by the next call to {@link #nextIp()}. */
    private int next;

    /** Populates the sample addresses; they mix 1-, 2- and 3-digit octets. */
    @Setup
    public void init() {
        ips = new int[4][];
        ips[0] = new int[]{127, 0, 12, 255};
        ips[1] = new int[]{255, 255, 255, 255};
        ips[2] = new int[]{0, 0, 0, 0};
        ips[3] = new int[]{127, 0, 0, 1};
    }

    /**
     * Returns the current sample and advances the cursor, wrapping to the first
     * sample after the last one.
     */
    private int[] nextIp() {
        var ips = this.ips;
        int[] ip = ips[next];
        next++;
        if (next >= ips.length) {
            next = 0;
        }
        return ip;
    }

    /** Benchmarks {@link #getInet4AddressIndify(int, int, int, int)} on the next sample. */
    @Benchmark
    public Inet4Address getInet4AddressIndify() {
        var ips = nextIp();
        return getInet4AddressIndify(ips[0], ips[1], ips[2], ips[3]);
    }

    /** Benchmarks {@link #getInet4Address(int, int, int, int)} on the next sample. */
    @Benchmark
    public Inet4Address getInet4Address() {
        var ips = nextIp();
        return getInet4Address(ips[0], ips[1], ips[2], ips[3]);
    }

    /** Benchmarks {@link #getInet4AddressStringBuilder(int, int, int, int)} on the next sample. */
    @Benchmark
    public Inet4Address getInet4AddressStringBuilder() {
        var ips = nextIp();
        return getInet4AddressStringBuilder(ips[0], ips[1], ips[2], ips[3]);
    }

    /**
     * Builds an {@link Inet4Address} whose host name is produced with plain
     * {@code +} string concatenation of the four octets.
     *
     * @param s1 first octet
     * @param s2 second octet
     * @param s3 third octet
     * @param s4 fourth octet
     * @return the address with host name {@code "s1.s2.s3.s4"}
     */
    public static Inet4Address getInet4AddressIndify(int s1, int s2, int s3, int s4) {
        byte[] bytes = new byte[4];
        bytes[0] = (byte) s1;
        bytes[1] = (byte) s2;
        bytes[2] = (byte) s3;
        bytes[3] = (byte) s4;
        try {
            return (Inet4Address) InetAddress.getByAddress(s1 + "." + s2 + "." + s3 + "." + s4, bytes);
        } catch (UnknownHostException e) {
            // not possible: only thrown for an illegal address length, and we always pass 4 bytes
            throw new IllegalStateException(e);
        }
    }

    /**
     * Builds an {@link Inet4Address} whose host name is produced with a
     * {@link StringBuilder} pre-sized to the exact number of characters needed.
     *
     * @param s1 first octet
     * @param s2 second octet
     * @param s3 third octet
     * @param s4 fourth octet
     * @return the address with host name {@code "s1.s2.s3.s4"}
     */
    public static Inet4Address getInet4AddressStringBuilder(int s1, int s2, int s3, int s4) {
        byte[] bytes = new byte[4];
        bytes[0] = (byte) s1;
        bytes[1] = (byte) s2;
        bytes[2] = (byte) s3;
        bytes[3] = (byte) s4;
        // 3 separators plus the decimal digits of every octet
        int capacity = 3 + digitCount(s1) + digitCount(s2) + digitCount(s3) + digitCount(s4);
        StringBuilder hostName = new StringBuilder(capacity);
        hostName.append(s1).append('.').append(s2).append('.').append(s3).append('.').append(s4);
        try {
            return (Inet4Address) InetAddress.getByAddress(hostName.toString(), bytes);
        } catch (UnknownHostException e) {
            // not possible: only thrown for an illegal address length, and we always pass 4 bytes
            throw new IllegalStateException(e);
        }
    }

    /**
     * Builds an {@link Inet4Address} whose host name is encoded by hand as ASCII
     * digits into an exactly sized {@code byte[]} and then turned into a String.
     *
     * @param s1 first octet, expected in {@code [0, 255]}
     * @param s2 second octet, expected in {@code [0, 255]}
     * @param s3 third octet, expected in {@code [0, 255]}
     * @param s4 fourth octet, expected in {@code [0, 255]}
     * @return the address with host name {@code "s1.s2.s3.s4"}
     */
    @SuppressWarnings("deprecation") // String(byte[], int) is used on purpose, see below
    public static Inet4Address getInet4Address(int s1, int s2, int s3, int s4) {
        byte[] bytes = new byte[4];
        bytes[0] = (byte) s1;
        bytes[1] = (byte) s2;
        bytes[2] = (byte) s3;
        bytes[3] = (byte) s4;
        // pre-compute the digits required
        int digitsForS1 = digitCount(s1);
        int digitsForS2 = digitCount(s2);
        int digitsForS3 = digitCount(s3);
        int digitsForS4 = digitCount(s4);
        byte[] hostBytes = new byte[3 + digitsForS1 + digitsForS2 + digitsForS3 + digitsForS4];
        // write each octet followed by its '.' separator, tracking the write position
        int pos = 0;
        encodeUnsignedByte(s1, hostBytes, pos, digitsForS1);
        pos += digitsForS1;
        hostBytes[pos++] = '.';
        encodeUnsignedByte(s2, hostBytes, pos, digitsForS2);
        pos += digitsForS2;
        hostBytes[pos++] = '.';
        encodeUnsignedByte(s3, hostBytes, pos, digitsForS3);
        pos += digitsForS3;
        hostBytes[pos++] = '.';
        encodeUnsignedByte(s4, hostBytes, pos, digitsForS4);
        // Deprecated constructor: every byte becomes the low 8 bits of a char with the
        // given high byte (0). That is exact here because only ASCII digits and '.' are written.
        String hostName = new String(hostBytes, 0);
        try {
            return (Inet4Address) InetAddress.getByAddress(hostName, bytes);
        } catch (UnknownHostException e) {
            // not possible: only thrown for an illegal address length, and we always pass 4 bytes
            throw new IllegalStateException(e);
        }
    }

    /**
     * Returns how many decimal digits are needed to print {@code value}:
     * 1 below 10, 2 below 100, otherwise 3. Only meaningful for {@code [0, 255]}.
     */
    private static int digitCount(int value) {
        return value < 10 ? 1 : value < 100 ? 2 : 3;
    }

    /**
     * Writes {@code value} as {@code digits} ASCII decimal digits into
     * {@code bytes[offset .. offset + digits - 1]}, most significant digit first.
     *
     * @param value  the number to encode, expected in {@code [0, 255]}
     * @param bytes  destination buffer
     * @param offset index of the most significant digit
     * @param digits number of digits to emit (1 to 3), as returned by {@link #digitCount(int)}
     */
    private static void encodeUnsignedByte(int value, byte[] bytes, int offset, int digits) {
        assert value >= 0 && value <= 255;
        // Fill from the least significant digit backwards so that every one of the
        // `digits` positions is written (the previous if-chain skipped the middle
        // digit of 3-digit values and stored the tens digit in the first position).
        for (int i = offset + digits - 1; i >= offset; i--) {
            bytes[i] = (byte) ('0' + (value % 10));
            value /= 10;
        }
    }
}
@franz1981
Copy link
Author

Why is the getInet4Address version slower?
That's why:

  1. the indify version neither copies the byte[] into the String (more on this later) nor zero-initializes the byte[] it allocates
  2. because the byte[] -> String copy doesn't happen, the arraycopy stub used by getInet4Address is neither generated nor called (everything is inlined!)

Additionally, the stub looks like

....[Hottest Region 1]..............................................................................
runtime stub, StubRoutines::jlong_disjoint_arraycopy_avx3 (14 bytes) 

            0x00007fc1efe49be1:   mov    %rsp,%rbp
            0x00007fc1efe49be4:   movabs $0x0,%rcx
            0x00007fc1efe49bee:   mov    %rdx,%r8
            0x00007fc1efe49bf1:   cmp    $0x0,%r8
            0x00007fc1efe49bf5:   jle    0x00007fc1efe4a162
            0x00007fc1efe49bfb:   cmp    $0x4,%r8
         ╭  0x00007fc1efe49bff:   jg     0x00007fc1efe49c28
         │  0x00007fc1efe49c01:   movabs $0xffffffffffffffff,%rax
         │  0x00007fc1efe49c0b:   bzhi   %r8,%rax,%rax
         │  0x00007fc1efe49c10:   kmovq  %rax,%k2
  0.12%  │  0x00007fc1efe49c15:   vmovdqu64 (%rdi,%rcx,8),%ymm1{%k2}{z}
 42.59%  │  0x00007fc1efe49c1c:   vmovdqu64 %ymm1,(%rsi,%rcx,8){%k2}
  0.23%  │  0x00007fc1efe49c23:   jmp    0x00007fc1efe4a162
         ↘  0x00007fc1efe49c28:   cmp    $0x8,%r8
            0x00007fc1efe49c2c:   jg     0x00007fc1efe49c65
            0x00007fc1efe49c2e:   vmovdqu (%rdi,%rcx,8),%ymm1
            0x00007fc1efe49c33:   vmovdqu %ymm1,(%rsi,%rcx,8)
            0x00007fc1efe49c38:   sub    $0x4,%r8
            0x00007fc1efe49c3c:   movabs $0xffffffffffffffff,%rax
            0x00007fc1efe49c46:   bzhi   %r8,%rax,%rax
            0x00007fc1efe49c4b:   kmovq  %rax,%k2
            0x00007fc1efe49c50:   vmovdqu64 0x20(%rdi,%rcx,8),%ymm1{%k2}{z}
            0x00007fc1efe49c58:   vmovdqu64 %ymm1,0x20(%rsi,%rcx,8){%k2}
....................................................................................................
 42.94%  <total for region 1>

which sounds weird given that the data to be copied never exceeds 12 + 3 = 15 bytes.

If we forcibly set

        ips = new int[4][];
        ips[0] = new int[]{0, 0, 0, 0};
        ips[1] = new int[]{0, 0, 0, 0};
        ips[2] = new int[]{0, 0, 0, 0};
        ips[3] = new int[]{0, 0, 0, 0};

which lowers the number of bytes to 7, then the performance of the indify version and the getInet4Address version will be the same, given that no stub is used

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment