// Scraped from a GitHub Gist page ("Instantly share code, notes, and snippets.").
// Author: @anuraaga
// Gist: anuraaga/f94119780438bded4209f453a8f85169
// Last active: November 28, 2017 17:03
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
package com.google.protobuf;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.protobuf.benchmarks.BenchmarkProto.StringsMessage;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.infra.Blackhole;
import sun.nio.ch.DirectBuffer;
public class Utf8DecodeBenchmark {
/**
 * Sample texts used as benchmark input. Every constant is a short seed string repeated ten times;
 * the seeds differ in how many UTF-8 bytes each character needs.
 */
public enum StringValue {
  /** ASCII letters only (one byte per character in UTF-8). */
  ENGLISH(Strings.repeat("abcde", 10)),
  /** Hiragana characters from the U+3040 block (three bytes each in UTF-8). */
  JAPANESE(Strings.repeat("\u3042\u3044\u3046\u3048\u304a", 10)),
  /** The hiragana seed followed by the ASCII seed. */
  MIXED(Strings.repeat("\u3042\u3044\u3046\u3048\u304aabcde", 10)),
  /** Hebrew letters from the U+05D0 block (two bytes each in UTF-8). */
  HEBREW(Strings.repeat("\u05d3\u05d2\u05e1\u05e7\u05e8", 10)),
  /** Three supplementary code points written as surrogate pairs (four bytes each in UTF-8). */
  CHINESE(Strings.repeat("\uD841\uDF0E\uD841\uDF31\uD841\uDF10", 10)),
  ;

  /** The full sample text; read directly by the benchmark state classes of the enclosing class. */
  private final String value;

  StringValue(String text) {
    value = text;
  }
}
/** Selects which kind of {@code ByteBuffer} a parameterized benchmark allocates. */
public enum ByteBufferType {
  /** An off-heap buffer. */
  DIRECT,
  /** An ordinary heap buffer. */
  HEAP
}
/**
 * Measures decoding a string out of a direct buffer by first copying its bytes into a heap array
 * and then validating and decoding that array.
 */
@State(Scope.Thread)
public static class DirectByteBufferBenchmark {
  /** Sample text to decode; JMH runs the benchmark once per enum constant. */
  @Param
  public StringValue value = StringValue.JAPANESE;

  /** Number of UTF-8 bytes in the current sample. */
  private int size;
  /** Direct buffer holding the encoded sample, flipped so it is ready for reading. */
  private ByteBuffer byteBuffer;

  /**
   * Encodes the sample and stores it in a newly allocated direct buffer. Runs before every
   * invocation so that each measurement starts from a fresh buffer (intended to keep CPU cache
   * effects out of the numbers).
   */
  @Setup(Level.Invocation)
  public void initializeBuffers() {
    final byte[] encoded = value.value.getBytes(Internal.UTF_8);
    size = encoded.length;
    final ByteBuffer fresh = ByteBuffer.allocateDirect(size);
    fresh.put(encoded);
    fresh.flip();
    byteBuffer = fresh;
  }

  /** Frees the direct buffer right after the invocation that used it. */
  @TearDown(Level.Invocation)
  public void releaseBuffers() {
    release(byteBuffer);
  }

  /**
   * Copies the buffer contents to a heap array, validates them and decodes with the JDK.
   *
   * @return the decoded string
   * @throws InvalidProtocolBufferException if the copied bytes are not valid UTF-8
   */
  @Benchmark
  public String copyToHeapFirst() throws InvalidProtocolBufferException {
    // Source location as reported by UnsafeUtil for this buffer.
    final long source = UnsafeUtil.addressOffset(byteBuffer);
    final byte[] heapCopy = new byte[size];
    UnsafeUtil.copyMemory(source, heapCopy, 0, size);
    if (Utf8.isValidUtf8(heapCopy)) {
      return new String(heapCopy, Internal.UTF_8);
    }
    throw InvalidProtocolBufferException.invalidUtf8();
  }
}
/** Measures {@code Utf8.decodeUtf8} reading straight from a direct or heap {@code ByteBuffer}. */
@State(Scope.Thread)
public static class ByteBufferBenchmark {
  /** Sample text to decode; JMH runs the benchmark once per enum constant. */
  @Param
  public StringValue value = StringValue.JAPANESE;

  /** Kind of buffer to allocate for each invocation. */
  @Param
  public ByteBufferType byteBufferType = ByteBufferType.DIRECT;

  /** Number of UTF-8 bytes in the current sample. */
  private int size;
  /** Buffer holding the encoded sample, flipped so it is ready for reading. */
  private ByteBuffer byteBuffer;

  /**
   * Encodes the sample and stores it in a newly allocated buffer of the configured type. Runs
   * before every invocation so that each measurement starts from a fresh buffer (intended to keep
   * CPU cache effects out of the numbers).
   */
  @Setup(Level.Invocation)
  public void initializeBuffers() {
    final byte[] encoded = value.value.getBytes(Internal.UTF_8);
    size = encoded.length;
    if (byteBufferType == ByteBufferType.DIRECT) {
      byteBuffer = ByteBuffer.allocateDirect(size);
    } else {
      byteBuffer = ByteBuffer.allocate(size);
    }
    byteBuffer.put(encoded);
    byteBuffer.flip();
  }

  /** Hands the buffer to {@code release} right after the invocation that used it. */
  @TearDown(Level.Invocation)
  public void releaseBuffers() {
    release(byteBuffer);
  }

  /**
   * Decodes the buffer contents without an intermediate heap copy, passing 0 and the encoded
   * length to {@code Utf8.decodeUtf8}.
   *
   * @return the decoded string
   * @throws InvalidProtocolBufferException declared by the decoder
   */
  @Benchmark
  public String decodeDirectly() throws InvalidProtocolBufferException {
    final String decoded = Utf8.decodeUtf8(byteBuffer, 0, size);
    return decoded;
  }
}
/**
 * Compares two ways of turning a UTF-8 byte array into a string: validate-then-{@code new String}
 * versus {@code Utf8.decodeUtf8}.
 */
@State(Scope.Thread)
public static class ArrayBenchmark {
  /** Sample text to decode; JMH runs the benchmark once per enum constant. */
  @Param
  public StringValue value = StringValue.JAPANESE;

  /** Length of {@link #arr}. */
  private int size;
  /** UTF-8 encoding of the current sample. */
  private byte[] arr;

  /**
   * Re-encodes the sample into a new array before every invocation (intended to keep CPU cache
   * effects out of the numbers).
   */
  @Setup(Level.Invocation)
  public void initializeBuffers() {
    final byte[] encoded = value.value.getBytes(Internal.UTF_8);
    arr = encoded;
    size = encoded.length;
  }

  /**
   * Validates the array with {@code Utf8.isValidUtf8} and then decodes it with the JDK.
   *
   * @return the decoded string
   * @throws InvalidProtocolBufferException if the array is not valid UTF-8
   */
  @Benchmark
  public String array_jdk() throws InvalidProtocolBufferException {
    if (Utf8.isValidUtf8(arr)) {
      return new String(arr, Internal.UTF_8);
    }
    throw InvalidProtocolBufferException.invalidUtf8();
  }

  /**
   * Decodes the whole array through {@code Utf8.decodeUtf8}.
   *
   * @return the decoded string
   * @throws InvalidProtocolBufferException declared by the decoder
   */
  @Benchmark
  public String array_custom() throws InvalidProtocolBufferException {
    final String decoded = Utf8.decodeUtf8(arr, 0, size);
    return decoded;
  }
}
/**
 * Decodes all five {@link StringValue} samples within a single invocation, so that one benchmark
 * run exercises every sample instead of being parameterized on one of them.
 */
@State(Scope.Thread)
public static class AllTypesArrayBenchmark {
  private byte[] arrEnglish;
  private byte[] arrJapanese;
  private byte[] arrMixed;
  private byte[] arrHebrew;
  private byte[] arrChinese;

  /**
   * Re-encodes every sample into a new array before each invocation (intended to keep CPU cache
   * effects out of the numbers).
   */
  @Setup(Level.Invocation)
  public void initializeBuffers() {
    arrEnglish = StringValue.ENGLISH.value.getBytes(Internal.UTF_8);
    arrJapanese = StringValue.JAPANESE.value.getBytes(Internal.UTF_8);
    arrMixed = StringValue.MIXED.value.getBytes(Internal.UTF_8);
    arrHebrew = StringValue.HEBREW.value.getBytes(Internal.UTF_8);
    arrChinese = StringValue.CHINESE.value.getBytes(Internal.UTF_8);
  }

  /**
   * Validates and JDK-decodes each sample in turn, feeding every result to the blackhole.
   *
   * @param bh sink that receives each decoded string
   * @throws InvalidProtocolBufferException if any sample is not valid UTF-8
   */
  @Benchmark
  public void array_jdk(Blackhole bh) throws InvalidProtocolBufferException {
    bh.consume(validateAndDecode(arrEnglish));
    bh.consume(validateAndDecode(arrJapanese));
    bh.consume(validateAndDecode(arrMixed));
    bh.consume(validateAndDecode(arrHebrew));
    bh.consume(validateAndDecode(arrChinese));
  }

  /**
   * Decodes each sample in turn through {@code Utf8.decodeUtf8}, feeding every result to the
   * blackhole.
   *
   * @param bh sink that receives each decoded string
   * @throws InvalidProtocolBufferException declared by the decoder
   */
  @Benchmark
  public void array_custom(Blackhole bh) throws InvalidProtocolBufferException {
    bh.consume(Utf8.decodeUtf8(arrEnglish, 0, arrEnglish.length));
    bh.consume(Utf8.decodeUtf8(arrJapanese, 0, arrJapanese.length));
    bh.consume(Utf8.decodeUtf8(arrMixed, 0, arrMixed.length));
    bh.consume(Utf8.decodeUtf8(arrHebrew, 0, arrHebrew.length));
    bh.consume(Utf8.decodeUtf8(arrChinese, 0, arrChinese.length));
  }

  /** Checks {@code utf8} with {@code Utf8.isValidUtf8}, then decodes it with the JDK. */
  private static String validateAndDecode(byte[] utf8) throws InvalidProtocolBufferException {
    if (!Utf8.isValidUtf8(utf8)) {
      throw InvalidProtocolBufferException.invalidUtf8();
    }
    return new String(utf8, Internal.UTF_8);
  }
}
/**
 * Measures parsing a {@code StringsMessage} that carries 30 copies of the sample text, from a byte
 * array and from a direct or heap {@code ByteBuffer}.
 */
@State(Scope.Thread)
public static class StringsMessageBenchmark {
  /** Sample text placed in the message; JMH runs the benchmark once per enum constant. */
  @Param
  public StringValue value = StringValue.JAPANESE;

  /** Kind of buffer to allocate for each invocation. */
  @Param
  public ByteBufferType byteBufferType = ByteBufferType.DIRECT;

  /** Serialized message. */
  private byte[] arr;
  /** Length of {@link #arr}. */
  private int size;
  /** Buffer holding the same serialized bytes, flipped so it is ready for reading. */
  private ByteBuffer byteBuffer;

  /**
   * Builds and serializes the message, then copies the bytes into a newly allocated buffer of the
   * configured type. Runs before every invocation so that each measurement starts from fresh
   * buffers (intended to keep CPU cache effects out of the numbers).
   */
  @Setup(Level.Invocation)
  public void initializeBuffers() {
    ImmutableList.Builder<String> manyStrings = ImmutableList.builder();
    for (int i = 0; i < 30; i++) {
      manyStrings.add(value.value);
    }
    StringsMessage message = StringsMessage.newBuilder().addAllValue(manyStrings.build()).build();
    arr = message.toByteArray();
    size = arr.length;
    byteBuffer = byteBufferType == ByteBufferType.DIRECT
        ? ByteBuffer.allocateDirect(size) : ByteBuffer.allocate(size);
    byteBuffer.put(arr);
    byteBuffer.flip();
  }

  /**
   * Hands the buffer to {@code release} right after the invocation that used it.
   *
   * <p>The level must match the setup level: a bare {@code @TearDown} runs once per trial, which
   * would explicitly release only the last buffer and leave every earlier one to the GC.
   */
  @TearDown(Level.Invocation)
  public void releaseBuffers() {
    release(byteBuffer);
  }

  /** Parses the message from the byte array. */
  @Benchmark
  public StringsMessage array() throws IOException {
    return StringsMessage.parseFrom(CodedInputStream.newInstance(arr));
  }

  /** Parses the message directly from the buffer. */
  @Benchmark
  public StringsMessage byteBuffer() throws IOException {
    return StringsMessage.parseFrom(CodedInputStream.newInstance(byteBuffer));
  }

  /**
   * Same parse as {@link #byteBuffer()}, but in a forked JVM started with
   * {@code -Dcom.google.protobuf.enableCustomUtf8Decode=false} (presumably selecting the previous
   * decoding path; confirm against the runtime that reads this property).
   */
  @Benchmark
  @Fork(jvmArgsAppend = "-Dcom.google.protobuf.enableCustomUtf8Decode=false")
  public StringsMessage byteBuffer_old() throws IOException {
    return StringsMessage.parseFrom(CodedInputStream.newInstance(byteBuffer));
  }

  /** Copies the buffer contents into a new heap array and parses the message from that array. */
  @Benchmark
  public StringsMessage byteBuffer_copyFirst() throws IOException {
    byte[] bytes = new byte[size];
    // Relative get: advances the buffer position, which is fine because setup recreates the
    // buffer before every invocation.
    byteBuffer.get(bytes);
    return StringsMessage.parseFrom(CodedInputStream.newInstance(bytes));
  }
}
/**
 * Frees the native memory behind {@code buffer} immediately when it is a direct buffer; heap
 * buffers are left alone.
 *
 * <p>Without pooling, direct buffers are slow to deallocate, so the benchmarks call this right
 * after each invocation to keep slow GC of direct buffers from adding noise to later invocations.
 *
 * @param buffer the buffer a benchmark has finished with
 */
static void release(ByteBuffer buffer) {
  if (buffer.isDirect()) {
    // Relies on the JDK-internal DirectBuffer interface to run the buffer's cleaner eagerly.
    ((DirectBuffer) buffer).cleaner().clean();
  }
}
/**
 * Manual smoke run outside JMH: sets up three of the benchmark states with their default
 * parameters and prints the result of a few benchmark methods.
 *
 * @param args unused
 * @throws Exception if any of the invoked benchmark methods fails
 */
public static void main(String[] args) throws Exception {
  // Decode straight from a ByteBuffer.
  ByteBufferBenchmark bufferState = new ByteBufferBenchmark();
  bufferState.initializeBuffers();
  String fromBuffer = bufferState.decodeDirectly();
  System.out.println(fromBuffer);

  // Decode from a byte array, JDK path first and custom path second.
  ArrayBenchmark arrayState = new ArrayBenchmark();
  arrayState.initializeBuffers();
  String viaJdk = arrayState.array_jdk();
  System.out.println(viaJdk);
  String viaCustom = arrayState.array_custom();
  System.out.println(viaCustom);

  // Parse a whole message from the array and from the buffer.
  StringsMessageBenchmark messageState = new StringsMessageBenchmark();
  messageState.initializeBuffers();
  StringsMessage fromArray = messageState.array();
  System.out.println(fromArray);
  StringsMessage fromByteBuffer = messageState.byteBuffer();
  System.out.println(fromByteBuffer);
}
}
// (GitHub page footer) Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment