Skip to content

Instantly share code, notes, and snippets.

@apangin
Created January 14, 2016 13:54
Show Gist options
  • Save apangin/ad6352306a119411d441 to your computer and use it in GitHub Desktop.
Save apangin/ad6352306a119411d441 to your computer and use it in GitHub Desktop.
Java LZ4 benchmark
# VM version: JDK 1.8.0_60, VM 25.60-b23
Benchmark Mode Cnt Score Error Units
LZ4Bench.array thrpt 10 7,001 ± 0,027 ops/ms
LZ4Bench.byteBuffer thrpt 10 5,529 ± 0,030 ops/ms
LZ4Bench.jni thrpt 10 24,174 ± 0,376 ops/ms
LZ4Bench.unsafe thrpt 10 21,359 ± 0,318 ops/ms
# VM version: JDK 9-ea, VM 9-ea+100-2016-01-06-213251.javare.4235.nc
Benchmark Mode Cnt Score Error Units
LZ4Bench.array thrpt 10 7,897 ± 0,070 ops/ms
LZ4Bench.byteBuffer thrpt 10 12,048 ± 0,074 ops/ms
LZ4Bench.jni thrpt 10 24,300 ± 0,214 ops/ms
LZ4Bench.unsafe thrpt 10 23,501 ± 0,195 ops/ms
package lz4;
import static lz4.LZ4Common.*;
public class LZ4Array {
private static long getLong(byte[] b, int p) {
return (b[p] & 0xffL)
| (b[p + 1] & 0xffL) << 8
| (b[p + 2] & 0xffL) << 16
| (b[p + 3] & 0xffL) << 24
| (b[p + 4] & 0xffL) << 32
| (b[p + 5] & 0xffL) << 40
| (b[p + 6] & 0xffL) << 48
| (b[p + 7] & 0xffL) << 56;
}
private static int getInt(byte[] b, int p) {
return (b[p] & 0xff)
| (b[p + 1] & 0xff) << 8
| (b[p + 2] & 0xff) << 16
| (b[p + 3] & 0xff) << 24;
}
private static short getShort(byte[] b, int p) {
return (short) ((b[p] & 0xff) | (b[p + 1] & 0xff) << 8);
}
private static byte getByte(byte[] b, int p) {
return b[p];
}
private static void putShort(byte[] b, int p, short val) {
b[p] = (byte) val;
b[p + 1] = (byte) (val >>> 8);
}
private static void putByte(byte[] b, int p, byte val) {
b[p] = val;
}
private static int hashPosition16(byte[] src, int p) {
long sequence = getLong(src, p);
return (int) ((sequence * 889523592379L) >>> (41 - LZ4_MEMORY_USAGE)) & (HASH_SIZE_16 - 1);
}
private static void putPosition(short[] table, int p, byte[] src, int srcOffset) {
int h = hashPosition16(src, p);
table[h] = (short) (p - srcOffset);
}
private static int replacePosition(short[] table, int p, byte[] src, int srcOffset) {
int h = hashPosition16(src, p);
int prev = (table[h] & 0xffff) + srcOffset;
table[h] = (short) (p - srcOffset);
return prev;
}
private static int matchLength(byte[] src, int pIn, int pMatch, int pInLimit) {
final int pStart = pIn;
while (pIn < pInLimit - 7) {
long diff = getLong(src, pMatch) ^ getLong(src, pIn);
if (diff != 0) {
pIn += Long.numberOfTrailingZeros(diff) >>> 3;
return pIn - pStart;
}
pIn += 8;
pMatch += 8;
}
if (pIn < pInLimit - 3 && getInt(src, pMatch) == getInt(src, pIn)) {
pIn += 4;
pMatch += 4;
}
if (pIn < pInLimit - 1 && getShort(src, pMatch) == getShort(src, pIn)) {
pIn += 2;
pMatch += 2;
}
if (pIn < pInLimit && getByte(src, pMatch) == getByte(src, pIn)) {
pIn++;
}
return pIn - pStart;
}
private static void wildCopy(byte[] src, int srcOffset, byte[] dst, int dstOffset, int dstEnd) {
do {
dst[dstOffset] = src[srcOffset];
dst[dstOffset + 1] = src[srcOffset + 1];
dst[dstOffset + 2] = src[srcOffset + 2];
dst[dstOffset + 3] = src[srcOffset + 3];
dst[dstOffset + 4] = src[srcOffset + 4];
dst[dstOffset + 5] = src[srcOffset + 5];
dst[dstOffset + 6] = src[srcOffset + 6];
dst[dstOffset + 7] = src[srcOffset + 7];
dstOffset += 8;
srcOffset += 8;
} while (dstOffset < dstEnd);
}
public static int compress(final byte[] src, final int srcOffset,
final byte[] dst, final int dstOffset,
final int inputSize) {
final int srcEnd = srcOffset + inputSize;
final int mfLimit = srcEnd - MFLIMIT;
final int matchLimit = srcEnd - LASTLITERALS;
int ip = srcOffset;
int anchor = srcOffset;
int op = dstOffset;
if (inputSize >= LZ4_MIN_LENGTH) {
final short[] table = new short[HASH_SIZE_16];
// First Byte
putPosition(table, ip, src, srcOffset);
ip++;
int forwardH = hashPosition16(src, ip);
mainLoop:
for (; ; ) {
int match;
int token;
{
int forwardIp = ip;
int step = 1;
int searchMatchNb = ACCELERATION << LZ4_SKIP_TRIGGER;
// Find a match
do {
int h = forwardH;
ip = forwardIp;
forwardIp += step;
step = (searchMatchNb++ >>> LZ4_SKIP_TRIGGER);
if (forwardIp > mfLimit) break mainLoop;
match = (table[h] & 0xffff) + srcOffset;
forwardH = hashPosition16(src, forwardIp);
table[h] = (short) (ip - srcOffset);
} while (getInt(src, match) != getInt(src, ip));
}
// Catch up
while (ip > anchor && match > srcOffset && getByte(src, ip - 1) == getByte(src, match - 1)) {
ip--;
match--;
}
{
// Encode Literal length
int litLength = ip - anchor;
token = op++;
if (litLength >= RUN_MASK) {
int len = litLength - RUN_MASK;
putByte(dst, token, (byte) (RUN_MASK << ML_BITS));
for (; len >= 255; len -= 255) {
putByte(dst, op++, (byte) 0xff);
}
putByte(dst, op++, (byte) len);
} else {
putByte(dst, token, (byte) (litLength << ML_BITS));
}
// Copy Literals
wildCopy(src, anchor, dst, op, op + litLength);
op += litLength;
}
for (; ; ) {
// Encode Offset
putShort(dst, op, (short) (ip - match));
op += 2;
// Encode MatchLength
{
int matchLength = matchLength(src, ip + MINMATCH, match + MINMATCH, matchLimit);
ip += MINMATCH + matchLength;
if (matchLength >= ML_MASK) {
putByte(dst, token, (byte) (getByte(dst, token) + ML_MASK));
matchLength -= ML_MASK;
for (; matchLength >= 510; matchLength -= 510) {
putShort(dst, op, (short) 0xffff);
op += 2;
}
if (matchLength >= 255) {
matchLength -= 255;
putByte(dst, op++, (byte) 0xff);
}
putByte(dst, op++, (byte) matchLength);
} else {
putByte(dst, token, (byte) (getByte(dst, token) + matchLength));
}
}
anchor = ip;
// Test end of chunk
if (ip > mfLimit) break mainLoop;
// Fill table
putPosition(table, ip - 2, src, srcOffset);
// Test next position
match = replacePosition(table, ip, src, srcOffset);
if (match + MAX_DISTANCE >= ip && getInt(src, match) == getInt(src, ip)) {
token = op++;
putByte(dst, token, (byte) 0);
continue;
}
// Prepare next loop
forwardH = hashPosition16(src, ++ip);
break;
}
} // mainLoop
} // if (inputSize >= LZ4_MIN_LENGTH)
// Encode Last Literals
{
int lastRun = srcEnd - anchor;
if (lastRun >= RUN_MASK) {
int accumulator = lastRun - RUN_MASK;
putByte(dst, op++, (byte) (RUN_MASK << ML_BITS));
for (; accumulator >= 255; accumulator -= 255) {
putByte(dst, op++, (byte) 0xff);
}
putByte(dst, op++, (byte) accumulator);
} else {
putByte(dst, op++, (byte) (lastRun << ML_BITS));
}
System.arraycopy(src, anchor, dst, op, lastRun);
op += lastRun;
}
return op - dstOffset;
}
}
package lz4;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Paths;
@State(Scope.Benchmark)
public class LZ4Bench {
byte[] in;
byte[] out;
@Setup
public void setup() throws IOException {
in = Files.readAllBytes(Paths.get("test32k.txt"));
out = new byte[LZ4Common.compressBound(in.length)];
if (array() != byteBuffer() || array() != jni() || array() != unsafe()) {
throw new AssertionError();
}
}
@Benchmark
public int array() {
return LZ4Array.compress(in, 0, out, 0, in.length);
}
@Benchmark
public int byteBuffer() {
ByteBuffer src = ByteBuffer.wrap(in).order(ByteOrder.nativeOrder());
ByteBuffer dst = ByteBuffer.wrap(out).order(ByteOrder.nativeOrder());
return LZ4ByteBuffer.compress(src, 0, dst, 0, in.length);
}
@Benchmark
public int jni() {
return LZ4Native.compress(in, 0, out, 0, in.length);
}
@Benchmark
public int unsafe() {
return LZ4Unsafe.compress(in, LZ4Unsafe.byteArrayOffset, out, LZ4Unsafe.byteArrayOffset, in.length);
}
}
package lz4;
import java.nio.ByteBuffer;
import static lz4.LZ4Common.*;
public class LZ4ByteBuffer {
private static int hashPosition16(ByteBuffer src, int p) {
long sequence = src.getLong(p);
return (int) ((sequence * 889523592379L) >>> (41 - LZ4_MEMORY_USAGE)) & (HASH_SIZE_16 - 1);
}
private static void putPosition(short[] table, int p, ByteBuffer src, int srcOffset) {
int h = hashPosition16(src, p);
table[h] = (short) (p - srcOffset);
}
private static int replacePosition(short[] table, int p, ByteBuffer src, int srcOffset) {
int h = hashPosition16(src, p);
int prev = (table[h] & 0xffff) + srcOffset;
table[h] = (short) (p - srcOffset);
return prev;
}
private static int matchLength(ByteBuffer src, int pIn, int pMatch, int pInLimit) {
final int pStart = pIn;
while (pIn < pInLimit - 7) {
long diff = src.getLong(pMatch) ^ src.getLong(pIn);
if (diff != 0) {
pIn += Long.numberOfTrailingZeros(diff) >>> 3;
return pIn - pStart;
}
pIn += 8;
pMatch += 8;
}
if (pIn < pInLimit - 3 && src.getInt(pMatch) == src.getInt(pIn)) {
pIn += 4;
pMatch += 4;
}
if (pIn < pInLimit - 1 && src.getShort(pMatch) == src.getShort(pIn)) {
pIn += 2;
pMatch += 2;
}
if (pIn < pInLimit && src.get(pMatch) == src.get(pIn)) {
pIn++;
}
return pIn - pStart;
}
private static void wildCopy(ByteBuffer src, int srcOffset, ByteBuffer dst, int dstOffset, int dstEnd) {
do {
dst.putLong(dstOffset, src.getLong(srcOffset));
dstOffset += 8;
srcOffset += 8;
} while (dstOffset < dstEnd);
}
public static int compress(final ByteBuffer src, final int srcOffset,
final ByteBuffer dst, final int dstOffset,
final int inputSize) {
final int srcEnd = srcOffset + inputSize;
final int mfLimit = srcEnd - MFLIMIT;
final int matchLimit = srcEnd - LASTLITERALS;
int ip = srcOffset;
int anchor = srcOffset;
int op = dstOffset;
if (inputSize >= LZ4_MIN_LENGTH) {
final short[] table = new short[HASH_SIZE_16];
// First Byte
putPosition(table, ip, src, srcOffset);
ip++;
int forwardH = hashPosition16(src, ip);
mainLoop:
for (; ; ) {
int match;
int token;
{
int forwardIp = ip;
int step = 1;
int searchMatchNb = ACCELERATION << LZ4_SKIP_TRIGGER;
// Find a match
do {
int h = forwardH;
ip = forwardIp;
forwardIp += step;
step = (searchMatchNb++ >>> LZ4_SKIP_TRIGGER);
if (forwardIp > mfLimit) break mainLoop;
match = (table[h] & 0xffff) + srcOffset;
forwardH = hashPosition16(src, forwardIp);
table[h] = (short) (ip - srcOffset);
} while (src.getInt(match) != src.getInt(ip));
}
// Catch up
while (ip > anchor && match > srcOffset && src.get(ip - 1) == src.get(match - 1)) {
ip--;
match--;
}
{
// Encode Literal length
int litLength = ip - anchor;
token = op++;
if (litLength >= RUN_MASK) {
int len = litLength - RUN_MASK;
dst.put(token, (byte) (RUN_MASK << ML_BITS));
for (; len >= 255; len -= 255) {
dst.put(op++, (byte) 0xff);
}
dst.put(op++, (byte) len);
} else {
dst.put(token, (byte) (litLength << ML_BITS));
}
// Copy Literals
wildCopy(src, anchor, dst, op, op + litLength);
op += litLength;
}
for (; ; ) {
// Encode Offset
dst.putShort(op, (short) (ip - match));
op += 2;
// Encode MatchLength
{
int matchLength = matchLength(src, ip + MINMATCH, match + MINMATCH, matchLimit);
ip += MINMATCH + matchLength;
if (matchLength >= ML_MASK) {
dst.put(token, (byte) (dst.get(token) + ML_MASK));
matchLength -= ML_MASK;
for (; matchLength >= 510; matchLength -= 510) {
dst.putShort(op, (short) 0xffff);
op += 2;
}
if (matchLength >= 255) {
matchLength -= 255;
dst.put(op++, (byte) 0xff);
}
dst.put(op++, (byte) matchLength);
} else {
dst.put(token, (byte) (dst.get(token) + matchLength));
}
}
anchor = ip;
// Test end of chunk
if (ip > mfLimit) break mainLoop;
// Fill table
putPosition(table, ip - 2, src, srcOffset);
// Test next position
match = replacePosition(table, ip, src, srcOffset);
if (match + MAX_DISTANCE >= ip && src.getInt(match) == src.getInt(ip)) {
token = op++;
dst.put(token, (byte) 0);
continue;
}
// Prepare next loop
forwardH = hashPosition16(src, ++ip);
break;
}
} // mainLoop
} // if (inputSize >= LZ4_MIN_LENGTH)
// Encode Last Literals
{
int lastRun = srcEnd - anchor;
if (lastRun >= RUN_MASK) {
int accumulator = lastRun - RUN_MASK;
dst.put(op++, (byte) (RUN_MASK << ML_BITS));
for (; accumulator >= 255; accumulator -= 255) {
dst.put(op++, (byte) 0xff);
}
dst.put(op++, (byte) accumulator);
} else {
dst.put(op++, (byte) (lastRun << ML_BITS));
}
System.arraycopy(src.array(), anchor, dst.array(), op, lastRun);
op += lastRun;
}
return op - dstOffset;
}
}
package lz4;
public class LZ4Common {
static final int LZ4_MEMORY_USAGE = 15;
static final int ACCELERATION = 1;
static final int MINMATCH = 4;
static final int COPYLENGTH = 8;
static final int LASTLITERALS = 5;
static final int MFLIMIT = COPYLENGTH + MINMATCH;
static final int LZ4_MIN_LENGTH = MFLIMIT + 1;
static final int MAXD_LOG = 16;
static final int MAX_DISTANCE = (1 << MAXD_LOG) - 1;
static final int ML_BITS = 4;
static final int ML_MASK = (1 << ML_BITS) - 1;
static final int RUN_BITS = 8 - ML_BITS;
static final int RUN_MASK = (1 << RUN_BITS) - 1;
static final int HASH_SIZE_16 = 1 << (LZ4_MEMORY_USAGE - 1);
static final int LZ4_SKIP_TRIGGER = 6;
public static int compressBound(int size) {
return size + size / 255 + 16;
}
}
package lz4;
public class LZ4Native {
public static native int compress(final byte[] src, final int srcOffset,
final byte[] dst, final int dstOffset,
final int inputSize);
static {
System.loadLibrary("lz4");
}
}
package lz4;
import sun.misc.Unsafe;
import java.lang.reflect.Field;
import static lz4.LZ4Common.*;
public class LZ4Unsafe {
static final Unsafe unsafe = getUnsafe();
static final long byteArrayOffset = unsafe.arrayBaseOffset(byte[].class);
private static int hashPosition16(Object src, long p) {
long sequence = unsafe.getLong(src, p);
return (int) ((sequence * 889523592379L) >>> (41 - LZ4_MEMORY_USAGE)) & (HASH_SIZE_16 - 1);
}
private static void putPosition(short[] table, long p, Object src, long srcOffset) {
int h = hashPosition16(src, p);
table[h] = (short) (p - srcOffset);
}
private static long replacePosition(short[] table, long p, Object src, long srcOffset) {
int h = hashPosition16(src, p);
long prev = (table[h] & 0xffffL) + srcOffset;
table[h] = (short) (p - srcOffset);
return prev;
}
private static int matchLength(Object src, long pIn, long pMatch, long pInLimit) {
final long pStart = pIn;
while (pIn < pInLimit - 7) {
long diff = unsafe.getLong(src, pMatch) ^ unsafe.getLong(src, pIn);
if (diff != 0) {
pIn += Long.numberOfTrailingZeros(diff) >>> 3;
return (int) (pIn - pStart);
}
pIn += 8;
pMatch += 8;
}
if (pIn < pInLimit - 3 && unsafe.getInt(src, pMatch) == unsafe.getInt(src, pIn)) {
pIn += 4;
pMatch += 4;
}
if (pIn < pInLimit - 1 && unsafe.getShort(src, pMatch) == unsafe.getShort(src, pIn)) {
pIn += 2;
pMatch += 2;
}
if (pIn < pInLimit && unsafe.getByte(src, pMatch) == unsafe.getByte(src, pIn)) {
pIn++;
}
return (int) (pIn - pStart);
}
private static void wildCopy(Object src, long srcOffset, Object dst, long dstOffset, long dstEnd) {
do {
unsafe.putLong(dst, dstOffset, unsafe.getLong(src, srcOffset));
dstOffset += 8;
srcOffset += 8;
} while (dstOffset < dstEnd);
}
public static int compress(final Object src, final long srcOffset,
final Object dst, final long dstOffset,
final int inputSize) {
final long srcEnd = srcOffset + inputSize;
final long mfLimit = srcEnd - MFLIMIT;
final long matchLimit = srcEnd - LASTLITERALS;
long ip = srcOffset;
long anchor = srcOffset;
long op = dstOffset;
if (inputSize >= LZ4_MIN_LENGTH) {
final short[] table = new short[HASH_SIZE_16];
// First Byte
putPosition(table, ip, src, srcOffset);
ip++;
int forwardH = hashPosition16(src, ip);
mainLoop:
for (; ; ) {
long match;
long token;
{
long forwardIp = ip;
int step = 1;
int searchMatchNb = ACCELERATION << LZ4_SKIP_TRIGGER;
// Find a match
do {
int h = forwardH;
ip = forwardIp;
forwardIp += step;
step = (searchMatchNb++ >>> LZ4_SKIP_TRIGGER);
if (forwardIp > mfLimit) break mainLoop;
match = (table[h] & 0xffffL) + srcOffset;
forwardH = hashPosition16(src, forwardIp);
table[h] = (short) (ip - srcOffset);
} while (unsafe.getInt(src, match) != unsafe.getInt(src, ip));
}
// Catch up
while (ip > anchor && match > srcOffset && unsafe.getByte(src, ip - 1) == unsafe.getByte(src, match - 1)) {
ip--;
match--;
}
{
// Encode Literal length
int litLength = (int) (ip - anchor);
token = op++;
if (litLength >= RUN_MASK) {
int len = litLength - RUN_MASK;
unsafe.putByte(dst, token, (byte) (RUN_MASK << ML_BITS));
for (; len >= 255; len -= 255) {
unsafe.putByte(dst, op++, (byte) 0xff);
}
unsafe.putByte(dst, op++, (byte) len);
} else {
unsafe.putByte(dst, token, (byte) (litLength << ML_BITS));
}
// Copy Literals
wildCopy(src, anchor, dst, op, op + litLength);
op += litLength;
}
for (; ; ) {
// Encode Offset
unsafe.putShort(dst, op, (short) (ip - match));
op += 2;
// Encode MatchLength
{
int matchLength = matchLength(src, ip + MINMATCH, match + MINMATCH, matchLimit);
ip += MINMATCH + matchLength;
if (matchLength >= ML_MASK) {
unsafe.putByte(dst, token, (byte) (unsafe.getByte(dst, token) + ML_MASK));
matchLength -= ML_MASK;
for (; matchLength >= 510; matchLength -= 510) {
unsafe.putShort(dst, op, (short) 0xffff);
op += 2;
}
if (matchLength >= 255) {
matchLength -= 255;
unsafe.putByte(dst, op++, (byte) 0xff);
}
unsafe.putByte(dst, op++, (byte) matchLength);
} else {
unsafe.putByte(dst, token, (byte) (unsafe.getByte(dst, token) + matchLength));
}
}
anchor = ip;
// Test end of chunk
if (ip > mfLimit) break mainLoop;
// Fill table
putPosition(table, ip - 2, src, srcOffset);
// Test next position
match = replacePosition(table, ip, src, srcOffset);
if (match + MAX_DISTANCE >= ip && unsafe.getInt(src, match) == unsafe.getInt(src, ip)) {
token = op++;
unsafe.putByte(dst, token, (byte) 0);
continue;
}
// Prepare next loop
forwardH = hashPosition16(src, ++ip);
break;
}
} // mainLoop
} // if (inputSize >= LZ4_MIN_LENGTH)
// Encode Last Literals
{
int lastRun = (int) (srcEnd - anchor);
if (lastRun >= RUN_MASK) {
int accumulator = lastRun - RUN_MASK;
unsafe.putByte(dst, op++, (byte) (RUN_MASK << ML_BITS));
for (; accumulator >= 255; accumulator -= 255) {
unsafe.putByte(dst, op++, (byte) 0xff);
}
unsafe.putByte(dst, op++, (byte) accumulator);
} else {
unsafe.putByte(dst, op++, (byte) (lastRun << ML_BITS));
}
unsafe.copyMemory(src, anchor, dst, op, lastRun);
op += lastRun;
}
return (int) (op - dstOffset);
}
private static Unsafe getUnsafe() {
try {
Field f = Unsafe.class.getDeclaredField("theUnsafe");
f.setAccessible(true);
return (Unsafe) f.get(null);
} catch (Exception e) {
throw new AssertionError(e);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment