apangin/LZ4Array.java

## _results.txt
# VM version: JDK 1.8.0_60, VM 25.60-b23

Benchmark             Mode  Cnt   Score   Error   Units
LZ4Bench.array       thrpt   10   7,001 ± 0,027  ops/ms
LZ4Bench.byteBuffer  thrpt   10   5,529 ± 0,030  ops/ms
LZ4Bench.jni         thrpt   10  24,174 ± 0,376  ops/ms
LZ4Bench.unsafe      thrpt   10  21,359 ± 0,318  ops/ms

# VM version: JDK 9-ea, VM 9-ea+100-2016-01-06-213251.javare.4235.nc

Benchmark             Mode  Cnt   Score   Error   Units
LZ4Bench.array       thrpt   10   7,897 ± 0,070  ops/ms
LZ4Bench.byteBuffer  thrpt   10  12,048 ± 0,074  ops/ms
LZ4Bench.jni         thrpt   10  24,300 ± 0,214  ops/ms
LZ4Bench.unsafe      thrpt   10  23,501 ± 0,195  ops/ms

## LZ4Array.java
package lz4;

import static lz4.LZ4Common.*;

public class LZ4Array {

    private static long getLong(byte[] b, int p) {
        return (b[p] & 0xffL)
                | (b[p + 1] & 0xffL) << 8
                | (b[p + 2] & 0xffL) << 16
                | (b[p + 3] & 0xffL) << 24
                | (b[p + 4] & 0xffL) << 32
                | (b[p + 5] & 0xffL) << 40
                | (b[p + 6] & 0xffL) << 48
                | (b[p + 7] & 0xffL) << 56;
    }

    private static int getInt(byte[] b, int p) {
        return (b[p] & 0xff)
                | (b[p + 1] & 0xff) << 8
                | (b[p + 2] & 0xff) << 16
                | (b[p + 3] & 0xff) << 24;
    }

    private static short getShort(byte[] b, int p) {
        return (short) ((b[p] & 0xff) | (b[p + 1] & 0xff) << 8);
    }

    private static byte getByte(byte[] b, int p) {
        return b[p];
    }

    private static void putShort(byte[] b, int p, short val) {
        b[p] = (byte) val;
        b[p + 1] = (byte) (val >>> 8);
    }

    private static void putByte(byte[] b, int p, byte val) {
        b[p] = val;
    }

    private static int hashPosition16(byte[] src, int p) {
        long sequence = getLong(src, p);
        return (int) ((sequence * 889523592379L) >>> (41 - LZ4_MEMORY_USAGE)) & (HASH_SIZE_16 - 1);
    }

    private static void putPosition(short[] table, int p, byte[] src, int srcOffset) {
        int h = hashPosition16(src, p);
        table[h] = (short) (p - srcOffset);
    }

    private static int replacePosition(short[] table, int p, byte[] src, int srcOffset) {
        int h = hashPosition16(src, p);
        int prev = (table[h] & 0xffff) + srcOffset;
        table[h] = (short) (p - srcOffset);
        return prev;
    }

    private static int matchLength(byte[] src, int pIn, int pMatch, int pInLimit) {
        final int pStart = pIn;

        while (pIn < pInLimit - 7) {
            long diff = getLong(src, pMatch) ^ getLong(src, pIn);
            if (diff != 0) {
                pIn += Long.numberOfTrailingZeros(diff) >>> 3;
                return pIn - pStart;
            }
            pIn += 8;
            pMatch += 8;
        }

        if (pIn < pInLimit - 3 && getInt(src, pMatch) == getInt(src, pIn)) {
            pIn += 4;
            pMatch += 4;
        }
        if (pIn < pInLimit - 1 && getShort(src, pMatch) == getShort(src, pIn)) {
            pIn += 2;
            pMatch += 2;
        }
        if (pIn < pInLimit && getByte(src, pMatch) == getByte(src, pIn)) {
            pIn++;
        }
        return pIn - pStart;
    }

    private static void wildCopy(byte[] src, int srcOffset, byte[] dst, int dstOffset, int dstEnd) {
        do {
            dst[dstOffset] = src[srcOffset];
            dst[dstOffset + 1] = src[srcOffset + 1];
            dst[dstOffset + 2] = src[srcOffset + 2];
            dst[dstOffset + 3] = src[srcOffset + 3];
            dst[dstOffset + 4] = src[srcOffset + 4];
            dst[dstOffset + 5] = src[srcOffset + 5];
            dst[dstOffset + 6] = src[srcOffset + 6];
            dst[dstOffset + 7] = src[srcOffset + 7];
            dstOffset += 8;
            srcOffset += 8;
        } while (dstOffset < dstEnd);
    }

    public static int compress(final byte[] src, final int srcOffset,
                               final byte[] dst, final int dstOffset,
                               final int inputSize) {
        final int srcEnd = srcOffset + inputSize;
        final int mfLimit = srcEnd - MFLIMIT;
        final int matchLimit = srcEnd - LASTLITERALS;

        int ip = srcOffset;
        int anchor = srcOffset;
        int op = dstOffset;

        if (inputSize >= LZ4_MIN_LENGTH) {
            final short[] table = new short[HASH_SIZE_16];

            // First Byte
            putPosition(table, ip, src, srcOffset);
            ip++;
            int forwardH = hashPosition16(src, ip);

            mainLoop:
            for (; ; ) {
                int match;
                int token;
                {
                    int forwardIp = ip;
                    int step = 1;
                    int searchMatchNb = ACCELERATION << LZ4_SKIP_TRIGGER;

                    // Find a match
                    do {
                        int h = forwardH;
                        ip = forwardIp;
                        forwardIp += step;
                        step = (searchMatchNb++ >>> LZ4_SKIP_TRIGGER);

                        if (forwardIp > mfLimit) break mainLoop;

                        match = (table[h] & 0xffff) + srcOffset;
                        forwardH = hashPosition16(src, forwardIp);
                        table[h] = (short) (ip - srcOffset);
                    } while (getInt(src, match) != getInt(src, ip));
                }

                // Catch up
                while (ip > anchor && match > srcOffset && getByte(src, ip - 1) == getByte(src, match - 1)) {
                    ip--;
                    match--;
                }

                {
                    // Encode Literal length
                    int litLength = ip - anchor;
                    token = op++;
                    if (litLength >= RUN_MASK) {
                        int len = litLength - RUN_MASK;
                        putByte(dst, token, (byte) (RUN_MASK << ML_BITS));
                        for (; len >= 255; len -= 255) {
                            putByte(dst, op++, (byte) 0xff);
                        }
                        putByte(dst, op++, (byte) len);
                    } else {
                        putByte(dst, token, (byte) (litLength << ML_BITS));
                    }

                    // Copy Literals
                    wildCopy(src, anchor, dst, op, op + litLength);
                    op += litLength;
                }

                for (; ; ) {
                    // Encode Offset
                    putShort(dst, op, (short) (ip - match));
                    op += 2;

                    // Encode MatchLength
                    {
                        int matchLength = matchLength(src, ip + MINMATCH, match + MINMATCH, matchLimit);
                        ip += MINMATCH + matchLength;

                        if (matchLength >= ML_MASK) {
                            putByte(dst, token, (byte) (getByte(dst, token) + ML_MASK));
                            matchLength -= ML_MASK;
                            for (; matchLength >= 510; matchLength -= 510) {
                                putShort(dst, op, (short) 0xffff);
                                op += 2;
                            }
                            if (matchLength >= 255) {
                                matchLength -= 255;
                                putByte(dst, op++, (byte) 0xff);
                            }
                            putByte(dst, op++, (byte) matchLength);
                        } else {
                            putByte(dst, token, (byte) (getByte(dst, token) + matchLength));
                        }
                    }

                    anchor = ip;

                    // Test end of chunk
                    if (ip > mfLimit) break mainLoop;

                    // Fill table
                    putPosition(table, ip - 2, src, srcOffset);

                    // Test next position
                    match = replacePosition(table, ip, src, srcOffset);
                    if (match + MAX_DISTANCE >= ip && getInt(src, match) == getInt(src, ip)) {
                        token = op++;
                        putByte(dst, token, (byte) 0);
                        continue;
                    }

                    // Prepare next loop
                    forwardH = hashPosition16(src, ++ip);
                    break;
                }
            } // mainLoop
        } // if (inputSize >= LZ4_MIN_LENGTH)

        // Encode Last Literals
        {
            int lastRun = srcEnd - anchor;
            if (lastRun >= RUN_MASK) {
                int accumulator = lastRun - RUN_MASK;
                putByte(dst, op++, (byte) (RUN_MASK << ML_BITS));
                for (; accumulator >= 255; accumulator -= 255) {
                    putByte(dst, op++, (byte) 0xff);
                }
                putByte(dst, op++, (byte) accumulator);
            } else {
                putByte(dst, op++, (byte) (lastRun << ML_BITS));
            }
            System.arraycopy(src, anchor, dst, op, lastRun);
            op += lastRun;
        }

        return op - dstOffset;
    }
}

## LZ4Bench.java
package lz4;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.file.Files;
import java.nio.file.Paths;

@State(Scope.Benchmark)
public class LZ4Bench {

    byte[] in;
    byte[] out;

    @Setup
    public void setup() throws IOException {
        in = Files.readAllBytes(Paths.get("test32k.txt"));
        out = new byte[LZ4Common.compressBound(in.length)];

        if (array() != byteBuffer() || array() != jni() || array() != unsafe()) {
            throw new AssertionError();
        }
    }

    @Benchmark
    public int array() {
        return LZ4Array.compress(in, 0, out, 0, in.length);
    }

    @Benchmark
    public int byteBuffer() {
        ByteBuffer src = ByteBuffer.wrap(in).order(ByteOrder.nativeOrder());
        ByteBuffer dst = ByteBuffer.wrap(out).order(ByteOrder.nativeOrder());
        return LZ4ByteBuffer.compress(src, 0, dst, 0, in.length);
    }

    @Benchmark
    public int jni() {
        return LZ4Native.compress(in, 0, out, 0, in.length);
    }

    @Benchmark
    public int unsafe() {
        return LZ4Unsafe.compress(in, LZ4Unsafe.byteArrayOffset, out, LZ4Unsafe.byteArrayOffset, in.length);
    }
}

## LZ4ByteBuffer.java
package lz4;

import java.nio.ByteBuffer;

import static lz4.LZ4Common.*;

public class LZ4ByteBuffer {

    private static int hashPosition16(ByteBuffer src, int p) {
        long sequence = src.getLong(p);
        return (int) ((sequence * 889523592379L) >>> (41 - LZ4_MEMORY_USAGE)) & (HASH_SIZE_16 - 1);
    }

    private static void putPosition(short[] table, int p, ByteBuffer src, int srcOffset) {
        int h = hashPosition16(src, p);
        table[h] = (short) (p - srcOffset);
    }

    private static int replacePosition(short[] table, int p, ByteBuffer src, int srcOffset) {
        int h = hashPosition16(src, p);
        int prev = (table[h] & 0xffff) + srcOffset;
        table[h] = (short) (p - srcOffset);
        return prev;
    }

    private static int matchLength(ByteBuffer src, int pIn, int pMatch, int pInLimit) {
        final int pStart = pIn;

        while (pIn < pInLimit - 7) {
            long diff = src.getLong(pMatch) ^ src.getLong(pIn);
            if (diff != 0) {
                pIn += Long.numberOfTrailingZeros(diff) >>> 3;
                return pIn - pStart;
            }
            pIn += 8;
            pMatch += 8;
        }

        if (pIn < pInLimit - 3 && src.getInt(pMatch) == src.getInt(pIn)) {
            pIn += 4;
            pMatch += 4;
        }
        if (pIn < pInLimit - 1 && src.getShort(pMatch) == src.getShort(pIn)) {
            pIn += 2;
            pMatch += 2;
        }
        if (pIn < pInLimit && src.get(pMatch) == src.get(pIn)) {
            pIn++;
        }
        return pIn - pStart;
    }

    private static void wildCopy(ByteBuffer src, int srcOffset, ByteBuffer dst, int dstOffset, int dstEnd) {
        do {
            dst.putLong(dstOffset, src.getLong(srcOffset));
            dstOffset += 8;
            srcOffset += 8;
        } while (dstOffset < dstEnd);
    }

    public static int compress(final ByteBuffer src, final int srcOffset,
                               final ByteBuffer dst, final int dstOffset,
                               final int inputSize) {
        final int srcEnd = srcOffset + inputSize;
        final int mfLimit = srcEnd - MFLIMIT;
        final int matchLimit = srcEnd - LASTLITERALS;

        int ip = srcOffset;
        int anchor = srcOffset;
        int op = dstOffset;

        if (inputSize >= LZ4_MIN_LENGTH) {
            final short[] table = new short[HASH_SIZE_16];

            // First Byte
            putPosition(table, ip, src, srcOffset);
            ip++;
            int forwardH = hashPosition16(src, ip);

            mainLoop:
            for (; ; ) {
                int match;
                int token;
                {
                    int forwardIp = ip;
                    int step = 1;
                    int searchMatchNb = ACCELERATION << LZ4_SKIP_TRIGGER;

                    // Find a match
                    do {
                        int h = forwardH;
                        ip = forwardIp;
                        forwardIp += step;
                        step = (searchMatchNb++ >>> LZ4_SKIP_TRIGGER);

                        if (forwardIp > mfLimit) break mainLoop;

                        match = (table[h] & 0xffff) + srcOffset;
                        forwardH = hashPosition16(src, forwardIp);
                        table[h] = (short) (ip - srcOffset);
                    } while (src.getInt(match) != src.getInt(ip));
                }

                // Catch up
                while (ip > anchor && match > srcOffset && src.get(ip - 1) == src.get(match - 1)) {
                    ip--;
                    match--;
                }

                {
                    // Encode Literal length
                    int litLength = ip - anchor;
                    token = op++;
                    if (litLength >= RUN_MASK) {
                        int len = litLength - RUN_MASK;
                        dst.put(token, (byte) (RUN_MASK << ML_BITS));
                        for (; len >= 255; len -= 255) {
                            dst.put(op++, (byte) 0xff);
                        }
                        dst.put(op++, (byte) len);
                    } else {
                        dst.put(token, (byte) (litLength << ML_BITS));
                    }

                    // Copy Literals
                    wildCopy(src, anchor, dst, op, op + litLength);
                    op += litLength;
                }

                for (; ; ) {
                    // Encode Offset
                    dst.putShort(op, (short) (ip - match));
                    op += 2;

                    // Encode MatchLength
                    {
                        int matchLength = matchLength(src, ip + MINMATCH, match + MINMATCH, matchLimit);
                        ip += MINMATCH + matchLength;

                        if (matchLength >= ML_MASK) {
                            dst.put(token, (byte) (dst.get(token) + ML_MASK));
                            matchLength -= ML_MASK;
                            for (; matchLength >= 510; matchLength -= 510) {
                                dst.putShort(op, (short) 0xffff);
                                op += 2;
                            }
                            if (matchLength >= 255) {
                                matchLength -= 255;
                                dst.put(op++, (byte) 0xff);
                            }
                            dst.put(op++, (byte) matchLength);
                        } else {
                            dst.put(token, (byte) (dst.get(token) + matchLength));
                        }
                    }

                    anchor = ip;

                    // Test end of chunk
                    if (ip > mfLimit) break mainLoop;

                    // Fill table
                    putPosition(table, ip - 2, src, srcOffset);

                    // Test next position
                    match = replacePosition(table, ip, src, srcOffset);
                    if (match + MAX_DISTANCE >= ip && src.getInt(match) == src.getInt(ip)) {
                        token = op++;
                        dst.put(token, (byte) 0);
                        continue;
                    }

                    // Prepare next loop
                    forwardH = hashPosition16(src, ++ip);
                    break;
                }
            } // mainLoop
        } // if (inputSize >= LZ4_MIN_LENGTH)

        // Encode Last Literals
        {
            int lastRun = srcEnd - anchor;
            if (lastRun >= RUN_MASK) {
                int accumulator = lastRun - RUN_MASK;
                dst.put(op++, (byte) (RUN_MASK << ML_BITS));
                for (; accumulator >= 255; accumulator -= 255) {
                    dst.put(op++, (byte) 0xff);
                }
                dst.put(op++, (byte) accumulator);
            } else {
                dst.put(op++, (byte) (lastRun << ML_BITS));
            }
            System.arraycopy(src.array(), anchor, dst.array(), op, lastRun);
            op += lastRun;
        }

        return op - dstOffset;
    }
}

## LZ4Common.java
package lz4;

public class LZ4Common {
    static final int LZ4_MEMORY_USAGE = 15;
    static final int ACCELERATION = 1;

    static final int MINMATCH = 4;
    static final int COPYLENGTH = 8;
    static final int LASTLITERALS = 5;
    static final int MFLIMIT = COPYLENGTH + MINMATCH;
    static final int LZ4_MIN_LENGTH = MFLIMIT + 1;

    static final int MAXD_LOG = 16;
    static final int MAX_DISTANCE = (1 << MAXD_LOG) - 1;

    static final int ML_BITS = 4;
    static final int ML_MASK = (1 << ML_BITS) - 1;
    static final int RUN_BITS = 8 - ML_BITS;
    static final int RUN_MASK = (1 << RUN_BITS) - 1;

    static final int HASH_SIZE_16 = 1 << (LZ4_MEMORY_USAGE - 1);
    static final int LZ4_SKIP_TRIGGER = 6;

    public static int compressBound(int size) {
        return size + size / 255 + 16;
    }
}

## LZ4Native.java
package lz4;

public class LZ4Native {

    public static native int compress(final byte[] src, final int srcOffset,
                                      final byte[] dst, final int dstOffset,
                                      final int inputSize);

    static {
        System.loadLibrary("lz4");
    }
}

## LZ4Unsafe.java
package lz4;

import sun.misc.Unsafe;

import java.lang.reflect.Field;

import static lz4.LZ4Common.*;

public class LZ4Unsafe {
    static final Unsafe unsafe = getUnsafe();
    static final long byteArrayOffset = unsafe.arrayBaseOffset(byte[].class);

    private static int hashPosition16(Object src, long p) {
        long sequence = unsafe.getLong(src, p);
        return (int) ((sequence * 889523592379L) >>> (41 - LZ4_MEMORY_USAGE)) & (HASH_SIZE_16 - 1);
    }

    private static void putPosition(short[] table, long p, Object src, long srcOffset) {
        int h = hashPosition16(src, p);
        table[h] = (short) (p - srcOffset);
    }

    private static long replacePosition(short[] table, long p, Object src, long srcOffset) {
        int h = hashPosition16(src, p);
        long prev = (table[h] & 0xffffL) + srcOffset;
        table[h] = (short) (p - srcOffset);
        return prev;
    }

    private static int matchLength(Object src, long pIn, long pMatch, long pInLimit) {
        final long pStart = pIn;

        while (pIn < pInLimit - 7) {
            long diff = unsafe.getLong(src, pMatch) ^ unsafe.getLong(src, pIn);
            if (diff != 0) {
                pIn += Long.numberOfTrailingZeros(diff) >>> 3;
                return (int) (pIn - pStart);
            }
            pIn += 8;
            pMatch += 8;
        }

        if (pIn < pInLimit - 3 && unsafe.getInt(src, pMatch) == unsafe.getInt(src, pIn)) {
            pIn += 4;
            pMatch += 4;
        }
        if (pIn < pInLimit - 1 && unsafe.getShort(src, pMatch) == unsafe.getShort(src, pIn)) {
            pIn += 2;
            pMatch += 2;
        }
        if (pIn < pInLimit && unsafe.getByte(src, pMatch) == unsafe.getByte(src, pIn)) {
            pIn++;
        }
        return (int) (pIn - pStart);
    }

    private static void wildCopy(Object src, long srcOffset, Object dst, long dstOffset, long dstEnd) {
        do {
            unsafe.putLong(dst, dstOffset, unsafe.getLong(src, srcOffset));
            dstOffset += 8;
            srcOffset += 8;
        } while (dstOffset < dstEnd);
    }

    public static int compress(final Object src, final long srcOffset,
                               final Object dst, final long dstOffset,
                               final int inputSize) {
        final long srcEnd = srcOffset + inputSize;
        final long mfLimit = srcEnd - MFLIMIT;
        final long matchLimit = srcEnd - LASTLITERALS;

        long ip = srcOffset;
        long anchor = srcOffset;
        long op = dstOffset;

        if (inputSize >= LZ4_MIN_LENGTH) {
            final short[] table = new short[HASH_SIZE_16];

            // First Byte
            putPosition(table, ip, src, srcOffset);
            ip++;
            int forwardH = hashPosition16(src, ip);

            mainLoop:
            for (; ; ) {
                long match;
                long token;
                {
                    long forwardIp = ip;
                    int step = 1;
                    int searchMatchNb = ACCELERATION << LZ4_SKIP_TRIGGER;

                    // Find a match
                    do {
                        int h = forwardH;
                        ip = forwardIp;
                        forwardIp += step;
                        step = (searchMatchNb++ >>> LZ4_SKIP_TRIGGER);

                        if (forwardIp > mfLimit) break mainLoop;

                        match = (table[h] & 0xffffL) + srcOffset;
                        forwardH = hashPosition16(src, forwardIp);
                        table[h] = (short) (ip - srcOffset);
                    } while (unsafe.getInt(src, match) != unsafe.getInt(src, ip));
                }

                // Catch up
                while (ip > anchor && match > srcOffset && unsafe.getByte(src, ip - 1) == unsafe.getByte(src, match - 1)) {
                    ip--;
                    match--;
                }

                {
                    // Encode Literal length
                    int litLength = (int) (ip - anchor);
                    token = op++;
                    if (litLength >= RUN_MASK) {
                        int len = litLength - RUN_MASK;
                        unsafe.putByte(dst, token, (byte) (RUN_MASK << ML_BITS));
                        for (; len >= 255; len -= 255) {
                            unsafe.putByte(dst, op++, (byte) 0xff);
                        }
                        unsafe.putByte(dst, op++, (byte) len);
                    } else {
                        unsafe.putByte(dst, token, (byte) (litLength << ML_BITS));
                    }

                    // Copy Literals
                    wildCopy(src, anchor, dst, op, op + litLength);
                    op += litLength;
                }

                for (; ; ) {
                    // Encode Offset
                    unsafe.putShort(dst, op, (short) (ip - match));
                    op += 2;

                    // Encode MatchLength
                    {
                        int matchLength = matchLength(src, ip + MINMATCH, match + MINMATCH, matchLimit);
                        ip += MINMATCH + matchLength;

                        if (matchLength >= ML_MASK) {
                            unsafe.putByte(dst, token, (byte) (unsafe.getByte(dst, token) + ML_MASK));
                            matchLength -= ML_MASK;
                            for (; matchLength >= 510; matchLength -= 510) {
                                unsafe.putShort(dst, op, (short) 0xffff);
                                op += 2;
                            }
                            if (matchLength >= 255) {
                                matchLength -= 255;
                                unsafe.putByte(dst, op++, (byte) 0xff);
                            }
                            unsafe.putByte(dst, op++, (byte) matchLength);
                        } else {
                            unsafe.putByte(dst, token, (byte) (unsafe.getByte(dst, token) + matchLength));
                        }
                    }

                    anchor = ip;

                    // Test end of chunk
                    if (ip > mfLimit) break mainLoop;

                    // Fill table
                    putPosition(table, ip - 2, src, srcOffset);

                    // Test next position
                    match = replacePosition(table, ip, src, srcOffset);
                    if (match + MAX_DISTANCE >= ip && unsafe.getInt(src, match) == unsafe.getInt(src, ip)) {
                        token = op++;
                        unsafe.putByte(dst, token, (byte) 0);
                        continue;
                    }

                    // Prepare next loop
                    forwardH = hashPosition16(src, ++ip);
                    break;
                }
            } // mainLoop
        } // if (inputSize >= LZ4_MIN_LENGTH)

        // Encode Last Literals
        {
            int lastRun = (int) (srcEnd - anchor);
            if (lastRun >= RUN_MASK) {
                int accumulator = lastRun - RUN_MASK;
                unsafe.putByte(dst, op++, (byte) (RUN_MASK << ML_BITS));
                for (; accumulator >= 255; accumulator -= 255) {
                    unsafe.putByte(dst, op++, (byte) 0xff);
                }
                unsafe.putByte(dst, op++, (byte) accumulator);
            } else {
                unsafe.putByte(dst, op++, (byte) (lastRun << ML_BITS));
            }
            unsafe.copyMemory(src, anchor, dst, op, lastRun);
            op += lastRun;
        }

        return (int) (op - dstOffset);
    }

    private static Unsafe getUnsafe() {
        try {
            Field f = Unsafe.class.getDeclaredField("theUnsafe");
            f.setAccessible(true);
            return (Unsafe) f.get(null);
        } catch (Exception e) {
            throw new AssertionError(e);
        }
    }
}
	# VM version: JDK 1.8.0_60, VM 25.60-b23

	Benchmark Mode Cnt Score Error Units
	LZ4Bench.array thrpt 10 7,001 ± 0,027 ops/ms
	LZ4Bench.byteBuffer thrpt 10 5,529 ± 0,030 ops/ms
	LZ4Bench.jni thrpt 10 24,174 ± 0,376 ops/ms
	LZ4Bench.unsafe thrpt 10 21,359 ± 0,318 ops/ms

	# VM version: JDK 9-ea, VM 9-ea+100-2016-01-06-213251.javare.4235.nc

	Benchmark Mode Cnt Score Error Units
	LZ4Bench.array thrpt 10 7,897 ± 0,070 ops/ms
	LZ4Bench.byteBuffer thrpt 10 12,048 ± 0,074 ops/ms
	LZ4Bench.jni thrpt 10 24,300 ± 0,214 ops/ms
	LZ4Bench.unsafe thrpt 10 23,501 ± 0,195 ops/ms
	package lz4;

	import static lz4.LZ4Common.*;

	public class LZ4Array {

	private static long getLong(byte[] b, int p) {
	return (b[p] & 0xffL)
	\| (b[p + 1] & 0xffL) << 8
	\| (b[p + 2] & 0xffL) << 16
	\| (b[p + 3] & 0xffL) << 24
	\| (b[p + 4] & 0xffL) << 32
	\| (b[p + 5] & 0xffL) << 40
	\| (b[p + 6] & 0xffL) << 48
	\| (b[p + 7] & 0xffL) << 56;
	}

	private static int getInt(byte[] b, int p) {
	return (b[p] & 0xff)
	\| (b[p + 1] & 0xff) << 8
	\| (b[p + 2] & 0xff) << 16
	\| (b[p + 3] & 0xff) << 24;
	}

	private static short getShort(byte[] b, int p) {
	return (short) ((b[p] & 0xff) \| (b[p + 1] & 0xff) << 8);
	}

	private static byte getByte(byte[] b, int p) {
	return b[p];
	}

	private static void putShort(byte[] b, int p, short val) {
	b[p] = (byte) val;
	b[p + 1] = (byte) (val >>> 8);
	}

	private static void putByte(byte[] b, int p, byte val) {
	b[p] = val;
	}

	private static int hashPosition16(byte[] src, int p) {
	long sequence = getLong(src, p);
	return (int) ((sequence * 889523592379L) >>> (41 - LZ4_MEMORY_USAGE)) & (HASH_SIZE_16 - 1);
	}

	private static void putPosition(short[] table, int p, byte[] src, int srcOffset) {
	int h = hashPosition16(src, p);
	table[h] = (short) (p - srcOffset);
	}

	private static int replacePosition(short[] table, int p, byte[] src, int srcOffset) {
	int h = hashPosition16(src, p);
	int prev = (table[h] & 0xffff) + srcOffset;
	table[h] = (short) (p - srcOffset);
	return prev;
	}

	private static int matchLength(byte[] src, int pIn, int pMatch, int pInLimit) {
	final int pStart = pIn;

	while (pIn < pInLimit - 7) {
	long diff = getLong(src, pMatch) ^ getLong(src, pIn);
	if (diff != 0) {
	pIn += Long.numberOfTrailingZeros(diff) >>> 3;
	return pIn - pStart;
	}
	pIn += 8;
	pMatch += 8;
	}

	if (pIn < pInLimit - 3 && getInt(src, pMatch) == getInt(src, pIn)) {
	pIn += 4;
	pMatch += 4;
	}
	if (pIn < pInLimit - 1 && getShort(src, pMatch) == getShort(src, pIn)) {
	pIn += 2;
	pMatch += 2;
	}
	if (pIn < pInLimit && getByte(src, pMatch) == getByte(src, pIn)) {
	pIn++;
	}
	return pIn - pStart;
	}

	private static void wildCopy(byte[] src, int srcOffset, byte[] dst, int dstOffset, int dstEnd) {
	do {
	dst[dstOffset] = src[srcOffset];
	dst[dstOffset + 1] = src[srcOffset + 1];
	dst[dstOffset + 2] = src[srcOffset + 2];
	dst[dstOffset + 3] = src[srcOffset + 3];
	dst[dstOffset + 4] = src[srcOffset + 4];
	dst[dstOffset + 5] = src[srcOffset + 5];
	dst[dstOffset + 6] = src[srcOffset + 6];
	dst[dstOffset + 7] = src[srcOffset + 7];
	dstOffset += 8;
	srcOffset += 8;
	} while (dstOffset < dstEnd);
	}

	public static int compress(final byte[] src, final int srcOffset,
	final byte[] dst, final int dstOffset,
	final int inputSize) {
	final int srcEnd = srcOffset + inputSize;
	final int mfLimit = srcEnd - MFLIMIT;
	final int matchLimit = srcEnd - LASTLITERALS;

	int ip = srcOffset;
	int anchor = srcOffset;
	int op = dstOffset;

	if (inputSize >= LZ4_MIN_LENGTH) {
	final short[] table = new short[HASH_SIZE_16];

	// First Byte
	putPosition(table, ip, src, srcOffset);
	ip++;
	int forwardH = hashPosition16(src, ip);

	mainLoop:
	for (; ; ) {
	int match;
	int token;
	{
	int forwardIp = ip;
	int step = 1;
	int searchMatchNb = ACCELERATION << LZ4_SKIP_TRIGGER;

	// Find a match
	do {
	int h = forwardH;
	ip = forwardIp;
	forwardIp += step;
	step = (searchMatchNb++ >>> LZ4_SKIP_TRIGGER);

	if (forwardIp > mfLimit) break mainLoop;

	match = (table[h] & 0xffff) + srcOffset;
	forwardH = hashPosition16(src, forwardIp);
	table[h] = (short) (ip - srcOffset);
	} while (getInt(src, match) != getInt(src, ip));
	}

	// Catch up
	while (ip > anchor && match > srcOffset && getByte(src, ip - 1) == getByte(src, match - 1)) {
	ip--;
	match--;
	}

	{
	// Encode Literal length
	int litLength = ip - anchor;
	token = op++;
	if (litLength >= RUN_MASK) {
	int len = litLength - RUN_MASK;
	putByte(dst, token, (byte) (RUN_MASK << ML_BITS));
	for (; len >= 255; len -= 255) {
	putByte(dst, op++, (byte) 0xff);
	}
	putByte(dst, op++, (byte) len);
	} else {
	putByte(dst, token, (byte) (litLength << ML_BITS));
	}

	// Copy Literals
	wildCopy(src, anchor, dst, op, op + litLength);
	op += litLength;
	}

	for (; ; ) {
	// Encode Offset
	putShort(dst, op, (short) (ip - match));
	op += 2;

	// Encode MatchLength
	{
	int matchLength = matchLength(src, ip + MINMATCH, match + MINMATCH, matchLimit);
	ip += MINMATCH + matchLength;

	if (matchLength >= ML_MASK) {
	putByte(dst, token, (byte) (getByte(dst, token) + ML_MASK));
	matchLength -= ML_MASK;
	for (; matchLength >= 510; matchLength -= 510) {
	putShort(dst, op, (short) 0xffff);
	op += 2;
	}
	if (matchLength >= 255) {
	matchLength -= 255;
	putByte(dst, op++, (byte) 0xff);
	}
	putByte(dst, op++, (byte) matchLength);
	} else {
	putByte(dst, token, (byte) (getByte(dst, token) + matchLength));
	}
	}

	anchor = ip;

	// Test end of chunk
	if (ip > mfLimit) break mainLoop;

	// Fill table
	putPosition(table, ip - 2, src, srcOffset);

	// Test next position
	match = replacePosition(table, ip, src, srcOffset);
	if (match + MAX_DISTANCE >= ip && getInt(src, match) == getInt(src, ip)) {
	token = op++;
	putByte(dst, token, (byte) 0);
	continue;
	}

	// Prepare next loop
	forwardH = hashPosition16(src, ++ip);
	break;
	}
	} // mainLoop
	} // if (inputSize >= LZ4_MIN_LENGTH)

	// Encode Last Literals
	{
	int lastRun = srcEnd - anchor;
	if (lastRun >= RUN_MASK) {
	int accumulator = lastRun - RUN_MASK;
	putByte(dst, op++, (byte) (RUN_MASK << ML_BITS));
	for (; accumulator >= 255; accumulator -= 255) {
	putByte(dst, op++, (byte) 0xff);
	}
	putByte(dst, op++, (byte) accumulator);
	} else {
	putByte(dst, op++, (byte) (lastRun << ML_BITS));
	}
	System.arraycopy(src, anchor, dst, op, lastRun);
	op += lastRun;
	}

	return op - dstOffset;
	}
	}
	package lz4;

	import org.openjdk.jmh.annotations.Benchmark;
	import org.openjdk.jmh.annotations.Scope;
	import org.openjdk.jmh.annotations.Setup;
	import org.openjdk.jmh.annotations.State;

	import java.io.IOException;
	import java.nio.ByteBuffer;
	import java.nio.ByteOrder;
	import java.nio.file.Files;
	import java.nio.file.Paths;

	@State(Scope.Benchmark)
	public class LZ4Bench {

	byte[] in;
	byte[] out;

	@Setup
	public void setup() throws IOException {
	in = Files.readAllBytes(Paths.get("test32k.txt"));
	out = new byte[LZ4Common.compressBound(in.length)];

	if (array() != byteBuffer() \|\| array() != jni() \|\| array() != unsafe()) {
	throw new AssertionError();
	}
	}

	@Benchmark
	public int array() {
	return LZ4Array.compress(in, 0, out, 0, in.length);
	}

	@Benchmark
	public int byteBuffer() {
	ByteBuffer src = ByteBuffer.wrap(in).order(ByteOrder.nativeOrder());
	ByteBuffer dst = ByteBuffer.wrap(out).order(ByteOrder.nativeOrder());
	return LZ4ByteBuffer.compress(src, 0, dst, 0, in.length);
	}

	@Benchmark
	public int jni() {
	return LZ4Native.compress(in, 0, out, 0, in.length);
	}

	@Benchmark
	public int unsafe() {
	return LZ4Unsafe.compress(in, LZ4Unsafe.byteArrayOffset, out, LZ4Unsafe.byteArrayOffset, in.length);
	}
	}
	package lz4;

	public class LZ4Common {
	static final int LZ4_MEMORY_USAGE = 15;
	static final int ACCELERATION = 1;

	static final int MINMATCH = 4;
	static final int COPYLENGTH = 8;
	static final int LASTLITERALS = 5;
	static final int MFLIMIT = COPYLENGTH + MINMATCH;
	static final int LZ4_MIN_LENGTH = MFLIMIT + 1;

	static final int MAXD_LOG = 16;
	static final int MAX_DISTANCE = (1 << MAXD_LOG) - 1;

	static final int ML_BITS = 4;
	static final int ML_MASK = (1 << ML_BITS) - 1;
	static final int RUN_BITS = 8 - ML_BITS;
	static final int RUN_MASK = (1 << RUN_BITS) - 1;

	static final int HASH_SIZE_16 = 1 << (LZ4_MEMORY_USAGE - 1);
	static final int LZ4_SKIP_TRIGGER = 6;

	public static int compressBound(int size) {
	return size + size / 255 + 16;
	}
	}
	package lz4;

	public class LZ4Native {

	public static native int compress(final byte[] src, final int srcOffset,
	final byte[] dst, final int dstOffset,
	final int inputSize);

	static {
	System.loadLibrary("lz4");
	}
	}
	package lz4;

	import sun.misc.Unsafe;

	import java.lang.reflect.Field;

	import static lz4.LZ4Common.*;

	public class LZ4Unsafe {
	static final Unsafe unsafe = getUnsafe();
	static final long byteArrayOffset = unsafe.arrayBaseOffset(byte[].class);

	private static int hashPosition16(Object src, long p) {
	long sequence = unsafe.getLong(src, p);
	return (int) ((sequence * 889523592379L) >>> (41 - LZ4_MEMORY_USAGE)) & (HASH_SIZE_16 - 1);
	}

	private static void putPosition(short[] table, long p, Object src, long srcOffset) {
	int h = hashPosition16(src, p);
	table[h] = (short) (p - srcOffset);
	}

	private static long replacePosition(short[] table, long p, Object src, long srcOffset) {
	int h = hashPosition16(src, p);
	long prev = (table[h] & 0xffffL) + srcOffset;
	table[h] = (short) (p - srcOffset);
	return prev;
	}

	private static int matchLength(Object src, long pIn, long pMatch, long pInLimit) {
	final long pStart = pIn;

	while (pIn < pInLimit - 7) {
	long diff = unsafe.getLong(src, pMatch) ^ unsafe.getLong(src, pIn);
	if (diff != 0) {
	pIn += Long.numberOfTrailingZeros(diff) >>> 3;
	return (int) (pIn - pStart);
	}
	pIn += 8;
	pMatch += 8;
	}

	if (pIn < pInLimit - 3 && unsafe.getInt(src, pMatch) == unsafe.getInt(src, pIn)) {
	pIn += 4;
	pMatch += 4;
	}
	if (pIn < pInLimit - 1 && unsafe.getShort(src, pMatch) == unsafe.getShort(src, pIn)) {
	pIn += 2;
	pMatch += 2;
	}
	if (pIn < pInLimit && unsafe.getByte(src, pMatch) == unsafe.getByte(src, pIn)) {
	pIn++;
	}
	return (int) (pIn - pStart);
	}

	private static void wildCopy(Object src, long srcOffset, Object dst, long dstOffset, long dstEnd) {
	do {
	unsafe.putLong(dst, dstOffset, unsafe.getLong(src, srcOffset));
	dstOffset += 8;
	srcOffset += 8;
	} while (dstOffset < dstEnd);
	}

	public static int compress(final Object src, final long srcOffset,
	final Object dst, final long dstOffset,
	final int inputSize) {
	final long srcEnd = srcOffset + inputSize;
	final long mfLimit = srcEnd - MFLIMIT;
	final long matchLimit = srcEnd - LASTLITERALS;

	long ip = srcOffset;
	long anchor = srcOffset;
	long op = dstOffset;

	if (inputSize >= LZ4_MIN_LENGTH) {
	final short[] table = new short[HASH_SIZE_16];

	// First Byte
	putPosition(table, ip, src, srcOffset);
	ip++;
	int forwardH = hashPosition16(src, ip);

	mainLoop:
	for (; ; ) {
	long match;
	long token;
	{
	long forwardIp = ip;
	int step = 1;
	int searchMatchNb = ACCELERATION << LZ4_SKIP_TRIGGER;

	// Find a match
	do {
	int h = forwardH;
	ip = forwardIp;
	forwardIp += step;
	step = (searchMatchNb++ >>> LZ4_SKIP_TRIGGER);

	if (forwardIp > mfLimit) break mainLoop;

	match = (table[h] & 0xffffL) + srcOffset;
	forwardH = hashPosition16(src, forwardIp);
	table[h] = (short) (ip - srcOffset);
	} while (unsafe.getInt(src, match) != unsafe.getInt(src, ip));
	}

	// Catch up
	while (ip > anchor && match > srcOffset && unsafe.getByte(src, ip - 1) == unsafe.getByte(src, match - 1)) {
	ip--;
	match--;
	}

	{
	// Encode Literal length
	int litLength = (int) (ip - anchor);
	token = op++;
	if (litLength >= RUN_MASK) {
	int len = litLength - RUN_MASK;
	unsafe.putByte(dst, token, (byte) (RUN_MASK << ML_BITS));
	for (; len >= 255; len -= 255) {
	unsafe.putByte(dst, op++, (byte) 0xff);
	}
	unsafe.putByte(dst, op++, (byte) len);
	} else {
	unsafe.putByte(dst, token, (byte) (litLength << ML_BITS));
	}

	// Copy Literals
	wildCopy(src, anchor, dst, op, op + litLength);
	op += litLength;
	}

	for (; ; ) {
	// Encode Offset
	unsafe.putShort(dst, op, (short) (ip - match));
	op += 2;

	// Encode MatchLength
	{
	int matchLength = matchLength(src, ip + MINMATCH, match + MINMATCH, matchLimit);
	ip += MINMATCH + matchLength;

	if (matchLength >= ML_MASK) {
	unsafe.putByte(dst, token, (byte) (unsafe.getByte(dst, token) + ML_MASK));
	matchLength -= ML_MASK;
	for (; matchLength >= 510; matchLength -= 510) {
	unsafe.putShort(dst, op, (short) 0xffff);
	op += 2;
	}
	if (matchLength >= 255) {
	matchLength -= 255;
	unsafe.putByte(dst, op++, (byte) 0xff);
	}
	unsafe.putByte(dst, op++, (byte) matchLength);
	} else {
	unsafe.putByte(dst, token, (byte) (unsafe.getByte(dst, token) + matchLength));
	}
	}

	anchor = ip;

	// Test end of chunk
	if (ip > mfLimit) break mainLoop;

	// Fill table
	putPosition(table, ip - 2, src, srcOffset);

	// Test next position
	match = replacePosition(table, ip, src, srcOffset);
	if (match + MAX_DISTANCE >= ip && unsafe.getInt(src, match) == unsafe.getInt(src, ip)) {
	token = op++;
	unsafe.putByte(dst, token, (byte) 0);
	continue;
	}

	// Prepare next loop
	forwardH = hashPosition16(src, ++ip);
	break;
	}
	} // mainLoop
	} // if (inputSize >= LZ4_MIN_LENGTH)

	// Encode Last Literals
	{
	int lastRun = (int) (srcEnd - anchor);
	if (lastRun >= RUN_MASK) {
	int accumulator = lastRun - RUN_MASK;
	unsafe.putByte(dst, op++, (byte) (RUN_MASK << ML_BITS));
	for (; accumulator >= 255; accumulator -= 255) {
	unsafe.putByte(dst, op++, (byte) 0xff);
	}
	unsafe.putByte(dst, op++, (byte) accumulator);
	} else {
	unsafe.putByte(dst, op++, (byte) (lastRun << ML_BITS));
	}
	unsafe.copyMemory(src, anchor, dst, op, lastRun);
	op += lastRun;
	}

	return (int) (op - dstOffset);
	}

	private static Unsafe getUnsafe() {
	try {
	Field f = Unsafe.class.getDeclaredField("theUnsafe");
	f.setAccessible(true);
	return (Unsafe) f.get(null);
	} catch (Exception e) {
	throw new AssertionError(e);
	}
	}
	}