Skip to content

Instantly share code, notes, and snippets.

@junkdog
Created June 10, 2014 08:55
Show Gist options
  • Save junkdog/f7d91692be21ed01d8ea to your computer and use it in GitHub Desktop.
Save junkdog/f7d91692be21ed01d8ea to your computer and use it in GitHub Desktop.
import org.openjdk.jmh.annotations.*;
import java.lang.reflect.Field;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.FloatBuffer;
import java.util.concurrent.TimeUnit;
import sun.misc.Unsafe;
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@State(Scope.Thread)
public class UnsafeBench {
private Matrix4fField fieldA;
private Matrix4fField fieldB;
private Matrix4fField fieldC;
private float[] arrayA;
private float[] arrayB;
private float[] arrayC;
private FloatBuffer bufferA;
private FloatBuffer bufferB;
private FloatBuffer bufferC;
private long unsafeA;
private long unsafeB;
private long unsafeC;
/**
 * Builds the three operands (A, B and the result C) once for every storage
 * layout under test and then cross-checks all multiply implementations.
 *
 * The "unsafe" operands are not separate allocations: each one is the raw
 * address obtained from the corresponding direct buffer (bufferA/B/C).
 */
@Setup
public void prepare() {
    // One object with sixteen float fields per operand.
    this.fieldA = new Matrix4fField();
    this.fieldB = new Matrix4fField();
    this.fieldC = new Matrix4fField();

    // float[16] per operand.
    this.arrayA = Matrix4fArray.create();
    this.arrayB = Matrix4fArray.create();
    this.arrayC = Matrix4fArray.create();

    // Direct FloatBuffer per operand.
    this.bufferA = Matrix4fBuffer.create();
    this.bufferB = Matrix4fBuffer.create();
    this.bufferC = Matrix4fBuffer.create();

    // Raw addresses read out of the direct buffers created just above.
    this.unsafeA = Matrix4fUnsafe.address(this.bufferA);
    this.unsafeB = Matrix4fUnsafe.address(this.bufferB);
    this.unsafeC = Matrix4fUnsafe.address(this.bufferC);

    this.validate();
}
/**
 * Verifies that every multiply implementation produces the same product as
 * the field-based reference ({@link Matrix4fField#mul4f}).
 *
 * Each implementation writes A x B into its own C operand, which is then
 * compared element by element against fieldC. The targets are zeroed before
 * every check so that a result left behind by a previous implementation
 * cannot make a broken one pass; this matters in particular because unsafeC
 * addresses the same memory as bufferC.
 *
 * @throws IllegalStateException if any implementation disagrees with the reference
 */
private void validate() {
    // Reference result.
    Matrix4fField.mul4f(fieldA, fieldB, fieldC);

    clearTargets();
    Matrix4fArray.mul4f(arrayA, arrayB, arrayC);
    Matrix4fArray.validate(fieldC, arrayC);

    clearTargets();
    Matrix4fArray.mul4fOptimized(arrayA, arrayB, arrayC);
    Matrix4fArray.validate(fieldC, arrayC);

    clearTargets();
    Matrix4fBuffer.mul4f(bufferA, bufferB, bufferC);
    Matrix4fBuffer.validate(fieldC, bufferC);

    clearTargets();
    Matrix4fBuffer.mul4fOptimized(bufferA, bufferB, bufferC);
    Matrix4fBuffer.validate(fieldC, bufferC);

    // The target must be unsafeC: writing into unsafeB would overwrite the
    // right-hand operand and leave unsafeC untouched, so nothing would be checked.
    clearTargets();
    Matrix4fUnsafe.mul4f(unsafeA, unsafeB, unsafeC);
    Matrix4fUnsafe.validate(fieldC, unsafeC);

    clearTargets();
    Matrix4fUnsafe.mul4fOptimized(unsafeA, unsafeB, unsafeC);
    Matrix4fUnsafe.validate(fieldC, unsafeC);
}

/** Zeroes arrayC and bufferC (and thereby the memory behind unsafeC). */
private void clearTargets() {
    for (int i = 0; i < 16; i++) {
        arrayC[i] = 0.0f;
        bufferC.put(i, 0.0f);
    }
}
/** Benchmark: Matrix4fField.mul4f with fieldA and fieldB as operands, result stored in fieldC. */
@GenerateMicroBenchmark
public void field() {
Matrix4fField.mul4f(fieldA, fieldB, fieldC);
}
/** Benchmark: Matrix4fArray.mul4f with arrayA and arrayB as operands, result stored in arrayC. */
@GenerateMicroBenchmark
public void array() {
Matrix4fArray.mul4f(arrayA, arrayB, arrayC);
}
/** Benchmark: Matrix4fArray.mul4fOptimized with arrayA and arrayB as operands, result stored in arrayC. */
@GenerateMicroBenchmark
public void arrayOptimized() {
Matrix4fArray.mul4fOptimized(arrayA, arrayB, arrayC);
}
/** Benchmark: Matrix4fBuffer.mul4f with bufferA and bufferB as operands, result stored in bufferC. */
@GenerateMicroBenchmark
public void buffer() {
Matrix4fBuffer.mul4f(bufferA, bufferB, bufferC);
}
/** Benchmark: Matrix4fBuffer.mul4fOptimized with bufferA and bufferB as operands, result stored in bufferC. */
@GenerateMicroBenchmark
public void bufferOptimized() {
Matrix4fBuffer.mul4fOptimized(bufferA, bufferB, bufferC);
}
/** Benchmark: Matrix4fUnsafe.mul4f on the raw addresses unsafeA and unsafeB, result stored at unsafeC. */
@GenerateMicroBenchmark
public void unsafe() {
Matrix4fUnsafe.mul4f(unsafeA, unsafeB, unsafeC);
}
/** Benchmark: Matrix4fUnsafe.mul4fOptimized on the raw addresses unsafeA and unsafeB, result stored at unsafeC. */
@GenerateMicroBenchmark
public void unsafeOptimized() {
Matrix4fUnsafe.mul4fOptimized(unsafeA, unsafeB, unsafeC);
}
/**
 * Matrix held as sixteen individual public float fields (m00 .. m33).
 * The other layouts in this file are validated against an instance of this class.
 */
public static class Matrix4fField {
// Declaration order here is the order the other layouts use for indices 0..15
// (m00 -> 0, m10 -> 1, m20 -> 2, m30 -> 3, m01 -> 4, ...).
public float
m00, m10, m20, m30,
m01, m11, m21, m31,
m02, m12, m22, m32,
m03, m13, m23, m33;
/** Initializes the fields, in declaration order, to 1.0f through 16.0f. */
public Matrix4fField() {
m00 = 1.0f;
m10 = 2.0f;
m20 = 3.0f;
m30 = 4.0f;
m01 = 5.0f;
m11 = 6.0f;
m21 = 7.0f;
m31 = 8.0f;
m02 = 9.0f;
m12 = 10.0f;
m22 = 11.0f;
m32 = 12.0f;
m03 = 13.0f;
m13 = 14.0f;
m23 = 15.0f;
m33 = 16.0f;
}
/**
 * Copies all sixteen fields of src into this instance.
 *
 * @param src matrix to copy from
 */
public void set4f(Matrix4fField src) {
this.m00 = src.m00;
this.m10 = src.m10;
this.m20 = src.m20;
this.m30 = src.m30;
this.m01 = src.m01;
this.m11 = src.m11;
this.m21 = src.m21;
this.m31 = src.m31;
this.m02 = src.m02;
this.m12 = src.m12;
this.m22 = src.m22;
this.m32 = src.m32;
this.m03 = src.m03;
this.m13 = src.m13;
this.m23 = src.m23;
this.m33 = src.m33;
}
/**
 * Multiplies left by right and stores the product in target.
 * Every result element is the sum of four products, e.g.
 * m00 = left.m00*right.m00 + left.m10*right.m01 + left.m20*right.m02 + left.m30*right.m03.
 *
 * @param left   left-hand operand
 * @param right  right-hand operand
 * @param target receives the product
 */
public static void mul4f(Matrix4fField left, Matrix4fField right, Matrix4fField target) {
// All sixteen results are computed into locals before anything is stored,
// so target may be the same object as left or right.
float m00 = left.m00 * right.m00 + left.m10 * right.m01 + left.m20 * right.m02 + left.m30 * right.m03;
float m10 = left.m00 * right.m10 + left.m10 * right.m11 + left.m20 * right.m12 + left.m30 * right.m13;
float m20 = left.m00 * right.m20 + left.m10 * right.m21 + left.m20 * right.m22 + left.m30 * right.m23;
float m30 = left.m00 * right.m30 + left.m10 * right.m31 + left.m20 * right.m32 + left.m30 * right.m33;
float m01 = left.m01 * right.m00 + left.m11 * right.m01 + left.m21 * right.m02 + left.m31 * right.m03;
float m11 = left.m01 * right.m10 + left.m11 * right.m11 + left.m21 * right.m12 + left.m31 * right.m13;
float m21 = left.m01 * right.m20 + left.m11 * right.m21 + left.m21 * right.m22 + left.m31 * right.m23;
float m31 = left.m01 * right.m30 + left.m11 * right.m31 + left.m21 * right.m32 + left.m31 * right.m33;
float m02 = left.m02 * right.m00 + left.m12 * right.m01 + left.m22 * right.m02 + left.m32 * right.m03;
float m12 = left.m02 * right.m10 + left.m12 * right.m11 + left.m22 * right.m12 + left.m32 * right.m13;
float m22 = left.m02 * right.m20 + left.m12 * right.m21 + left.m22 * right.m22 + left.m32 * right.m23;
float m32 = left.m02 * right.m30 + left.m12 * right.m31 + left.m22 * right.m32 + left.m32 * right.m33;
float m03 = left.m03 * right.m00 + left.m13 * right.m01 + left.m23 * right.m02 + left.m33 * right.m03;
float m13 = left.m03 * right.m10 + left.m13 * right.m11 + left.m23 * right.m12 + left.m33 * right.m13;
float m23 = left.m03 * right.m20 + left.m13 * right.m21 + left.m23 * right.m22 + left.m33 * right.m23;
float m33 = left.m03 * right.m30 + left.m13 * right.m31 + left.m23 * right.m32 + left.m33 * right.m33;
target.m00 = m00;
target.m10 = m10;
target.m20 = m20;
target.m30 = m30;
target.m01 = m01;
target.m11 = m11;
target.m21 = m21;
target.m31 = m31;
target.m02 = m02;
target.m12 = m12;
target.m22 = m22;
target.m32 = m32;
target.m03 = m03;
target.m13 = m13;
target.m23 = m23;
target.m33 = m33;
}
}
/**
 * Static helpers for a matrix held in a 16-element float array.
 * Index i of the array corresponds to the i-th field of Matrix4fField in
 * declaration order (0 -> m00, 1 -> m10, 2 -> m20, 3 -> m30, 4 -> m01, ...).
 */
public static class Matrix4fArray {
/**
 * Allocates a new 16-element array filled with 1.0f through 16.0f.
 *
 * @return the freshly initialized array
 */
public static float[] create() {
float[] buffer = new float[16];
buffer[0] = 1.0f;
buffer[1] = 2.0f;
buffer[2] = 3.0f;
buffer[3] = 4.0f;
buffer[4] = 5.0f;
buffer[5] = 6.0f;
buffer[6] = 7.0f;
buffer[7] = 8.0f;
buffer[8] = 9.0f;
buffer[9] = 10.0f;
buffer[10] = 11.0f;
buffer[11] = 12.0f;
buffer[12] = 13.0f;
buffer[13] = 14.0f;
buffer[14] = 15.0f;
buffer[15] = 16.0f;
return buffer;
}
/**
 * Compares the sixteen array elements with the fields of the reference matrix.
 * The comparison is exact (!=), with no tolerance.
 *
 * @param ref  reference matrix
 * @param test array to check
 * @throws IllegalStateException if any element differs from its reference field
 */
public static void validate(Matrix4fField ref, float[] test) {
if (
test[0] != ref.m00 ||
test[1] != ref.m10 ||
test[2] != ref.m20 ||
test[3] != ref.m30 ||
test[4] != ref.m01 ||
test[5] != ref.m11 ||
test[6] != ref.m21 ||
test[7] != ref.m31 ||
test[8] != ref.m02 ||
test[9] != ref.m12 ||
test[10] != ref.m22 ||
test[11] != ref.m32 ||
test[12] != ref.m03 ||
test[13] != ref.m13 ||
test[14] != ref.m23 ||
test[15] != ref.m33
)
throw new IllegalStateException();
}
/**
 * Straightforward multiply: target[4*i + j] = sum over k of left[4*i + k] * right[4*k + j].
 * Array elements are re-read for every term.
 *
 * @param left   left-hand operand (16 floats)
 * @param right  right-hand operand (16 floats)
 * @param target receives the product (16 floats)
 */
public static void mul4f(float[] left, float[] right, float[] target) {
// Results go to locals first and are stored afterwards, so target may
// be the same array as left or right.
float m00 = left[0] * right[0] + left[1] * right[4] + left[2] * right[8] + left[3] * right[12];
float m10 = left[0] * right[1] + left[1] * right[5] + left[2] * right[9] + left[3] * right[13];
float m20 = left[0] * right[2] + left[1] * right[6] + left[2] * right[10] + left[3] * right[14];
float m30 = left[0] * right[3] + left[1] * right[7] + left[2] * right[11] + left[3] * right[15];
float m01 = left[4] * right[0] + left[5] * right[4] + left[6] * right[8] + left[7] * right[12];
float m11 = left[4] * right[1] + left[5] * right[5] + left[6] * right[9] + left[7] * right[13];
float m21 = left[4] * right[2] + left[5] * right[6] + left[6] * right[10] + left[7] * right[14];
float m31 = left[4] * right[3] + left[5] * right[7] + left[6] * right[11] + left[7] * right[15];
float m02 = left[8] * right[0] + left[9] * right[4] + left[10] * right[8] + left[11] * right[12];
float m12 = left[8] * right[1] + left[9] * right[5] + left[10] * right[9] + left[11] * right[13];
float m22 = left[8] * right[2] + left[9] * right[6] + left[10] * right[10] + left[11] * right[14];
float m32 = left[8] * right[3] + left[9] * right[7] + left[10] * right[11] + left[11] * right[15];
float m03 = left[12] * right[0] + left[13] * right[4] + left[14] * right[8] + left[15] * right[12];
float m13 = left[12] * right[1] + left[13] * right[5] + left[14] * right[9] + left[15] * right[13];
float m23 = left[12] * right[2] + left[13] * right[6] + left[14] * right[10] + left[15] * right[14];
float m33 = left[12] * right[3] + left[13] * right[7] + left[14] * right[11] + left[15] * right[15];
target[0] = m00;
target[1] = m10;
target[2] = m20;
target[3] = m30;
target[4] = m01;
target[5] = m11;
target[6] = m21;
target[7] = m31;
target[8] = m02;
target[9] = m12;
target[10] = m22;
target[11] = m32;
target[12] = m03;
target[13] = m13;
target[14] = m23;
target[15] = m33;
}
/**
 * Same product as mul4f, but each input element is read exactly once:
 * the whole right operand is loaded into locals up front, then the left
 * operand is consumed four elements at a time.
 *
 * @param left   left-hand operand (16 floats)
 * @param right  right-hand operand (16 floats)
 * @param target receives the product (16 floats)
 */
public static void mul4fOptimized(float[] left, float[] right, float[] target) {
// Load all of right before any store to target.
float
r00 = right[0], r10 = right[1], r20 = right[2], r30 = right[3],
r01 = right[4], r11 = right[5], r21 = right[6], r31 = right[7],
r02 = right[8], r12 = right[9], r22 = right[10], r32 = right[11],
r03 = right[12], r13 = right[13], r23 = right[14], r33 = right[15];
// left[0..3] -> target[0..3]
float l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3];
target[0] = l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03;
target[1] = l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13;
target[2] = l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23;
target[3] = l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33;
// left[4..7] -> target[4..7]
l0 = left[4];
l1 = left[5];
l2 = left[6];
l3 = left[7];
target[4] = l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03;
target[5] = l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13;
target[6] = l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23;
target[7] = l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33;
// left[8..11] -> target[8..11]
l0 = left[8];
l1 = left[9];
l2 = left[10];
l3 = left[11];
target[8] = l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03;
target[9] = l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13;
target[10] = l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23;
target[11] = l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33;
// left[12..15] -> target[12..15]
l0 = left[12];
l1 = left[13];
l2 = left[14];
l3 = left[15];
target[12] = l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03;
target[13] = l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13;
target[14] = l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23;
target[15] = l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33;
}
}
/**
 * Static helpers for a matrix held in a 16-element FloatBuffer, accessed
 * only through absolute get(index) / put(index, value).
 * Index i corresponds to the i-th field of Matrix4fField in declaration
 * order (0 -> m00, 1 -> m10, 2 -> m20, 3 -> m30, 4 -> m01, ...).
 */
public static class Matrix4fBuffer {
/**
 * Allocates a 64-byte direct buffer in native byte order, views it as a
 * FloatBuffer and fills indices 0..15 with 1.0f through 16.0f.
 *
 * @return the freshly initialized buffer
 */
public static FloatBuffer create() {
FloatBuffer buffer = ByteBuffer.allocateDirect(16 * 4).order(ByteOrder.nativeOrder()).asFloatBuffer();
buffer.put(0, 1.0f);
buffer.put(1, 2.0f);
buffer.put(2, 3.0f);
buffer.put(3, 4.0f);
buffer.put(4, 5.0f);
buffer.put(5, 6.0f);
buffer.put(6, 7.0f);
buffer.put(7, 8.0f);
buffer.put(8, 9.0f);
buffer.put(9, 10.0f);
buffer.put(10, 11.0f);
buffer.put(11, 12.0f);
buffer.put(12, 13.0f);
buffer.put(13, 14.0f);
buffer.put(14, 15.0f);
buffer.put(15, 16.0f);
return buffer;
}
/**
 * Compares the sixteen buffer elements with the fields of the reference matrix.
 * The comparison is exact (!=), with no tolerance.
 *
 * @param ref  reference matrix
 * @param test buffer to check
 * @throws IllegalStateException if any element differs from its reference field
 */
public static void validate(Matrix4fField ref, FloatBuffer test) {
if (
test.get(0) != ref.m00 ||
test.get(1) != ref.m10 ||
test.get(2) != ref.m20 ||
test.get(3) != ref.m30 ||
test.get(4) != ref.m01 ||
test.get(5) != ref.m11 ||
test.get(6) != ref.m21 ||
test.get(7) != ref.m31 ||
test.get(8) != ref.m02 ||
test.get(9) != ref.m12 ||
test.get(10) != ref.m22 ||
test.get(11) != ref.m32 ||
test.get(12) != ref.m03 ||
test.get(13) != ref.m13 ||
test.get(14) != ref.m23 ||
test.get(15) != ref.m33
)
throw new IllegalStateException();
}
/**
 * Straightforward multiply: element 4*i + j of target is the sum over k of
 * left.get(4*i + k) * right.get(4*k + j). Buffer elements are re-read for every term.
 *
 * @param left   left-hand operand (16 floats)
 * @param right  right-hand operand (16 floats)
 * @param target receives the product (16 floats)
 */
public static void mul4f(FloatBuffer left, FloatBuffer right, FloatBuffer target) {
// Results go to locals first and are stored afterwards, so target may
// be the same buffer as left or right.
float m00 = left.get(0) * right.get(0) + left.get(1) * right.get(4) + left.get(2) * right.get(8) + left.get(3) * right.get(12);
float m10 = left.get(0) * right.get(1) + left.get(1) * right.get(5) + left.get(2) * right.get(9) + left.get(3) * right.get(13);
float m20 = left.get(0) * right.get(2) + left.get(1) * right.get(6) + left.get(2) * right.get(10) + left.get(3) * right.get(14);
float m30 = left.get(0) * right.get(3) + left.get(1) * right.get(7) + left.get(2) * right.get(11) + left.get(3) * right.get(15);
float m01 = left.get(4) * right.get(0) + left.get(5) * right.get(4) + left.get(6) * right.get(8) + left.get(7) * right.get(12);
float m11 = left.get(4) * right.get(1) + left.get(5) * right.get(5) + left.get(6) * right.get(9) + left.get(7) * right.get(13);
float m21 = left.get(4) * right.get(2) + left.get(5) * right.get(6) + left.get(6) * right.get(10) + left.get(7) * right.get(14);
float m31 = left.get(4) * right.get(3) + left.get(5) * right.get(7) + left.get(6) * right.get(11) + left.get(7) * right.get(15);
float m02 = left.get(8) * right.get(0) + left.get(9) * right.get(4) + left.get(10) * right.get(8) + left.get(11) * right.get(12);
float m12 = left.get(8) * right.get(1) + left.get(9) * right.get(5) + left.get(10) * right.get(9) + left.get(11) * right.get(13);
float m22 = left.get(8) * right.get(2) + left.get(9) * right.get(6) + left.get(10) * right.get(10) + left.get(11) * right.get(14);
float m32 = left.get(8) * right.get(3) + left.get(9) * right.get(7) + left.get(10) * right.get(11) + left.get(11) * right.get(15);
float m03 = left.get(12) * right.get(0) + left.get(13) * right.get(4) + left.get(14) * right.get(8) + left.get(15) * right.get(12);
float m13 = left.get(12) * right.get(1) + left.get(13) * right.get(5) + left.get(14) * right.get(9) + left.get(15) * right.get(13);
float m23 = left.get(12) * right.get(2) + left.get(13) * right.get(6) + left.get(14) * right.get(10) + left.get(15) * right.get(14);
float m33 = left.get(12) * right.get(3) + left.get(13) * right.get(7) + left.get(14) * right.get(11) + left.get(15) * right.get(15);
target.put(0, m00);
target.put(1, m10);
target.put(2, m20);
target.put(3, m30);
target.put(4, m01);
target.put(5, m11);
target.put(6, m21);
target.put(7, m31);
target.put(8, m02);
target.put(9, m12);
target.put(10, m22);
target.put(11, m32);
target.put(12, m03);
target.put(13, m13);
target.put(14, m23);
target.put(15, m33);
}
/**
 * Same product as mul4f, but each input element is read exactly once:
 * the whole right operand is loaded into locals up front, then the left
 * operand is consumed four elements at a time.
 *
 * @param left   left-hand operand (16 floats)
 * @param right  right-hand operand (16 floats)
 * @param target receives the product (16 floats)
 */
public static void mul4fOptimized(FloatBuffer left, FloatBuffer right, FloatBuffer target) {
// Load all of right before any store to target.
float
r00 = right.get(0), r10 = right.get(1), r20 = right.get(2), r30 = right.get(3),
r01 = right.get(4), r11 = right.get(5), r21 = right.get(6), r31 = right.get(7),
r02 = right.get(8), r12 = right.get(9), r22 = right.get(10), r32 = right.get(11),
r03 = right.get(12), r13 = right.get(13), r23 = right.get(14), r33 = right.get(15);
// left 0..3 -> target 0..3
float l0 = left.get(0), l1 = left.get(1), l2 = left.get(2), l3 = left.get(3);
target.put(0, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03);
target.put(1, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13);
target.put(2, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23);
target.put(3, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33);
// left 4..7 -> target 4..7
l0 = left.get(4);
l1 = left.get(5);
l2 = left.get(6);
l3 = left.get(7);
target.put(4, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03);
target.put(5, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13);
target.put(6, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23);
target.put(7, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33);
// left 8..11 -> target 8..11
l0 = left.get(8);
l1 = left.get(9);
l2 = left.get(10);
l3 = left.get(11);
target.put(8, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03);
target.put(9, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13);
target.put(10, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23);
target.put(11, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33);
// left 12..15 -> target 12..15
l0 = left.get(12);
l1 = left.get(13);
l2 = left.get(14);
l3 = left.get(15);
target.put(12, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03);
target.put(13, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13);
target.put(14, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23);
target.put(15, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33);
}
}
/**
 * Static helpers for a matrix addressed by a raw memory address (a long)
 * and accessed through sun.misc.Unsafe.getFloat / putFloat.
 * Element i lives at address + 4*i and corresponds to the i-th field of
 * Matrix4fField in declaration order (0 -> m00, 1 -> m10, 2 -> m20, 3 -> m30, 4 -> m01, ...).
 */
public static class Matrix4fUnsafe {
    private static final Unsafe unsafe;
    /** Offset of the "address" field declared in java.nio.Buffer, as reported by Unsafe. */
    private static final long ADDRESS;

    static {
        try {
            unsafe = getUnsafeInstance();
            ADDRESS = unsafe.objectFieldOffset(Buffer.class.getDeclaredField("address"));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Allocates a 64-byte direct buffer in native byte order, views it as a
     * FloatBuffer and fills indices 0..15 with 1.0f through 16.0f.
     *
     * @return the freshly initialized buffer
     */
    public static FloatBuffer create() {
        FloatBuffer buffer = ByteBuffer.allocateDirect(16 * 4).order(ByteOrder.nativeOrder()).asFloatBuffer();
        buffer.put(0, 1.0f);
        buffer.put(1, 2.0f);
        buffer.put(2, 3.0f);
        buffer.put(3, 4.0f);
        buffer.put(4, 5.0f);
        buffer.put(5, 6.0f);
        buffer.put(6, 7.0f);
        buffer.put(7, 8.0f);
        buffer.put(8, 9.0f);
        buffer.put(9, 10.0f);
        buffer.put(10, 11.0f);
        buffer.put(11, 12.0f);
        buffer.put(12, 13.0f);
        buffer.put(13, 14.0f);
        buffer.put(14, 15.0f);
        buffer.put(15, 16.0f);
        return buffer;
    }

    /**
     * Compares the sixteen floats stored at the given address with the
     * fields of the reference matrix. The comparison is exact (!=).
     *
     * @param ref  reference matrix
     * @param test base address of the matrix to check
     * @throws IllegalStateException if any element differs from its reference field
     */
    public static void validate(Matrix4fField ref, long test) {
        if (
            get(test, 0) != ref.m00 ||
            get(test, 1) != ref.m10 ||
            get(test, 2) != ref.m20 ||
            get(test, 3) != ref.m30 ||
            get(test, 4) != ref.m01 ||
            get(test, 5) != ref.m11 ||
            get(test, 6) != ref.m21 ||
            get(test, 7) != ref.m31 ||
            get(test, 8) != ref.m02 ||
            get(test, 9) != ref.m12 ||
            get(test, 10) != ref.m22 ||
            get(test, 11) != ref.m32 ||
            get(test, 12) != ref.m03 ||
            get(test, 13) != ref.m13 ||
            get(test, 14) != ref.m23 ||
            get(test, 15) != ref.m33
        )
            throw new IllegalStateException();
    }

    /**
     * Reads the value of the buffer's "address" field.
     * NOTE(review): the returned address is presumably only usable while the
     * buffer (and the direct memory behind it) is kept alive by the caller.
     *
     * @param buffer buffer whose address field is read
     * @return the long stored in that field
     */
    static long address(FloatBuffer buffer) {
        return unsafe.getLong(buffer, ADDRESS);
    }

    /** Reads the float located index * 4 bytes past the given base address. */
    private static float get(long buffer, int index) {
        return unsafe.getFloat(buffer + (index * 4));
    }

    /** Writes value to the float located index * 4 bytes past the given base address. */
    private static void put(long buffer, int index, float value) {
        unsafe.putFloat(buffer + (index * 4), value);
    }

    /**
     * Straightforward multiply: element 4*i + j of target is the sum over k of
     * left[4*i + k] * right[4*k + j]. Memory is re-read for every term.
     *
     * @param left   base address of the left-hand operand (16 floats)
     * @param right  base address of the right-hand operand (16 floats)
     * @param target base address receiving the product (16 floats)
     */
    public static void mul4f(long left, long right, long target) {
        // Results go to locals first and are stored afterwards, so target
        // may be the same address as left or right.
        float m00 = get(left, 0) * get(right, 0) + get(left, 1) * get(right, 4) + get(left, 2) * get(right, 8) + get(left, 3) * get(right, 12);
        float m10 = get(left, 0) * get(right, 1) + get(left, 1) * get(right, 5) + get(left, 2) * get(right, 9) + get(left, 3) * get(right, 13);
        float m20 = get(left, 0) * get(right, 2) + get(left, 1) * get(right, 6) + get(left, 2) * get(right, 10) + get(left, 3) * get(right, 14);
        float m30 = get(left, 0) * get(right, 3) + get(left, 1) * get(right, 7) + get(left, 2) * get(right, 11) + get(left, 3) * get(right, 15);
        float m01 = get(left, 4) * get(right, 0) + get(left, 5) * get(right, 4) + get(left, 6) * get(right, 8) + get(left, 7) * get(right, 12);
        float m11 = get(left, 4) * get(right, 1) + get(left, 5) * get(right, 5) + get(left, 6) * get(right, 9) + get(left, 7) * get(right, 13);
        float m21 = get(left, 4) * get(right, 2) + get(left, 5) * get(right, 6) + get(left, 6) * get(right, 10) + get(left, 7) * get(right, 14);
        float m31 = get(left, 4) * get(right, 3) + get(left, 5) * get(right, 7) + get(left, 6) * get(right, 11) + get(left, 7) * get(right, 15);
        float m02 = get(left, 8) * get(right, 0) + get(left, 9) * get(right, 4) + get(left, 10) * get(right, 8) + get(left, 11) * get(right, 12);
        float m12 = get(left, 8) * get(right, 1) + get(left, 9) * get(right, 5) + get(left, 10) * get(right, 9) + get(left, 11) * get(right, 13);
        float m22 = get(left, 8) * get(right, 2) + get(left, 9) * get(right, 6) + get(left, 10) * get(right, 10) + get(left, 11) * get(right, 14);
        float m32 = get(left, 8) * get(right, 3) + get(left, 9) * get(right, 7) + get(left, 10) * get(right, 11) + get(left, 11) * get(right, 15);
        float m03 = get(left, 12) * get(right, 0) + get(left, 13) * get(right, 4) + get(left, 14) * get(right, 8) + get(left, 15) * get(right, 12);
        float m13 = get(left, 12) * get(right, 1) + get(left, 13) * get(right, 5) + get(left, 14) * get(right, 9) + get(left, 15) * get(right, 13);
        float m23 = get(left, 12) * get(right, 2) + get(left, 13) * get(right, 6) + get(left, 14) * get(right, 10) + get(left, 15) * get(right, 14);
        float m33 = get(left, 12) * get(right, 3) + get(left, 13) * get(right, 7) + get(left, 14) * get(right, 11) + get(left, 15) * get(right, 15);
        put(target, 0, m00);
        put(target, 1, m10);
        put(target, 2, m20);
        put(target, 3, m30);
        put(target, 4, m01);
        put(target, 5, m11);
        put(target, 6, m21);
        put(target, 7, m31);
        put(target, 8, m02);
        put(target, 9, m12);
        put(target, 10, m22);
        put(target, 11, m32);
        put(target, 12, m03);
        put(target, 13, m13);
        put(target, 14, m23);
        put(target, 15, m33);
    }

    /**
     * Same product as mul4f, but each input element is read exactly once:
     * the whole right operand is loaded into locals up front, then the left
     * operand is consumed four elements at a time.
     *
     * @param left   base address of the left-hand operand (16 floats)
     * @param right  base address of the right-hand operand (16 floats)
     * @param target base address receiving the product (16 floats)
     */
    public static void mul4fOptimized(long left, long right, long target) {
        // Load all of right before any store to target. The local rXY must be
        // read from index 0..15 in order (r00, r10, r20, r30, r01, ...),
        // exactly as in the array and buffer variants.
        float
            r00 = get(right, 0), r10 = get(right, 1), r20 = get(right, 2), r30 = get(right, 3),
            r01 = get(right, 4), r11 = get(right, 5), r21 = get(right, 6), r31 = get(right, 7),
            r02 = get(right, 8), r12 = get(right, 9), r22 = get(right, 10), r32 = get(right, 11),
            r03 = get(right, 12), r13 = get(right, 13), r23 = get(right, 14), r33 = get(right, 15);

        // left 0..3 -> target 0..3
        float l0 = get(left, 0), l1 = get(left, 1), l2 = get(left, 2), l3 = get(left, 3);
        put(target, 0, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03);
        put(target, 1, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13);
        put(target, 2, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23);
        put(target, 3, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33);

        // left 4..7 -> target 4..7
        l0 = get(left, 4);
        l1 = get(left, 5);
        l2 = get(left, 6);
        l3 = get(left, 7);
        put(target, 4, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03);
        put(target, 5, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13);
        put(target, 6, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23);
        put(target, 7, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33);

        // left 8..11 -> target 8..11
        l0 = get(left, 8);
        l1 = get(left, 9);
        l2 = get(left, 10);
        l3 = get(left, 11);
        put(target, 8, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03);
        put(target, 9, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13);
        put(target, 10, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23);
        put(target, 11, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33);

        // left 12..15 -> target 12..15
        l0 = get(left, 12);
        l1 = get(left, 13);
        l2 = get(left, 14);
        l3 = get(left, 15);
        put(target, 12, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03);
        put(target, 13, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13);
        put(target, 14, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23);
        put(target, 15, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33);
    }
}
/**
 * Obtains the Unsafe singleton by reading the static "theUnsafe" field
 * reflectively, after making that field accessible.
 *
 * @return the value of the static field, cast to Unsafe
 * @throws NoSuchFieldException   if no "theUnsafe" field is found on Unsafe or its superclasses
 * @throws IllegalAccessException if the field cannot be read
 */
private static Unsafe getUnsafeInstance() throws NoSuchFieldException, IllegalAccessException {
    final Field theUnsafe = getDeclaredFieldRecursive(Unsafe.class, "theUnsafe");
    theUnsafe.setAccessible(true);
    // Static field, so no receiver object is needed.
    final Object instance = theUnsafe.get(null);
    return (Unsafe) instance;
}
/**
 * Looks up a declared field by name on the given class, walking up the
 * superclass chain until a class declaring it is found.
 *
 * @param leaf      class at which the search starts
 * @param fieldName name of the field to find
 * @return the field as declared by leaf or by the nearest superclass declaring it
 * @throws NoSuchFieldException if neither leaf nor any superclass declares the field
 */
private static Field getDeclaredFieldRecursive(Class<?> leaf, final String fieldName) throws NoSuchFieldException {
    for (Class<?> type = leaf; type != null; type = type.getSuperclass()) {
        try {
            return type.getDeclaredField(fieldName);
        } catch (NoSuchFieldException ignored) {
            // Not declared at this level; continue with the superclass.
        }
    }
    throw new NoSuchFieldException(fieldName + " does not exist in " + leaf.getSimpleName() + " or any of its superclasses.");
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment