Created
June 10, 2014 08:55
-
-
Save junkdog/f7d91692be21ed01d8ea to your computer and use it in GitHub Desktop.
Taken from Spasi + http://www.java-gaming.org/topics/jep-for-making-unsafe-a-public-api/32964/msg/310519/view.html#msg310519
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.openjdk.jmh.annotations.*; | |
import java.lang.reflect.Field; | |
import java.nio.Buffer; | |
import java.nio.ByteBuffer; | |
import java.nio.ByteOrder; | |
import java.nio.FloatBuffer; | |
import java.util.concurrent.TimeUnit; | |
import sun.misc.Unsafe; | |
@BenchmarkMode(Mode.AverageTime) | |
@OutputTimeUnit(TimeUnit.NANOSECONDS) | |
@State(Scope.Thread) | |
public class UnsafeBench { | |
private Matrix4fField fieldA; | |
private Matrix4fField fieldB; | |
private Matrix4fField fieldC; | |
private float[] arrayA; | |
private float[] arrayB; | |
private float[] arrayC; | |
private FloatBuffer bufferA; | |
private FloatBuffer bufferB; | |
private FloatBuffer bufferC; | |
private long unsafeA; | |
private long unsafeB; | |
private long unsafeC; | |
@Setup | |
public void prepare() { | |
fieldA = new Matrix4fField(); | |
fieldB = new Matrix4fField(); | |
fieldC = new Matrix4fField(); | |
arrayA = Matrix4fArray.create(); | |
arrayB = Matrix4fArray.create(); | |
arrayC = Matrix4fArray.create(); | |
bufferA = Matrix4fBuffer.create(); | |
bufferB = Matrix4fBuffer.create(); | |
bufferC = Matrix4fBuffer.create(); | |
unsafeA = Matrix4fUnsafe.address(bufferA); | |
unsafeB = Matrix4fUnsafe.address(bufferB); | |
unsafeC = Matrix4fUnsafe.address(bufferC); | |
validate(); | |
} | |
private void validate() { | |
Matrix4fField.mul4f(fieldA, fieldB, fieldC); | |
Matrix4fArray.mul4f(arrayA, arrayB, arrayC); | |
Matrix4fArray.validate(fieldC, arrayC); | |
Matrix4fArray.mul4fOptimized(arrayA, arrayB, arrayC); | |
Matrix4fArray.validate(fieldC, arrayC); | |
Matrix4fBuffer.mul4f(bufferA, bufferB, bufferC); | |
Matrix4fBuffer.validate(fieldC, bufferC); | |
Matrix4fBuffer.mul4fOptimized(bufferA, bufferB, bufferC); | |
Matrix4fBuffer.validate(fieldC, bufferC); | |
Matrix4fUnsafe.mul4f(unsafeA, unsafeB, unsafeB); | |
Matrix4fUnsafe.validate(fieldC, unsafeC); | |
Matrix4fUnsafe.mul4fOptimized(unsafeA, unsafeB, unsafeB); | |
Matrix4fUnsafe.validate(fieldC, unsafeC); | |
} | |
@GenerateMicroBenchmark | |
public void field() { | |
Matrix4fField.mul4f(fieldA, fieldB, fieldC); | |
} | |
@GenerateMicroBenchmark | |
public void array() { | |
Matrix4fArray.mul4f(arrayA, arrayB, arrayC); | |
} | |
@GenerateMicroBenchmark | |
public void arrayOptimized() { | |
Matrix4fArray.mul4fOptimized(arrayA, arrayB, arrayC); | |
} | |
@GenerateMicroBenchmark | |
public void buffer() { | |
Matrix4fBuffer.mul4f(bufferA, bufferB, bufferC); | |
} | |
@GenerateMicroBenchmark | |
public void bufferOptimized() { | |
Matrix4fBuffer.mul4fOptimized(bufferA, bufferB, bufferC); | |
} | |
@GenerateMicroBenchmark | |
public void unsafe() { | |
Matrix4fUnsafe.mul4f(unsafeA, unsafeB, unsafeC); | |
} | |
@GenerateMicroBenchmark | |
public void unsafeOptimized() { | |
Matrix4fUnsafe.mul4fOptimized(unsafeA, unsafeB, unsafeC); | |
} | |
public static class Matrix4fField { | |
public float | |
m00, m10, m20, m30, | |
m01, m11, m21, m31, | |
m02, m12, m22, m32, | |
m03, m13, m23, m33; | |
public Matrix4fField() { | |
m00 = 1.0f; | |
m10 = 2.0f; | |
m20 = 3.0f; | |
m30 = 4.0f; | |
m01 = 5.0f; | |
m11 = 6.0f; | |
m21 = 7.0f; | |
m31 = 8.0f; | |
m02 = 9.0f; | |
m12 = 10.0f; | |
m22 = 11.0f; | |
m32 = 12.0f; | |
m03 = 13.0f; | |
m13 = 14.0f; | |
m23 = 15.0f; | |
m33 = 16.0f; | |
} | |
public void set4f(Matrix4fField src) { | |
this.m00 = src.m00; | |
this.m10 = src.m10; | |
this.m20 = src.m20; | |
this.m30 = src.m30; | |
this.m01 = src.m01; | |
this.m11 = src.m11; | |
this.m21 = src.m21; | |
this.m31 = src.m31; | |
this.m02 = src.m02; | |
this.m12 = src.m12; | |
this.m22 = src.m22; | |
this.m32 = src.m32; | |
this.m03 = src.m03; | |
this.m13 = src.m13; | |
this.m23 = src.m23; | |
this.m33 = src.m33; | |
} | |
public static void mul4f(Matrix4fField left, Matrix4fField right, Matrix4fField target) { | |
float m00 = left.m00 * right.m00 + left.m10 * right.m01 + left.m20 * right.m02 + left.m30 * right.m03; | |
float m10 = left.m00 * right.m10 + left.m10 * right.m11 + left.m20 * right.m12 + left.m30 * right.m13; | |
float m20 = left.m00 * right.m20 + left.m10 * right.m21 + left.m20 * right.m22 + left.m30 * right.m23; | |
float m30 = left.m00 * right.m30 + left.m10 * right.m31 + left.m20 * right.m32 + left.m30 * right.m33; | |
float m01 = left.m01 * right.m00 + left.m11 * right.m01 + left.m21 * right.m02 + left.m31 * right.m03; | |
float m11 = left.m01 * right.m10 + left.m11 * right.m11 + left.m21 * right.m12 + left.m31 * right.m13; | |
float m21 = left.m01 * right.m20 + left.m11 * right.m21 + left.m21 * right.m22 + left.m31 * right.m23; | |
float m31 = left.m01 * right.m30 + left.m11 * right.m31 + left.m21 * right.m32 + left.m31 * right.m33; | |
float m02 = left.m02 * right.m00 + left.m12 * right.m01 + left.m22 * right.m02 + left.m32 * right.m03; | |
float m12 = left.m02 * right.m10 + left.m12 * right.m11 + left.m22 * right.m12 + left.m32 * right.m13; | |
float m22 = left.m02 * right.m20 + left.m12 * right.m21 + left.m22 * right.m22 + left.m32 * right.m23; | |
float m32 = left.m02 * right.m30 + left.m12 * right.m31 + left.m22 * right.m32 + left.m32 * right.m33; | |
float m03 = left.m03 * right.m00 + left.m13 * right.m01 + left.m23 * right.m02 + left.m33 * right.m03; | |
float m13 = left.m03 * right.m10 + left.m13 * right.m11 + left.m23 * right.m12 + left.m33 * right.m13; | |
float m23 = left.m03 * right.m20 + left.m13 * right.m21 + left.m23 * right.m22 + left.m33 * right.m23; | |
float m33 = left.m03 * right.m30 + left.m13 * right.m31 + left.m23 * right.m32 + left.m33 * right.m33; | |
target.m00 = m00; | |
target.m10 = m10; | |
target.m20 = m20; | |
target.m30 = m30; | |
target.m01 = m01; | |
target.m11 = m11; | |
target.m21 = m21; | |
target.m31 = m31; | |
target.m02 = m02; | |
target.m12 = m12; | |
target.m22 = m22; | |
target.m32 = m32; | |
target.m03 = m03; | |
target.m13 = m13; | |
target.m23 = m23; | |
target.m33 = m33; | |
} | |
} | |
public static class Matrix4fArray { | |
public static float[] create() { | |
float[] buffer = new float[16]; | |
buffer[0] = 1.0f; | |
buffer[1] = 2.0f; | |
buffer[2] = 3.0f; | |
buffer[3] = 4.0f; | |
buffer[4] = 5.0f; | |
buffer[5] = 6.0f; | |
buffer[6] = 7.0f; | |
buffer[7] = 8.0f; | |
buffer[8] = 9.0f; | |
buffer[9] = 10.0f; | |
buffer[10] = 11.0f; | |
buffer[11] = 12.0f; | |
buffer[12] = 13.0f; | |
buffer[13] = 14.0f; | |
buffer[14] = 15.0f; | |
buffer[15] = 16.0f; | |
return buffer; | |
} | |
public static void validate(Matrix4fField ref, float[] test) { | |
if ( | |
test[0] != ref.m00 || | |
test[1] != ref.m10 || | |
test[2] != ref.m20 || | |
test[3] != ref.m30 || | |
test[4] != ref.m01 || | |
test[5] != ref.m11 || | |
test[6] != ref.m21 || | |
test[7] != ref.m31 || | |
test[8] != ref.m02 || | |
test[9] != ref.m12 || | |
test[10] != ref.m22 || | |
test[11] != ref.m32 || | |
test[12] != ref.m03 || | |
test[13] != ref.m13 || | |
test[14] != ref.m23 || | |
test[15] != ref.m33 | |
) | |
throw new IllegalStateException(); | |
} | |
public static void mul4f(float[] left, float[] right, float[] target) { | |
float m00 = left[0] * right[0] + left[1] * right[4] + left[2] * right[8] + left[3] * right[12]; | |
float m10 = left[0] * right[1] + left[1] * right[5] + left[2] * right[9] + left[3] * right[13]; | |
float m20 = left[0] * right[2] + left[1] * right[6] + left[2] * right[10] + left[3] * right[14]; | |
float m30 = left[0] * right[3] + left[1] * right[7] + left[2] * right[11] + left[3] * right[15]; | |
float m01 = left[4] * right[0] + left[5] * right[4] + left[6] * right[8] + left[7] * right[12]; | |
float m11 = left[4] * right[1] + left[5] * right[5] + left[6] * right[9] + left[7] * right[13]; | |
float m21 = left[4] * right[2] + left[5] * right[6] + left[6] * right[10] + left[7] * right[14]; | |
float m31 = left[4] * right[3] + left[5] * right[7] + left[6] * right[11] + left[7] * right[15]; | |
float m02 = left[8] * right[0] + left[9] * right[4] + left[10] * right[8] + left[11] * right[12]; | |
float m12 = left[8] * right[1] + left[9] * right[5] + left[10] * right[9] + left[11] * right[13]; | |
float m22 = left[8] * right[2] + left[9] * right[6] + left[10] * right[10] + left[11] * right[14]; | |
float m32 = left[8] * right[3] + left[9] * right[7] + left[10] * right[11] + left[11] * right[15]; | |
float m03 = left[12] * right[0] + left[13] * right[4] + left[14] * right[8] + left[15] * right[12]; | |
float m13 = left[12] * right[1] + left[13] * right[5] + left[14] * right[9] + left[15] * right[13]; | |
float m23 = left[12] * right[2] + left[13] * right[6] + left[14] * right[10] + left[15] * right[14]; | |
float m33 = left[12] * right[3] + left[13] * right[7] + left[14] * right[11] + left[15] * right[15]; | |
target[0] = m00; | |
target[1] = m10; | |
target[2] = m20; | |
target[3] = m30; | |
target[4] = m01; | |
target[5] = m11; | |
target[6] = m21; | |
target[7] = m31; | |
target[8] = m02; | |
target[9] = m12; | |
target[10] = m22; | |
target[11] = m32; | |
target[12] = m03; | |
target[13] = m13; | |
target[14] = m23; | |
target[15] = m33; | |
} | |
public static void mul4fOptimized(float[] left, float[] right, float[] target) { | |
float | |
r00 = right[0], r10 = right[1], r20 = right[2], r30 = right[3], | |
r01 = right[4], r11 = right[5], r21 = right[6], r31 = right[7], | |
r02 = right[8], r12 = right[9], r22 = right[10], r32 = right[11], | |
r03 = right[12], r13 = right[13], r23 = right[14], r33 = right[15]; | |
float l0 = left[0], l1 = left[1], l2 = left[2], l3 = left[3]; | |
target[0] = l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03; | |
target[1] = l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13; | |
target[2] = l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23; | |
target[3] = l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33; | |
l0 = left[4]; | |
l1 = left[5]; | |
l2 = left[6]; | |
l3 = left[7]; | |
target[4] = l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03; | |
target[5] = l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13; | |
target[6] = l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23; | |
target[7] = l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33; | |
l0 = left[8]; | |
l1 = left[9]; | |
l2 = left[10]; | |
l3 = left[11]; | |
target[8] = l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03; | |
target[9] = l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13; | |
target[10] = l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23; | |
target[11] = l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33; | |
l0 = left[12]; | |
l1 = left[13]; | |
l2 = left[14]; | |
l3 = left[15]; | |
target[12] = l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03; | |
target[13] = l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13; | |
target[14] = l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23; | |
target[15] = l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33; | |
} | |
} | |
public static class Matrix4fBuffer { | |
public static FloatBuffer create() { | |
FloatBuffer buffer = ByteBuffer.allocateDirect(16 * 4).order(ByteOrder.nativeOrder()).asFloatBuffer(); | |
buffer.put(0, 1.0f); | |
buffer.put(1, 2.0f); | |
buffer.put(2, 3.0f); | |
buffer.put(3, 4.0f); | |
buffer.put(4, 5.0f); | |
buffer.put(5, 6.0f); | |
buffer.put(6, 7.0f); | |
buffer.put(7, 8.0f); | |
buffer.put(8, 9.0f); | |
buffer.put(9, 10.0f); | |
buffer.put(10, 11.0f); | |
buffer.put(11, 12.0f); | |
buffer.put(12, 13.0f); | |
buffer.put(13, 14.0f); | |
buffer.put(14, 15.0f); | |
buffer.put(15, 16.0f); | |
return buffer; | |
} | |
public static void validate(Matrix4fField ref, FloatBuffer test) { | |
if ( | |
test.get(0) != ref.m00 || | |
test.get(1) != ref.m10 || | |
test.get(2) != ref.m20 || | |
test.get(3) != ref.m30 || | |
test.get(4) != ref.m01 || | |
test.get(5) != ref.m11 || | |
test.get(6) != ref.m21 || | |
test.get(7) != ref.m31 || | |
test.get(8) != ref.m02 || | |
test.get(9) != ref.m12 || | |
test.get(10) != ref.m22 || | |
test.get(11) != ref.m32 || | |
test.get(12) != ref.m03 || | |
test.get(13) != ref.m13 || | |
test.get(14) != ref.m23 || | |
test.get(15) != ref.m33 | |
) | |
throw new IllegalStateException(); | |
} | |
public static void mul4f(FloatBuffer left, FloatBuffer right, FloatBuffer target) { | |
float m00 = left.get(0) * right.get(0) + left.get(1) * right.get(4) + left.get(2) * right.get(8) + left.get(3) * right.get(12); | |
float m10 = left.get(0) * right.get(1) + left.get(1) * right.get(5) + left.get(2) * right.get(9) + left.get(3) * right.get(13); | |
float m20 = left.get(0) * right.get(2) + left.get(1) * right.get(6) + left.get(2) * right.get(10) + left.get(3) * right.get(14); | |
float m30 = left.get(0) * right.get(3) + left.get(1) * right.get(7) + left.get(2) * right.get(11) + left.get(3) * right.get(15); | |
float m01 = left.get(4) * right.get(0) + left.get(5) * right.get(4) + left.get(6) * right.get(8) + left.get(7) * right.get(12); | |
float m11 = left.get(4) * right.get(1) + left.get(5) * right.get(5) + left.get(6) * right.get(9) + left.get(7) * right.get(13); | |
float m21 = left.get(4) * right.get(2) + left.get(5) * right.get(6) + left.get(6) * right.get(10) + left.get(7) * right.get(14); | |
float m31 = left.get(4) * right.get(3) + left.get(5) * right.get(7) + left.get(6) * right.get(11) + left.get(7) * right.get(15); | |
float m02 = left.get(8) * right.get(0) + left.get(9) * right.get(4) + left.get(10) * right.get(8) + left.get(11) * right.get(12); | |
float m12 = left.get(8) * right.get(1) + left.get(9) * right.get(5) + left.get(10) * right.get(9) + left.get(11) * right.get(13); | |
float m22 = left.get(8) * right.get(2) + left.get(9) * right.get(6) + left.get(10) * right.get(10) + left.get(11) * right.get(14); | |
float m32 = left.get(8) * right.get(3) + left.get(9) * right.get(7) + left.get(10) * right.get(11) + left.get(11) * right.get(15); | |
float m03 = left.get(12) * right.get(0) + left.get(13) * right.get(4) + left.get(14) * right.get(8) + left.get(15) * right.get(12); | |
float m13 = left.get(12) * right.get(1) + left.get(13) * right.get(5) + left.get(14) * right.get(9) + left.get(15) * right.get(13); | |
float m23 = left.get(12) * right.get(2) + left.get(13) * right.get(6) + left.get(14) * right.get(10) + left.get(15) * right.get(14); | |
float m33 = left.get(12) * right.get(3) + left.get(13) * right.get(7) + left.get(14) * right.get(11) + left.get(15) * right.get(15); | |
target.put(0, m00); | |
target.put(1, m10); | |
target.put(2, m20); | |
target.put(3, m30); | |
target.put(4, m01); | |
target.put(5, m11); | |
target.put(6, m21); | |
target.put(7, m31); | |
target.put(8, m02); | |
target.put(9, m12); | |
target.put(10, m22); | |
target.put(11, m32); | |
target.put(12, m03); | |
target.put(13, m13); | |
target.put(14, m23); | |
target.put(15, m33); | |
} | |
public static void mul4fOptimized(FloatBuffer left, FloatBuffer right, FloatBuffer target) { | |
float | |
r00 = right.get(0), r10 = right.get(1), r20 = right.get(2), r30 = right.get(3), | |
r01 = right.get(4), r11 = right.get(5), r21 = right.get(6), r31 = right.get(7), | |
r02 = right.get(8), r12 = right.get(9), r22 = right.get(10), r32 = right.get(11), | |
r03 = right.get(12), r13 = right.get(13), r23 = right.get(14), r33 = right.get(15); | |
float l0 = left.get(0), l1 = left.get(1), l2 = left.get(2), l3 = left.get(3); | |
target.put(0, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03); | |
target.put(1, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13); | |
target.put(2, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23); | |
target.put(3, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33); | |
l0 = left.get(4); | |
l1 = left.get(5); | |
l2 = left.get(6); | |
l3 = left.get(7); | |
target.put(4, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03); | |
target.put(5, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13); | |
target.put(6, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23); | |
target.put(7, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33); | |
l0 = left.get(8); | |
l1 = left.get(9); | |
l2 = left.get(10); | |
l3 = left.get(11); | |
target.put(8, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03); | |
target.put(9, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13); | |
target.put(10, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23); | |
target.put(11, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33); | |
l0 = left.get(12); | |
l1 = left.get(13); | |
l2 = left.get(14); | |
l3 = left.get(15); | |
target.put(12, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03); | |
target.put(13, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13); | |
target.put(14, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23); | |
target.put(15, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33); | |
} | |
} | |
public static class Matrix4fUnsafe { | |
private static final Unsafe unsafe; | |
private static final long ADDRESS; | |
static { | |
try { | |
unsafe = getUnsafeInstance(); | |
ADDRESS = unsafe.objectFieldOffset(Buffer.class.getDeclaredField("address")); | |
} catch (Exception e) { | |
throw new RuntimeException(e); | |
} | |
} | |
public static FloatBuffer create() { | |
FloatBuffer buffer = ByteBuffer.allocateDirect(16 * 4).order(ByteOrder.nativeOrder()).asFloatBuffer(); | |
buffer.put(0, 1.0f); | |
buffer.put(1, 2.0f); | |
buffer.put(2, 3.0f); | |
buffer.put(3, 4.0f); | |
buffer.put(4, 5.0f); | |
buffer.put(5, 6.0f); | |
buffer.put(6, 7.0f); | |
buffer.put(7, 8.0f); | |
buffer.put(8, 9.0f); | |
buffer.put(9, 10.0f); | |
buffer.put(10, 11.0f); | |
buffer.put(11, 12.0f); | |
buffer.put(12, 13.0f); | |
buffer.put(13, 14.0f); | |
buffer.put(14, 15.0f); | |
buffer.put(15, 16.0f); | |
return buffer; | |
} | |
public static void validate(Matrix4fField ref, long test) { | |
if ( | |
get(test, 0) != ref.m00 || | |
get(test, 1) != ref.m10 || | |
get(test, 2) != ref.m20 || | |
get(test, 3) != ref.m30 || | |
get(test, 4) != ref.m01 || | |
get(test, 5) != ref.m11 || | |
get(test, 6) != ref.m21 || | |
get(test, 7) != ref.m31 || | |
get(test, 8) != ref.m02 || | |
get(test, 9) != ref.m12 || | |
get(test, 10) != ref.m22 || | |
get(test, 11) != ref.m32 || | |
get(test, 12) != ref.m03 || | |
get(test, 13) != ref.m13 || | |
get(test, 14) != ref.m23 || | |
get(test, 15) != ref.m33 | |
) | |
throw new IllegalStateException(); | |
} | |
static long address(FloatBuffer buffer) { | |
return unsafe.getLong(buffer, ADDRESS); | |
} | |
private static float get(long buffer, int index) { | |
return unsafe.getFloat(buffer + (index * 4)); | |
} | |
private static void put(long buffer, int index, float value) { | |
unsafe.putFloat(buffer + (index * 4), value); | |
} | |
public static void mul4f(long left, long right, long target) { | |
float m00 = get(left, 0) * get(right, 0) + get(left, 1) * get(right, 4) + get(left, 2) * get(right, 8) + get(left, 3) * get(right, 12); | |
float m10 = get(left, 0) * get(right, 1) + get(left, 1) * get(right, 5) + get(left, 2) * get(right, 9) + get(left, 3) * get(right, 13); | |
float m20 = get(left, 0) * get(right, 2) + get(left, 1) * get(right, 6) + get(left, 2) * get(right, 10) + get(left, 3) * get(right, 14); | |
float m30 = get(left, 0) * get(right, 3) + get(left, 1) * get(right, 7) + get(left, 2) * get(right, 11) + get(left, 3) * get(right, 15); | |
float m01 = get(left, 4) * get(right, 0) + get(left, 5) * get(right, 4) + get(left, 6) * get(right, 8) + get(left, 7) * get(right, 12); | |
float m11 = get(left, 4) * get(right, 1) + get(left, 5) * get(right, 5) + get(left, 6) * get(right, 9) + get(left, 7) * get(right, 13); | |
float m21 = get(left, 4) * get(right, 2) + get(left, 5) * get(right, 6) + get(left, 6) * get(right, 10) + get(left, 7) * get(right, 14); | |
float m31 = get(left, 4) * get(right, 3) + get(left, 5) * get(right, 7) + get(left, 6) * get(right, 11) + get(left, 7) * get(right, 15); | |
float m02 = get(left, 8) * get(right, 0) + get(left, 9) * get(right, 4) + get(left, 10) * get(right, 8) + get(left, 11) * get(right, 12); | |
float m12 = get(left, 8) * get(right, 1) + get(left, 9) * get(right, 5) + get(left, 10) * get(right, 9) + get(left, 11) * get(right, 13); | |
float m22 = get(left, 8) * get(right, 2) + get(left, 9) * get(right, 6) + get(left, 10) * get(right, 10) + get(left, 11) * get(right, 14); | |
float m32 = get(left, 8) * get(right, 3) + get(left, 9) * get(right, 7) + get(left, 10) * get(right, 11) + get(left, 11) * get(right, 15); | |
float m03 = get(left, 12) * get(right, 0) + get(left, 13) * get(right, 4) + get(left, 14) * get(right, 8) + get(left, 15) * get(right, 12); | |
float m13 = get(left, 12) * get(right, 1) + get(left, 13) * get(right, 5) + get(left, 14) * get(right, 9) + get(left, 15) * get(right, 13); | |
float m23 = get(left, 12) * get(right, 2) + get(left, 13) * get(right, 6) + get(left, 14) * get(right, 10) + get(left, 15) * get(right, 14); | |
float m33 = get(left, 12) * get(right, 3) + get(left, 13) * get(right, 7) + get(left, 14) * get(right, 11) + get(left, 15) * get(right, 15); | |
put(target, 0, m00); | |
put(target, 1, m10); | |
put(target, 2, m20); | |
put(target, 3, m30); | |
put(target, 4, m01); | |
put(target, 5, m11); | |
put(target, 6, m21); | |
put(target, 7, m31); | |
put(target, 8, m02); | |
put(target, 9, m12); | |
put(target, 10, m22); | |
put(target, 11, m32); | |
put(target, 12, m03); | |
put(target, 13, m13); | |
put(target, 14, m23); | |
put(target, 15, m33); | |
} | |
public static void mul4fOptimized(long left, long right, long target) { | |
float | |
r00 = get(right, 0), r10 = get(right, 1), r20 = get(right, 3), r30 = get(right, 4), | |
r01 = get(right, 5), r11 = get(right, 5), r21 = get(right, 6), r31 = get(right, 7), | |
r02 = get(right, 8), r12 = get(right, 9), r22 = get(right, 10), r32 = get(right, 11), | |
r03 = get(right, 12), r13 = get(right, 13), r23 = get(right, 14), r33 = get(right, 15); | |
float l0 = get(left, 0), l1 = get(left, 1), l2 = get(left, 2), l3 = get(left, 3); | |
put(target, 0, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03); | |
put(target, 1, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13); | |
put(target, 2, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23); | |
put(target, 3, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33); | |
l0 = get(left, 4); | |
l1 = get(left, 5); | |
l2 = get(left, 6); | |
l3 = get(left, 7); | |
put(target, 4, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03); | |
put(target, 5, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13); | |
put(target, 6, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23); | |
put(target, 7, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33); | |
l0 = get(left, 8); | |
l1 = get(left, 9); | |
l2 = get(left, 10); | |
l3 = get(left, 11); | |
put(target, 8, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03); | |
put(target, 9, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13); | |
put(target, 10, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23); | |
put(target, 11, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33); | |
l0 = get(left, 12); | |
l1 = get(left, 13); | |
l2 = get(left, 14); | |
l3 = get(left, 15); | |
put(target, 12, l0 * r00 + l1 * r01 + l2 * r02 + l3 * r03); | |
put(target, 13, l0 * r10 + l1 * r11 + l2 * r12 + l3 * r13); | |
put(target, 14, l0 * r20 + l1 * r21 + l2 * r22 + l3 * r23); | |
put(target, 15, l0 * r30 + l1 * r31 + l2 * r32 + l3 * r33); | |
} | |
} | |
private static Unsafe getUnsafeInstance() throws NoSuchFieldException, IllegalAccessException { | |
Field field = getDeclaredFieldRecursive(Unsafe.class, "theUnsafe"); | |
field.setAccessible(true); | |
return (Unsafe)field.get(null); | |
} | |
private static Field getDeclaredFieldRecursive(Class<?> leaf, final String fieldName) throws NoSuchFieldException { | |
Class type = leaf; | |
while ( type != null ) { | |
try { | |
return type.getDeclaredField(fieldName); | |
} catch (NoSuchFieldException e) { | |
type = type.getSuperclass(); | |
} | |
} | |
throw new NoSuchFieldException(fieldName + " does not exist in " + leaf.getSimpleName() + " or any of its superclasses."); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment