Skip to content

Instantly share code, notes, and snippets.

@rygorous
Created July 6, 2017 21:40
Embed
What would you like to do?
Row-major vs. column-major mat mult.
#version 430 core
// Test case 1 of 2: transform `pos` by the first matrix in the `mats` buffer
// with the matrix as the LEFT operand (matrix * vector).
// Note: this shader writes only `pos_out`; gl_Position is never assigned.
in vec4 pos;
out vec4 pos_out;
// `buffer` block containing an unsized array of 4x4 matrices.
// No row_major/column_major qualifier is specified here.
buffer mats {
mat4x4 Matrices[];
};
void main()
{
// matrix * vector: in GLSL the vector on the right is treated as a column vector.
pos_out = Matrices[0] * pos;
}
; -------- Disassembly --------------------
; Compiler output for the shader above (pos_out = Matrices[0] * pos).
; NOTE(review): ISA looks like AMD R600-family VLIW (control-flow clauses,
; x/y/z/w ALU slots, PV forwarding) -- confirm against the target's ISA manual.
; NOTE(review): R1 is read below but never written in this listing; presumably
; it holds `pos`, loaded by the fetch shader invoked via CALL_FS -- confirm.
00 CALL_FS
; Clause 01: build constants. Literals are printed as (hex bits, same bits read as float).
;   R2.x = 0.0, R2.w = 1.0                  -> used by the POS0 export below
;   R0.w = 0, R0.x = 4, R0.z = 8, R0.y = 0xC -> used as the four fetch indices
; NOTE(review): indices 0/4/8/0xC are presumably dword offsets of the four
; 16-byte columns of Matrices[0] -- confirm units.
01 ALU: ADDR(32) CNT(8)
0 x: MOV R2.x, 0.0f
y: MOV R0.y, (0x0000000C, 1.681558157e-44f).x
z: MOV R0.z, (0x00000008, 1.121038771e-44f).y
w: MOV R2.w, 1.0f
1 x: MOV R0.x, (0x00000004, 5.605193857e-45f).x
w: MOV R0.w, 0.0f
; Clause 02: four fetches of four 32-bit floats each from fc150.
;   R3 <- index 0xC, R4 <- index 8, R5 <- index 4, R0 <- index 0
; The last fetch overwrites R0, which also supplied its own index (R0.w).
02 TEX: ADDR(64) CNT(4)
2 VFETCH R3, R0.y, fc150 FORMAT(32_32_32_32_FLOAT)
FETCH_TYPE(NO_INDEX_OFFSET)
3 VFETCH R4, R0.z, fc150 FORMAT(32_32_32_32_FLOAT)
FETCH_TYPE(NO_INDEX_OFFSET)
4 VFETCH R5, R0.x, fc150 FORMAT(32_32_32_32_FLOAT)
FETCH_TYPE(NO_INDEX_OFFSET)
5 VFETCH R0, R0.w, fc150 FORMAT(32_32_32_32_FLOAT)
FETCH_TYPE(NO_INDEX_OFFSET)
; Position export: R2.xxxw = (0, 0, 0, 1) given the MOVs in clause 01.
03 EXP_DONE: POS0, R2.xxxw
; Clause 04: the matrix * vector product as one MUL group plus three MULADD groups.
; Each group scales one fetched register by a single component of R1 and adds
; the previous group's result (PVn.s; presumably slot s of group n -- confirm).
; Slots are permuted in groups 6-8 (slot x handles component y, y handles w,
; z handles z, w handles x); group 9 reads PV8.w/x/z/y to undo the permutation.
; Net effect per component c in {x,y,z,w}:
;   R0.c = R1.x*R0.c + R1.y*R5.c + R1.z*R4.c + R1.w*R3.c
04 ALU: ADDR(40) CNT(16)
6 x: MUL ____, R1.w, R3.y
y: MUL ____, R1.w, R3.w
z: MUL ____, R1.w, R3.z
w: MUL ____, R1.w, R3.x
7 x: MULADD ____, R1.z, R4.y, PV6.x
y: MULADD ____, R1.z, R4.w, PV6.y
z: MULADD ____, R1.z, R4.z, PV6.z
w: MULADD ____, R1.z, R4.x, PV6.w
8 x: MULADD ____, R1.y, R5.y, PV7.x
y: MULADD ____, R1.y, R5.w, PV7.y
z: MULADD ____, R1.y, R5.z, PV7.z
w: MULADD ____, R1.y, R5.x, PV7.w
9 x: MULADD R0.x, R1.x, R0.x, PV8.w
y: MULADD R0.y, R1.x, R0.y, PV8.x
z: MULADD R0.z, R1.x, R0.z, PV8.z
w: MULADD R0.w, R1.x, R0.w, PV8.y
; Parameter export: R0 holds the accumulated result (the shader's pos_out).
05 EXP_DONE: PARAM0, R0
06 ALU: ADDR(56) CNT(1)
10 x: NOP ____
07 NOP NO_BARRIER
08 END
END_OF_PROGRAM
#version 430 core
// Test case 2 of 2: transform `pos` by the first matrix in the `mats` buffer
// with the matrix as the RIGHT operand (vector * matrix).
// Note: this shader writes only `pos_out`; gl_Position is never assigned.
in vec4 pos;
out vec4 pos_out;
// `buffer` block containing an unsized array of 4x4 matrices.
// No row_major/column_major qualifier is specified here.
buffer mats {
mat4x4 Matrices[];
};
void main()
{
// vector * matrix: in GLSL the vector on the left is treated as a row vector,
// so each result component is the dot product of `pos` with one matrix column.
pos_out = pos * Matrices[0];
}
; -------- Disassembly --------------------
; Compiler output for the shader above (pos_out = pos * Matrices[0]).
; NOTE(review): ISA looks like AMD R600-family VLIW (control-flow clauses,
; x/y/z/w ALU slots) -- confirm against the target's ISA manual.
; NOTE(review): R1 is read below but never written in this listing; presumably
; it holds `pos`, loaded by the fetch shader invoked via CALL_FS -- confirm.
00 CALL_FS
; Clause 01: build constants. Literals are printed as (hex bits, same bits read as float).
;   R0.x = 0.0, R0.w = 1.0 -> used by the POS0 export below
;   R0.y = 0, R0.z = 4, R2.z = 8, R2.y = 0xC -> used as the four fetch indices
; NOTE(review): indices 0/4/8/0xC are presumably dword offsets of the four
; 16-byte columns of Matrices[0] -- confirm units.
01 ALU: ADDR(32) CNT(8)
0 x: MOV R0.x, 0.0f
y: MOV R0.y, 0.0f
z: MOV R0.z, (0x00000004, 5.605193857e-45f).x
w: MOV R0.w, 1.0f
1 y: MOV R2.y, (0x0000000C, 1.681558157e-44f).x
z: MOV R2.z, (0x00000008, 1.121038771e-44f).y
; Clause 02: four fetches of four 32-bit floats each from fc150.
;   R3 <- index 0, R4 <- index 4, R5 <- index 8, R2 <- index 0xC
; The last fetch overwrites R2, which also supplied its own index (R2.y).
02 TEX: ADDR(64) CNT(4)
2 VFETCH R3, R0.y, fc150 FORMAT(32_32_32_32_FLOAT)
FETCH_TYPE(NO_INDEX_OFFSET)
3 VFETCH R4, R0.z, fc150 FORMAT(32_32_32_32_FLOAT)
FETCH_TYPE(NO_INDEX_OFFSET)
4 VFETCH R5, R2.z, fc150 FORMAT(32_32_32_32_FLOAT)
FETCH_TYPE(NO_INDEX_OFFSET)
5 VFETCH R2, R2.y, fc150 FORMAT(32_32_32_32_FLOAT)
FETCH_TYPE(NO_INDEX_OFFSET)
; Position export: R0.xxxw = (0, 0, 0, 1) given the MOVs in clause 01.
03 EXP_DONE: POS0, R0.xxxw
; Clause 04: four DOT4_e groups, one per output component.
; In each group the four slots pair matching components of R1 with one fetched
; register, and exactly one slot names a destination (R3.x, R3.y, R3.z, R3.w).
; NOTE(review): DOT4_e presumably sums the four slot products into that
; destination -- confirm. Under that reading:
;   R3.x = dot(R1, R3)   R3.y = dot(R1, R4)
;   R3.z = dot(R1, R5)   R3.w = dot(R1, R2)
; Group 6 writes R3.x while reading R3; later groups read only R4, R5 and R2.
; This clause is CNT(16) in four groups, the same size as the MUL/MULADD clause
; in the matrix * vector listing earlier in this file.
04 ALU: ADDR(40) CNT(16)
6 x: DOT4_e R3.x, R1.x, R3.x
y: DOT4_e ____, R1.y, R3.y
z: DOT4_e ____, R1.z, R3.z
w: DOT4_e ____, R1.w, R3.w
7 x: DOT4_e ____, R1.x, R4.x
y: DOT4_e R3.y, R1.y, R4.y
z: DOT4_e ____, R1.z, R4.z
w: DOT4_e ____, R1.w, R4.w
8 x: DOT4_e ____, R1.x, R5.x
y: DOT4_e ____, R1.y, R5.y
z: DOT4_e R3.z, R1.z, R5.z
w: DOT4_e ____, R1.w, R5.w
9 x: DOT4_e ____, R1.x, R2.x
y: DOT4_e ____, R1.y, R2.y
z: DOT4_e ____, R1.z, R2.z
w: DOT4_e R3.w, R1.w, R2.w
; Parameter export: R3 holds the four dot-product results (the shader's pos_out).
05 EXP_DONE: PARAM0, R3
06 ALU: ADDR(56) CNT(1)
10 x: NOP ____
07 NOP NO_BARRIER
08 END
END_OF_PROGRAM
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment