AMD TeraScale VLIW code for an int-heavy compute shader (excerpt)
Listing 1: ALU clause 268, bundles 1177-1197. Every bundle uses only the x, y, z and w slots, and the four multiplies by 19 are issued as MUL_INT24__NI.
Note: GitHub flags this listing as containing bidirectional Unicode text; open it in an editor that reveals hidden Unicode characters if the exact bytes matter.
| 267 WAIT_ACK: Outstanding_acks <= 0 | |
| 268 ALU_POP_AFTER: ADDR(3659) CNT(92) | |
| 1177 x: ADD_INT ____, R0.x, 96 | |
| y: ADD_INT ____, R0.x, 24 | |
| z: ADD_INT R2.z, R0.x, 16 | |
| w: ADD_INT R2.w, R0.x, 104 | |
| 1178 x: LSHL ____, PV1177.y, 2 | |
| y: ADD_INT R2.y, R0.x, 8 | |
| z: LSHL R3.z, PV1177.x, 2 | |
| w: ADD_INT ____, 128, R0.w | |
| 1179 x: LDS_READ_RET QA, PV1178.w | |
| y: LSHL R3.y, R2.z, 2 | |
| z: LSHL R2.z, R2.w, 2 | |
| w: ADD_INT ____, 128, PV1178.x | |
| 1180 x: LDS_READ_RET QA, PV1179.w | |
| y: ADD_INT ____, 128, R13.w | |
| z: LSHL ____, R2.y, 2 | |
| w: ADD_INT ____, 128, R3.z | |
| 1181 x: LDS_READ2_RET QAB, PV1180.y, PV1180.w | |
| y: ADD_INT ____, 128, R2.z | |
| z: ADD_INT R2.z, 128, PV1180.z | |
| w: ADD_INT ____, 128, R3.y | |
| 1182 x: LDS_READ2_RET QAB, PV1181.w, PV1181.y | |
| 1183 x: LDS_READ_RET QA, R2.z | |
| 1184 z: MOV R2.z, QA[1179].pop | |
| 1185 w: ADD_INT ____, PV1184.z, QA[1180].pop VEC_102 | |
| 1186 x: ADD_INT ____, PV1185.w, 1048592 | |
| y: MOV R3.y, QA[1181].pop | |
| z: MOV R3.z, QB[1181].pop VEC_120 | |
| 1187 x: MOV R2.x, QB[1182].pop VEC_120 | |
| y: AND_INT R2.y, 0xFFFEFFFE, PV1186.x | |
| z: MOV ____, QA[1182].pop | |
| 1188 x: ADD_INT R3.x, R3.z, PV1187.z | |
| y: ADD_INT ____, R3.y, PV1187.x | |
| z: MOV ____, QA[1183].pop | |
| w: ADD_INT R3.w, R2.z, PV1187.z VEC_120 | |
| 1189 x: ADD_INT ____, PV1188.y, 1048592 | |
| y: ADD_INT ____, PV1188.z, R3.z | |
| z: ADD_INT R3.z, PV1188.z, R2.x | |
| w: BFE_INT R2.w, PV1188.x, 0.0f, 0x0000000C | |
| 1190 x: BFE_INT R3.x, R3.x, 0x00000010, 0x0000000C | |
| y: BFE_INT R3.y, PV1189.y, 0.0f, 0x0000000C | |
| z: BFE_INT R2.z, PV1189.y, 0x00000010, 0x0000000C | |
| w: AND_INT R4.w, 0xFFFEFFFE, PV1189.x | |
| 1191 x: BFE_INT R2.x, R2.y, 0x00000010, 0x0000000C | |
| y: BFE_INT ____, R2.y, 0.0f, 0x0000000C | |
| z: MUL_INT24__NI ____, R2.w, 19 | |
| w: LSHL R2.w, R3.z, 2 | |
| 1192 x: MUL_INT24__NI ____, R3.x, 19 | |
| y: LSHL R2.y, R3.w, 2 | |
| z: ADD_INT R3.z, PV1191.y, PV1191.z | |
| w: BFE_INT R3.w, R4.w, 0.0f, 0x0000000C VEC_120 | |
| 1193 x: MUL_INT24__NI ____, R2.z, 19 | |
| y: MUL_INT24__NI ____, R3.y, 19 | |
| z: BFE_INT ____, R4.w, 0x00000010, 0x0000000C | |
| w: ADD_INT R4.w, R2.x, PV1192.x | |
| 1194 x: ADD_INT R2.x, R3.w, PV1193.y | |
| y: ADD_INT R3.y, PV1193.z, PV1193.x | |
| z: BFE_INT ____, R2.w, 0x00000010, 0x0000000E VEC_120 | |
| w: BFE_INT ____, R2.w, 0.0f, 0x0000000E VEC_120 | |
| 1195 x: BFE_INT ____, R2.y, 0.0f, 0x0000000E | |
| y: SUB_INT ____, R3.z, PV1194.w | |
| z: SUB_INT ____, R4.w, PV1194.z | |
| w: BFE_INT ____, R2.y, 0x00000010, 0x0000000E | |
| 1196 x: SUB_INT ____, R3.y, PV1195.w | |
| y: ASHR ____, PV1195.y, 5 | |
| z: ASHR ____, PV1195.z, 5 | |
| w: SUB_INT ____, R2.x, PV1195.x | |
| 1197 x: ASHR R3.x, PV1196.w, 5 | |
| y: ASHR R3.y, PV1196.x, 5 | |
| z: MOV R3.z, PV1196.y | |
| w: MOV R3.w, PV1196.z | |
| 269 POP (1) ADDR(270) | |
Listing 2: ALU clause 236, bundles 954-972. Bundles here may also use a fifth t slot, and the four multiplies by 19 are issued as MULLO_INT in that t slot.
Note: GitHub flags this listing as containing bidirectional Unicode text; open it in an editor that reveals hidden Unicode characters if the exact bytes matter.
| 235 WAIT_ACK: Outstanding_acks <= 0 | |
| 236 ALU_POP_AFTER: ADDR(3125) CNT(92) | |
| 954 x: ADD_INT ____, R0.x, 96 | |
| y: ADD_INT ____, R0.x, 24 | |
| z: ADD_INT ____, R0.x, 16 | |
| w: ADD_INT T0.w, R0.x, 104 | |
| 955 x: LSHL ____, PV954.y, 2 | |
| y: ADD_INT ____, R0.x, 8 | |
| z: LSHL T0.z, PV954.x, 2 | |
| w: ADD_INT ____, 128, R0.z | |
| t: LSHL T0.x, PV954.z, 2 | |
| 956 x: LDS_READ_RET QA, PV955.w | |
| y: LSHL ____, T0.w, 2 | |
| z: ADD_INT ____, 128, R12.w | |
| w: ADD_INT ____, 128, PV955.x | |
| t: LSHL T1.x, PV955.y, 2 | |
| 957 x: LDS_READ2_RET QAB, PV956.w, PV956.z | |
| y: ADD_INT ____, 128, T0.x | |
| z: ADD_INT T0.z, 128, PV956.y | |
| w: ADD_INT ____, 128, T0.z | |
| 958 x: LDS_READ2_RET QAB, PV957.w, PV957.y | |
| w: ADD_INT ____, 128, T1.x | |
| 959 x: LDS_READ2_RET QAB, T0.z, PV958.w | |
| 960 z: MOV T0.z, QA[956].pop | |
| 961 y: MOV T0.y, QB[957].pop VEC_120 | |
| w: ADD_INT ____, PV960.z, QA[957].pop VEC_102 | |
| 962 x: MOV ____, QB[958].pop VEC_120 | |
| y: ADD_INT ____, PV961.w, 1048592 | |
| z: MOV T1.z, QA[958].pop | |
| 963 x: MOV ____, QB[959].pop VEC_120 | |
| y: ADD_INT T1.y, PV962.z, PV962.x | |
| z: MOV ____, QA[959].pop | |
| w: AND_INT T0.w, 0xFFFEFFFE, PV962.y | |
| t: ADD_INT ____, T0.z, PV962.x | |
| 964 x: ADD_INT ____, PV963.x, T1.z | |
| y: ADD_INT ____, T0.y, PV963.z | |
| z: ADD_INT ____, PV963.x, PV963.z | |
| w: BFE_INT T1.w, PV963.y, 0.0f, 0x0000000C | |
| t: LSHL T0.y, PS963, 2 | |
| 965 x: ADD_INT ____, PV964.y, 1048592 | |
| y: BFE_INT T1.y, T1.y, 0x00000010, 0x0000000C | |
| z: BFE_INT T1.z, PV964.x, 0x00000010, 0x0000000C | |
| w: BFE_INT T2.w, PV964.x, 0.0f, 0x0000000C | |
| t: LSHL T3.w, PV964.z, 2 | |
| 966 x: BFE_INT T1.x, T0.w, 0x00000010, 0x0000000C | |
| y: BFE_INT ____, T0.w, 0.0f, 0x0000000C | |
| w: AND_INT ____, 0xFFFEFFFE, PV965.x | |
| t: MULLO_INT ____, T1.w, 19 | |
| 967 x: BFE_INT T0.x, PV966.w, 0x00000010, 0x0000000C | |
| y: BFE_INT ____, T3.w, 0.0f, 0x0000000E | |
| z: ADD_INT ____, PV966.y, PS966 | |
| w: BFE_INT T0.w, PV966.w, 0.0f, 0x0000000C | |
| t: MULLO_INT ____, T1.y, 19 | |
| 968 x: BFE_INT T1.x, T0.y, 0.0f, 0x0000000E | |
| y: SUB_INT ____, PV967.z, PV967.y | |
| z: BFE_INT ____, T3.w, 0x00000010, 0x0000000E | |
| w: ADD_INT ____, T1.x, PS967 | |
| t: MULLO_INT ____, T2.w, 19 | |
| 969 x: ADD_INT ____, T0.w, PS968 | |
| y: BFE_INT T0.y, T0.y, 0x00000010, 0x0000000E | |
| z: SUB_INT ____, PV968.w, PV968.z | |
| w: ASHR T0.w, PV968.y, 5 | |
| t: MULLO_INT ____, T1.z, 19 | |
| 970 x: ASHR T0.x, PV969.z, 5 | |
| y: ADD_INT ____, T0.x, PS969 | |
| w: SUB_INT ____, PV969.x, T1.x | |
| 971 x: SUB_INT ____, PV970.y, T0.y | |
| t: ASHR R2.x, PV970.w, 5 | |
| 972 y: ASHR R2.y, PV971.x, 5 | |
| z: MOV R2.z, T0.w | |
| w: MOV R2.w, T0.x | |
| 237 POP (1) ADDR(238) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment