Created
October 18, 2011 16:16
-
-
Save arsenm/1295851 to your computer and use it in GitHub Desktop.
AMD iteration bug?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Launch with global size = 6144, local size = 256 | |
Linux x86_64 (kernel 3.0), Catalyst 11.9, SDK 2.5, building for Cayman | |
*/ | |
#if cl_khr_fp64 | |
#pragma OPENCL EXTENSION cl_khr_fp64 : enable | |
#elif cl_amd_fp64 | |
#pragma OPENCL EXTENSION cl_amd_fp64 : enable | |
#else | |
#error Missing double precision extension | |
#endif | |
/* iterationBug: minimal reproduction of a loop trip-count miscompile.
 *
 * Must be launched with a work-group size of exactly 256x1x1 (enforced by
 * reqd_work_group_size below).
 *
 * debug - counters written only by the work-item whose global id is 0:
 *         debug[0] is incremented once per outer ("j") iteration,
 *         debug[1] is incremented once per inner ("k") iteration.
 *         Expected final values are 24 and 24 * 256 = 6144 respectively,
 *         assuming the host zeroes both counters before launch (the host
 *         code is not part of this file).
 * posX  - input; each work-item reads posX[get_global_id(0)] once.
 * accX  - output; each work-item writes its accumulated sum to
 *         accX[get_global_id(0)].
 */
__attribute__ ((reqd_work_group_size(256, 1, 1))) | |
__kernel void iterationBug(__global volatile int* debug, | |
__global volatile double* restrict posX, | |
__global volatile double* restrict accX) | |
{ | |
/* Three local staging arrays, one slot per work-item in the group. */
__local double xs[256]; | |
__local double ys[256]; | |
__local double zs[256]; | |
int i = get_global_id(0); | |
double px = posX[i]; | |
double ax = 0.0; | |
for (int j = 0; j < 24; ++j) | |
{ | |
/* If I remove this "j" loop, the inner loop is executed the | |
* correct number of times */ | |
if (i == 0) | |
{ | |
/* This should be 24, and it is. */ | |
atomic_inc(&debug[0]); | |
} | |
/* Every work-item publishes the same value (px) into its own slot of
 * all three local arrays. */
xs[get_local_id(0)] = px; | |
ys[get_local_id(0)] = px; | |
zs[get_local_id(0)] = px; | |
/* Make the local stores visible to the whole work-group before any
 * work-item starts reading other work-items' slots. */
barrier(CLK_LOCAL_MEM_FENCE); | |
/* The loop executes the correct number of times if I unroll it */ | |
//#pragma unroll 8 | |
for (int k = 0; k < 256; ++k) | |
{ | |
/* Changing the amount of work in here seems to change the | |
number of iterations which actually happen. If I reduce | |
the work, it happens correctly. | |
*/ | |
ax += xs[k] + ys[k] + zs[k]; | |
if (i == 0) | |
{ | |
/* Expected final value: 24 * 256 = 6144. Observed on the configuration described at the top of this file: only 1452. */ | |
atomic_inc(&debug[1]); | |
} | |
} | |
/* Keep any work-item from overwriting the local arrays for the next
 * "j" iteration while others are still reading them. */
barrier(CLK_LOCAL_MEM_FENCE); | |
} | |
accX[i] = ax; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Macro 16383: one output, two inputs.
; Replaces the x component of in0 with div(in0.x, in1.x) and returns the
; register in out0; the y/z/w components of in0 pass through unchanged.
; No invocation of this macro is visible in this file.
; NOTE(review): zeroop(infinity) presumably selects the result for a zero
; divisor -- confirm against the IL specification.
mdef(16383)_out(1)_in(2) | |
mov r0, in0 | |
mov r1, in1 | |
div_zeroop(infinity) r0.x___, r0.x, r1.x | |
mov out0, r0 | |
mend | |
il_cs_2_0 | |
; Main program of the IL module: declares constant buffer cb0 and the
; literal constants l0-l7, then transfers control to function 1041 (the
; kernel wrapper) and ends.
dcl_cb cb0[10] ; Constant buffer that holds ABI data | |
dcl_literal l0, 4, 1, 2, 3 | |
dcl_literal l1, 0x00FFFFFF, -1, -2, -3 | |
dcl_literal l2, 0x0000FFFF, 0xFFFFFFFE,0x000000FF,0xFFFFFFFC | |
dcl_literal l3, 24, 16, 8, 0xFFFFFFFF | |
dcl_literal l4, 0xFFFFFF00, 0xFFFF0000, 0xFF00FFFF, 0xFFFF00FF | |
dcl_literal l5, 0, 4, 8, 12 | |
dcl_literal l6, 32, 32, 32, 32 | |
dcl_literal l7, 24, 31, 16, 31 | |
call 1041;$ | |
endmain | |
; Function 1041: wrapper around the kernel body (function 1061).
; It declares the work-group size and memory resources, derives the
; work-item ids from the hardware thread/group ids and cb0, loads the three
; kernel arguments from cb1 into r1-r3, and calls function 1061.
func 1041 ; __OpenCL_iterationBug_kernel | |
mov r1013, cb0[8].x | |
mov r1019, l1.0 | |
; 256x1x1 threads per group, matching reqd_work_group_size in the source.
dcl_num_thread_per_group 256, 1, 1 | |
dcl_lds_id(1) 32768 | |
dcl_raw_uav_id(11) | |
dcl_arena_uav_id(8) | |
mov r0.z, vThreadGrpIdFlat.x | |
; r1022 = thread id within the group; r1023 = group id.
mov r1022.xyz0, vTidInGrp.xyz | |
mov r1023.xyz0, vThreadGrpId.xyz | |
; r1021 = group id * cb0[1] + thread id in group + cb0[6].
; Function 1061 uses r1021.x as the per-work-item array index.
; NOTE(review): cb0[1] is presumably the local size and cb0[6] the global
; offset -- confirm against the runtime ABI.
imad r1021.xyz0, r1023.xyz0, cb0[1].xyz0, r1022.xyz0 | |
iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0 | |
iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0 | |
; r1023.w = flat group id shifted left by l0.z (= 2).
mov r1023.w, r0.z | |
ishl r1023.w, r1023.w, l0.z | |
mov r1018.x, l0.0 | |
; Literal pool read by function 1061 (values given in the trailing comments).
dcl_literal l14, 0x00000000, 0x00000000, 0x00000000, 0x00000000; f32:i32 0 | |
dcl_literal l18, 0x00000001, 0x00000001, 0x00000001, 0x00000001; f32:i32 1 | |
dcl_literal l11, 0x00000003, 0x00000003, 0x00000003, 0x00000003; f32:i32 3 | |
dcl_literal l10, 0x00000004, 0x00000004, 0x00000004, 0x00000004; f32:i32 4 | |
dcl_literal l19, 0x0000000b, 0x0000000b, 0x0000000b, 0x0000000b; f32:i32 11 | |
dcl_literal l21, 0x0000000c, 0x0000000c, 0x0000000c, 0x0000000c; f32:i32 12 | |
dcl_literal l13, 0x00000018, 0x00000018, 0x00000018, 0x00000018; f32:i32 24 | |
dcl_literal l20, 0x00000100, 0x00000100, 0x00000100, 0x00000100; f32:i32 256 | |
dcl_literal l16, 0x00000800, 0x00000800, 0x00000800, 0x00000800; f32:i32 2048 | |
dcl_literal l17, 0x00001000, 0x00001000, 0x00001000, 0x00001000; f32:i32 4096 | |
dcl_literal l15, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff; f32:i32 4294967295 | |
dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000; f64:i64 0 | |
; cb1 carries the three kernel arguments, one per entry.
dcl_cb cb1[3] | |
; Kernel arg setup: debug | |
mov r1, cb1[0] | |
; Kernel arg setup: posX | |
mov r2, cb1[1] | |
; Kernel arg setup: accX | |
mov r3, cb1[2] | |
call 1061 ; iterationBug | |
ret | |
endfunc ; __OpenCL_iterationBug_kernel | |
;ARGSTART:__OpenCL_iterationBug_kernel | |
;version:2:0:68 | |
;device:cayman | |
;uniqueid:1041 | |
;memory:hwprivate:0 | |
;memory:hwregion:0 | |
;memory:hwlocal:0 | |
;cws:256:1:1 | |
;pointer:debug:i32:1:1:0:uav:11:8 | |
;pointer:posX:double:1:1:16:uav:11:16 | |
;pointer:accX:double:1:1:32:uav:11:16 | |
;function:1:1061 | |
;uavid:11 | |
;ARGEND:__OpenCL_iterationBug_kernel | |
; Function 1061: IL for the body of the iterationBug kernel.
;
; Inputs (set up by function 1041):
;   r1 = debug, r2 = posX, r3 = accX  (loaded from cb1[0..2])
;   r1021.x = per-work-item index used for posX/accX (i in the source)
;   r1022.x = thread id within the work-group
;
; Register roles inside this function:
;   r254.x  accX base, later &accX[i]
;   r256.x  address of debug[0]
;   r257.x  r256.x + 4, i.e. address of debug[1] (debug is an i32 pointer)
;   r258.x  i
;   r259    3: shift amount that scales an index by 8 bytes (one double)
;   r255.xy px, the double loaded from posX[i]
;   r260.xy ax, the running sum (starts at 0)
;   r261.x  outer-loop countdown, starts at 24
;   r262    0        r263  0xffffffff (-1)     r266  1      r268  256
;   r264    2048     r265  4096   (byte offsets into LDS)
;   r271.x  inner-loop index k
;   r267 (11) and r269 (12) are loaded but not referenced again here.
func 1061 ; iterationBug ; @__OpenCL_iterationBug_kernel | |
; BB#0: ; %entry | |
mov r254, r3 | |
mov r256, r1 | |
mov r257, l10.xxxx | |
iadd r257.x___, r256.xxxx, r257.xxxx | |
mov r258, r1021.xyz0 | |
mov r258, r258.x000 | |
mov r259, l11.xxxx | |
; px = posX[i]: byte address = posX + (i << 3), 64-bit load into .xy.
ishl r260.x___, r258.xxxx, r259.xxxx | |
iadd r255.x___, r2.xxxx, r260.xxxx | |
mov r1010.x___, r255.xxxx | |
uav_raw_load_id(11)_cached r1011.xy__, r1010.xxxx | |
mov r255.xy__, r1011.xyxy | |
; ax = 0.0, then materialise the loop constants listed in the header.
mov r260, l12.xyxy | |
mov r261, l13.xxxx | |
mov r262, l14.xxxx | |
mov r263, l15.xxxx | |
mov r264, l16.xxxx | |
mov r265, l17.xxxx | |
mov r266, l18.xxxx | |
mov r267, l19.xxxx | |
mov r268, l20.xxxx | |
mov r269, l21.xxxx | |
; ---- outer loop (the "j" loop in the source; 24 iterations expected) ----
whileloop | |
; if (i == 0) increment debug[0].
; NOTE(review): the role of the second uav_uinc operand (r263 = -1) is not
; evident from this file -- confirm against the IL specification.
ieq r270.x___, r258.xxxx, r262.xxxx | |
if_logicalnz r270.xxxx | |
uav_uinc_id(11) r256.xxxx, r263.xxxx | |
else | |
endif | |
; Store px to LDS at byte offset (local id << 3) + 0.
; r271 is set to 0 here both as the base offset of this store and as the
; starting value of the inner-loop index k; it is re-zeroed on every outer
; iteration because this mov sits inside the outer loop.
mov r270, r1022.xyz0 | |
mov r270, r270.x000 | |
ishl r270.x___, r270.xxxx, r259.xxxx | |
mov r271, l14.xxxx | |
iadd r270.x___, r271.xxxx, r270.xxxx | |
mov r1011.xy__, r255.xyxy | |
mov r1010.x___, r270.xxxx | |
lds_store_vec_id(1) mem.xy__, r1010.xxxx, r1011.xyxy, r1011.xyxy | |
; Store px to LDS at byte offset (local id << 3) + 2048.
mov r270, r1022.xyz0 | |
mov r270, r270.x000 | |
ishl r270.x___, r270.xxxx, r259.xxxx | |
iadd r270.x___, r264.xxxx, r270.xxxx | |
mov r1011.xy__, r255.xyxy | |
mov r1010.x___, r270.xxxx | |
lds_store_vec_id(1) mem.xy__, r1010.xxxx, r1011.xyxy, r1011.xyxy | |
; Store px to LDS at byte offset (local id << 3) + 4096.
; NOTE(review): offsets 0 / 2048 / 4096 presumably correspond to xs / ys / zs
; (256 doubles = 2048 bytes each) -- the mapping is not stated in this file.
mov r270, r1022.xyz0 | |
mov r270, r270.x000 | |
ishl r270.x___, r270.xxxx, r259.xxxx | |
iadd r270.x___, r265.xxxx, r270.xxxx | |
mov r1011.xy__, r255.xyxy | |
mov r1010.x___, r270.xxxx | |
lds_store_vec_id(1) mem.xy__, r1010.xxxx, r1011.xyxy, r1011.xyxy | |
; First barrier(CLK_LOCAL_MEM_FENCE) of the source loop body.
fence_threads_memory_lds | |
; ---- inner loop (the "k" loop in the source; 256 iterations expected) ----
whileloop | |
; r270.x = k << 3 (byte offset of element k); load the doubles at LDS offsets
; 2048 + r270 (-> r272), 0 + r270 (-> r273) and 4096 + r270 (-> r270).
ishl r270.x___, r271.xxxx, r259.xxxx | |
iadd r272.x___, r264.xxxx, r270.xxxx | |
mov r1010.x___, r272.xxxx | |
lds_load_vec_id(1) r1011.xy__, r1010.xxxx, r1010.xxxx | |
mov r272.xy__, r1011.xyxy | |
iadd r273.x___, r262.xxxx, r270.xxxx | |
mov r1010.x___, r273.xxxx | |
lds_load_vec_id(1) r1011.xy__, r1010.xxxx, r1010.xxxx | |
mov r273.xy__, r1011.xyxy | |
iadd r270.x___, r265.xxxx, r270.xxxx | |
mov r1010.x___, r270.xxxx | |
lds_load_vec_id(1) r1011.xy__, r1010.xxxx, r1010.xxxx | |
mov r270.xy__, r1011.xyxy | |
; if (i == 0) increment debug[1]: this is the counter the report says ends
; at 1452 instead of 6144.
ieq r274.x___, r258.xxxx, r262.xxxx | |
if_logicalnz r274.xxxx | |
uav_uinc_id(11) r257.xxxx, r263.xxxx | |
else | |
endif | |
; ax += (value at offset 0 + value at offset 2048) + value at offset 4096.
dadd r272.xy__, r273.xyxy, r272.xyxy | |
dadd r270.xy__, r272.xyxy, r270.xyxy | |
dadd r260.xy__, r260.xyxy, r270.xyxy | |
; ++k; leave the inner loop once k == 256.
iadd r271.x___, r271.xxxx, r266.xxxx | |
ieq r270.x___, r271.xxxx, r268.xxxx | |
break_logicalnz r270.xxxx | |
endloop | |
; Second barrier(CLK_LOCAL_MEM_FENCE) of the source loop body.
fence_threads_memory_lds | |
; Outer countdown: r261 += -1; leave the outer loop once it reaches 0.
; The compare result is written to r271, which is reset to 0 at the top of
; the next outer iteration before being used as k again.
iadd r261.x___, r261.xxxx, r263.xxxx | |
ieq r271.x___, r261.xxxx, r262.xxxx | |
break_logicalnz r271.xxxx | |
endloop | |
; accX[i] = ax: byte address = accX + (i << 3), 64-bit store from r260.xy.
mov r255, l11.xxxx | |
ishl r255.x___, r258.xxxx, r255.xxxx | |
iadd r254.x___, r254.xxxx, r255.xxxx | |
mov r1011.xy__, r260.xyxy | |
mov r1010.x___, r254.xxxx | |
uav_raw_store_id(11) mem.xy__, r1010.xxxx, r1011.xyxy | |
ret | |
endfunc ; iterationBug | |
;ARGSTART:iterationBug | |
;uniqueid:1061 | |
;memory:hwregion:0 | |
;memory:hwlocal:6144 | |
;ARGEND:iterationBug | |
; Macro 232: one output, two inputs, operating on 64-bit values held in the
; .xy components (the d* instructions below are double-precision ops).
; NOTE(review): the instruction mix (dfrexp on both inputs, drcp of the
; second input's mantissa refined with dmad/dmul steps, dldexp rescaling,
; then bit-mask fix-ups) looks like a software double-precision division
; in0 / in1 -- confirm against the compiler's macro library.
; No invocation of this macro is visible in this file.
mdef(232)_out(1)_in(2) | |
mov r0, in0 | |
mov r1, in1 | |
dcl_literal l9, 12, 2, 0x7ff00000, 0x00100000 | |
; r2.x = xor of the two inputs' .y words; r3 = comparisons of each input's
; |.y| against 0x7ff00000 and 0x00100000.
; NOTE(review): presumably sign extraction and inf/NaN/denormal
; classification on the high words of the doubles -- confirm.
ixor r2.x, r0.y, r1.y | |
ige r3.x, r0.y_abs, l9.z | |
ige r3.y, r1.y_abs, l9.z | |
ilt r3.z, r0.y_abs, l9.w | |
ilt r3.w, r1.y_abs, l9.w | |
ixor r2.x, r2.x_abs, r2.x | |
dfrexp r20, r0 | |
dfrexp r21, r1 | |
ishl r0.z, r0.y, l9.x | |
ishl r1.z, r1.y, l9.x | |
ior r0.z, r0.z, r0.x | |
ior r1.z, r1.z, r1.x | |
ieq r0.z, r0.z, r0.0 | |
ieq r1.z, r1.z, r0.0 | |
mov r20.w, r20.wwww_abs | |
mov r21.w, r21.wwww_abs | |
dcl_literal l1, 0x00000000, 0x3ff00000, 0x00000001, 0 | |
dcl_literal l2, 0x16f0068e, 0x40075048, 0x00000000, 0xc0000000 | |
dcl_literal l3, 0x00040000, 0x00000300, 0, 0 | |
; Reciprocal of r21.zw followed by two dmad/dmul refinement steps.
drcp_zeroop(zero) r22.xy, r21.zw | |
dmad r23.xy, r21.zw, r22.xy, l2.zw | |
dmul r22.xy, r22.xy, r23.xy_neg(yw) | |
dmad r23.xy, r21.zw, r22.xy, l2.zw | |
dmul r22.xy, r22.xy, r23.xy_neg(yw) | |
dcl_literal l6, 0x80000000, 0x7fffffff, 0xf8000000, 0xffffffff | |
; r10 = r22 * r20.zw, then residual-based correction terms.
dmul r10.xy, r22.xy, r20.zw | |
dmad r23.xy, r21.zw, r10.xy_neg(yw), r20.zw | |
dmad r10.xy, r22.xy, r23.xy, r10.xy | |
dmad r23.xy, r21.zw, r22.xy_neg(yw), l1.xy | |
dmul r23.xy, r23.xy, r22.xy | |
dmad r24.xy, r21.zw, r10.xy_neg(yw), r20.zw | |
dmul r25.xy, r23.xy, r24.xy | |
dmad r23.xy, r22.xy, r24.xy, r25.xy | |
ior r23.y, r23.y, l3.x | |
; Integer combination of the two dfrexp .y results, then dldexp rescaling.
inegate r21.y, r21.y | |
iadd r20.x, r20.y, r21.y | |
imin r20.y, r20.x, l3.y | |
inegate r21.y, r20.y | |
iadd r20.x, r20.x, r21.y | |
dldexp r11.xy, r10.xy, r20.y | |
dldexp r12.xy, r11.xy, r21.y | |
dadd r10.xy, r10.xy, r12.xy_neg(yw) | |
dadd r10.xy, r10.xy, r23.xy | |
dldexp r12.xy, l1.xy, r20.y | |
dmad r10.xy, r10.xy, r12.xy, r11.xy | |
dldexp r10.xy, r10.xy, r20.x | |
; Build a selector from the r3 comparison flags and use it to mask or
; override the computed result.
imad r4.x, r3.x, l9.y, r3.z | |
imad r4.y, r3.y, l9.y, r3.w | |
imad r4.x, r4.x, l9.y, r0.z | |
imad r4.y, r4.y, l9.y, r1.z | |
ishl r4.x, r4.x, l9.y | |
ishl r4.y, r4.y, l9.y | |
dcl_literal l7, 0x80008000, 0x66ff66ff, 0x26f326f3, 52 | |
dcl_literal l8, 0x0048c000, 0x00123000, 0x80000000, 31 | |
ushr r4.z, l8.x, r4.x | |
ushr r4.w, l8.y, r4.y | |
ior r4.w, r4.w, r4.z | |
ishl r5.x, l7.x, r4.w | |
ishl r5.y, l7.y, r4.w | |
ishl r5.z, l7.z, r4.w | |
ishr r5.x, r5.x, l8.w | |
ishr r5.y, r5.y, l8.w | |
ishr r5.z, r5.z, l8.w | |
ishl r5.y, r5.y, l7.w | |
and r10.xy, r10.xy, r5.xx | |
ior r10.y, r10.y, r5.y_abs | |
ior r10.x, r10.x, r5.z | |
; Xor r2.x (computed at the top) into the .y word of the result.
ixor r10.y, r10.y_abs, r2.x | |
mov r0.xy, r10.xy | |
mov out0, r0 | |
mend | |
; Macro 129: one output, one input, operating on a 64-bit value in .xy.
; NOTE(review): the sequence (dldexp pre-scale, dsqrt, drcp, repeated dmad
; corrections of the form r1*r1 - r5, dldexp post-scale, cmov selection of
; special cases) looks like a refined double-precision square root --
; confirm against the compiler's macro library.
; No invocation of this macro is visible in this file.
mdef(129)_out(1)_in(1) | |
mov r0, in0 | |
dcl_literal l1, 0x00000001, 0x3ff00000, 0x000fffff, 0x3ff80000 | |
dcl_literal l2, 0xfff80000, 0x3fe00000, -1, 0 | |
dcl_literal l3, 0x10000000, 256, -128, 0x7FF00000 | |
; Flags: r10.x = (in.y < 0); r10.w = input equals either of two special
; constants; r10.z = (in.y < 0x10000000), which selects the 256 / -128
; exponent adjustments applied before and after the computation.
ilt r10.x, r0.y, l2.0 | |
deq r10.y, r0.xy, l2.00 | |
ult r10.z, r0.y, l3.x | |
deq r10.w, r0.xy, l3.0w | |
ior r10.w, r10.w, r10.y | |
cmov_logical r4.x, r10.z, l3.y, l3.0 | |
cmov_logical r10.z, r10.z, l3.z, l3.0 | |
dldexp r5.xy, r0.xy, r4.x | |
; Initial estimate r1 = dsqrt(r5) and r3 = drcp(r1); the .x word of each is
; then overwritten with l2.0.
dsqrt r1.xy, r5.xy | |
mov r1.x, l2.0 | |
drcp_zeroop(infinity) r3.xy, r1.xy | |
mov r3.x, l2.0 | |
; Correction steps: r4 = r1*r1 - r5, scaled by dldexp with l2.z (= -1), then
; r1 -= r4 * r3; r3 is refined once in between.
dmad r4.xy, r1.xy, r1.xy, r5.xy_neg(yw) | |
dldexp r4.xy, r4.xy, l2.z | |
dmad r1.xy, r4.xy, r3.xy_neg(yw), r1.xy | |
dmad r4.xy, r1.xy_neg(yw), r3.xy, l1.0y | |
dmad r3.xy, r3.xy, r4.xy, r3.xy | |
dmad r4.xy, r1.xy, r1.xy, r5.xy_neg(yw) | |
dldexp r4.xy, r4.xy, l2.z | |
dmad r1.xy, r4.xy, r3.xy_neg(yw), r1.xy | |
dmad r4.xy, r1.xy, r1.xy, r5.xy_neg(yw) | |
dldexp r4.xy, r4.xy, l2.z | |
dmad r1.xy, r4.xy, r3.xy_neg(yw), r1.xy | |
; Undo the pre-scale, then pick between the computed value, a constant
; (when r10.x is set) and the unmodified input (when r10.w is set).
dldexp r1.xy, r1.xy, r10.z | |
cmov_logical r1.xy, r10.xx, l2.0x, r1.xy | |
cmov_logical r0.xy, r10.ww, r0.xy, r1.xy | |
mov out0, r0 | |
mend | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ShaderType = IL_SHADER_COMPUTE | |
TargetChip = c | |
; ------------- SC_SRCSHADER Dump ------------------ | |
SC_SHADERSTATE: u32NumIntVSConst = 0 | |
SC_SHADERSTATE: u32NumIntPSConst = 0 | |
SC_SHADERSTATE: u32NumIntGSConst = 0 | |
SC_SHADERSTATE: u32NumBoolVSConst = 0 | |
SC_SHADERSTATE: u32NumBoolPSConst = 0 | |
SC_SHADERSTATE: u32NumBoolGSConst = 0 | |
SC_SHADERSTATE: u32NumFloatVSConst = 0 | |
SC_SHADERSTATE: u32NumFloatPSConst = 0 | |
SC_SHADERSTATE: u32NumFloatGSConst = 0 | |
fConstantsAvailable = 1025537139 | |
iConstantsAvailable = 2573 | |
bConstantsAvailable = 1634494817 | |
u32SCOptions[0] = 0x01A00000 SCOption_IGNORE_SAMPLE_L_BUG SCOption_FLOAT_DO_NOT_DIST SCOption_FLOAT_DO_NOT_REASSOC | |
u32SCOptions[1] = 0x00202000 SCOption_R600_ERROR_ON_DOUBLE_MEMEXP SCOption_SET_VPM_FOR_SCATTER | |
u32SCOptions[2] = 0x00020041 SCOption_R800_UAV_NONARRAY_FIXUP SCOption_R800_UAV_NONUAV_SYNC_WORKAROUND_BUG216513_1 SCOption_R900_BRANCH_IN_NESTED_LOOPS_WORKAROUND_BUG281276 | |
; -------- Disassembly -------------------- | |
00 ALU: ADDR(288) CNT(24) KCACHE0(CB0:0-15) KCACHE1(CB1:0-15) | |
0 x: MULLO_INT ____, R1.x, KC0[1].x | |
y: MULLO_INT ____, R1.x, KC0[1].x | |
z: MULLO_INT ____, R1.x, KC0[1].x | |
w: MULLO_INT ____, R1.x, KC0[1].x | |
1 x: MBCNT_32LO_ACCUM_PREV_INT ____, -1 | |
y: MBCNT_32HI_INT ____, -1 | |
z: MULADD_UINT24 R127.z, SE_ID, 256, WAVE_ID | |
w: ADD_INT ____, R0.x, PV0.y | |
2 x: MULADD_UINT24 R3.x, PV1.z, 64, PV1.x | |
z: ADD_INT R0.z, PV1.w, KC0[6].x | |
w: ADD_INT R1.w, KC1[0].x, 4 | |
3 x: MOV R5.x, -1 | |
y: LSHL R3.y, PV2.z, 3 | |
4 x: MOV R6.x, 0.0f | |
y: MOV R1.y, R3.x | |
z: ADD_INT ____, KC1[1].x, PV3.y | |
5 x: LSHR R1.x, PV4.z, 2 | |
y: ADD_INT R2.y, R3.x, 32768 | |
6 x: ADD_INT R2.x, PV5.x, 1 | |
y: MOV R6.y, 0.0f | |
01 MEM_RAT_NOP_RTN: RAT(11)[R1], R1, MARK VPM | |
02 MEM_RAT_NOP_RTN_ACK: RAT(11)[R2], R2, MARK VPM | |
03 ALU: ADDR(312) CNT(4) KCACHE0(CB1:0-15) | |
7 x: LSHR R2.x, KC0[0].x, 2 | |
8 x: LSHR R4.x, R1.w, 2 | |
04 WAIT_ACK: Outstanding_acks <= 0 | |
05 TEX: ADDR(1536) CNT(2) | |
9 VFETCH R7.x___, R3.x, fc165 | |
FETCH_TYPE(NO_INDEX_OFFSET) | |
10 VFETCH R3.x___, R2.y, fc165 | |
FETCH_TYPE(NO_INDEX_OFFSET) | |
06 ALU: ADDR(316) CNT(1) | |
11 y: MOV R2.y, R3.x | |
07 LOOP_NO_AL i2 FAIL_JUMP_ADDR(229) | |
08 ALU_PUSH_BEFORE: ADDR(317) CNT(2) | |
12 w: SETE_INT R0.w, R0.z, 0.0f | |
13 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
09 JUMP POP_CNT(1) ADDR(12) | |
10 MEM_RAT_INC_UINT_ACK: RAT(11)[R2], R5, MARK VPM | |
11 POP (1) ADDR(12) | |
12 ALU: ADDR(319) CNT(14) | |
14 y: LSHL ____, R0.x, 3 | |
15 x: LDS_WRITE_REL (offset: 1) ____, PV14.y, R7.x, R2.y | |
y: ADD_INT ____, PV14.y, 2048 | |
z: ADD_INT R1.z, PV14.y, 4096 | |
16 x: LDS_WRITE ____, PV15.y, R7.x | |
y: ADD_INT R1.y, PV15.z, 4 | |
w: ADD_INT ____, PV15.y, 4 | |
17 x: LDS_WRITE ____, PV16.w, R2.y | |
18 x: LDS_WRITE ____, R1.z, R7.x | |
19 x: LDS_XCHG_RET QA, R1.y, R2.y | |
20 y: MOV ____, QA[19].pop | |
13 WAIT_ACK: Outstanding_acks <= 0 | |
14 ALU: ADDR(333) CNT(1) | |
21 x: GROUP_BARRIER ____ | |
15 ALU_PUSH_BEFORE: ADDR(334) CNT(13) | |
22 x: LDS_READ2_RET QAB, 2052, 4 | |
23 x: LDS_READ2_RET QAB, 4100, 2048 | |
24 x: LDS_READ2_RET QAB, 0.0f, 4096 | |
25 y: MOV R0.y, QA[22] | |
26 y: MOV R1.y, QB[22].pop VEC_120 | |
27 x: MOV R1.x, QB[23].pop VEC_120 | |
y: MOV R4.y, QA[23].pop | |
28 x: MOV R3.x, QA[24] | |
29 x: MOV R8.x, QB[24].pop VEC_120 | |
30 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
16 JUMP POP_CNT(1) ADDR(19) | |
17 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
18 POP (1) ADDR(19) | |
19 ALU_PUSH_BEFORE: ADDR(347) CNT(19) | |
31 x: LDS_READ2_RET QAB, 2060, 12 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
32 x: LDS_READ2_RET QAB, 4108, 2056 | |
z: ADD_64 R1.z, PV31.w, R4.y | |
w: ADD_64 R1.w, PV31.z, R8.x | |
33 x: LDS_READ2_RET QAB, 8, 4104 | |
34 x: ADD_64 R8.x, R6.y, R1.w | |
y: ADD_64 R8.y, R6.x, R1.z | |
35 y: MOV R0.y, QA[31] | |
36 y: MOV R1.y, QB[31].pop VEC_120 | |
37 x: MOV R1.x, QB[32].pop VEC_120 | |
y: MOV R4.y, QA[32].pop | |
38 x: MOV R3.x, QA[33] | |
39 x: MOV R6.x, QB[33].pop VEC_120 | |
40 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
20 JUMP POP_CNT(1) ADDR(23) | |
21 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
22 POP (1) ADDR(23) | |
23 ALU_PUSH_BEFORE: ADDR(366) CNT(19) | |
41 x: LDS_READ2_RET QAB, 2068, 20 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
42 x: LDS_READ2_RET QAB, 4116, 2064 | |
z: ADD_64 R1.z, PV41.w, R4.y | |
w: ADD_64 R1.w, PV41.z, R6.x | |
43 x: LDS_READ2_RET QAB, 16, 4112 | |
44 x: ADD_64 R8.x, R8.y, R1.w | |
y: ADD_64 R8.y, R8.x, R1.z | |
45 y: MOV R0.y, QA[41] | |
46 y: MOV R1.y, QB[41].pop VEC_120 | |
47 x: MOV R1.x, QB[42].pop VEC_120 | |
y: MOV R4.y, QA[42].pop | |
48 x: MOV R3.x, QA[43] | |
49 x: MOV R6.x, QB[43].pop VEC_120 | |
50 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
24 JUMP POP_CNT(1) ADDR(27) | |
25 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
26 POP (1) ADDR(27) | |
27 ALU_PUSH_BEFORE: ADDR(385) CNT(19) | |
51 x: LDS_READ2_RET QAB, 2076, 28 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
52 x: LDS_READ2_RET QAB, 4124, 2072 | |
z: ADD_64 R1.z, PV51.w, R4.y | |
w: ADD_64 R1.w, PV51.z, R6.x | |
53 x: LDS_READ2_RET QAB, 24, 4120 | |
54 x: ADD_64 R8.x, R8.y, R1.w | |
y: ADD_64 R8.y, R8.x, R1.z | |
55 y: MOV R0.y, QA[51] | |
56 y: MOV R1.y, QB[51].pop VEC_120 | |
57 x: MOV R1.x, QB[52].pop VEC_120 | |
y: MOV R4.y, QA[52].pop | |
58 x: MOV R3.x, QA[53] | |
59 x: MOV R6.x, QB[53].pop VEC_120 | |
60 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
28 JUMP POP_CNT(1) ADDR(31) | |
29 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
30 POP (1) ADDR(31) | |
31 ALU_PUSH_BEFORE: ADDR(404) CNT(19) | |
61 x: LDS_READ2_RET QAB, 2084, 36 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
62 x: LDS_READ2_RET QAB, 4132, 2080 | |
z: ADD_64 R1.z, PV61.w, R4.y | |
w: ADD_64 R1.w, PV61.z, R6.x | |
63 x: LDS_READ2_RET QAB, 32, 4128 | |
64 x: ADD_64 R8.x, R8.y, R1.w | |
y: ADD_64 R8.y, R8.x, R1.z | |
65 y: MOV R0.y, QA[61] | |
66 y: MOV R1.y, QB[61].pop VEC_120 | |
67 x: MOV R1.x, QB[62].pop VEC_120 | |
y: MOV R4.y, QA[62].pop | |
68 x: MOV R3.x, QA[63] | |
69 x: MOV R6.x, QB[63].pop VEC_120 | |
70 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
32 JUMP POP_CNT(1) ADDR(35) | |
33 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
34 POP (1) ADDR(35) | |
35 ALU_PUSH_BEFORE: ADDR(423) CNT(19) | |
71 x: LDS_READ2_RET QAB, 2092, 44 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
72 x: LDS_READ2_RET QAB, 4140, 2088 | |
z: ADD_64 R1.z, PV71.w, R4.y | |
w: ADD_64 R1.w, PV71.z, R6.x | |
73 x: LDS_READ2_RET QAB, 40, 4136 | |
74 x: ADD_64 R6.x, R8.y, R1.w | |
y: ADD_64 R6.y, R8.x, R1.z | |
75 y: MOV R0.y, QA[71] | |
76 y: MOV R1.y, QB[71].pop VEC_120 | |
77 x: MOV R1.x, QB[72].pop VEC_120 | |
y: MOV R4.y, QA[72].pop | |
78 x: MOV R3.x, QA[73] | |
79 x: MOV R8.x, QB[73].pop VEC_120 | |
80 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
36 JUMP POP_CNT(1) ADDR(39) | |
37 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
38 POP (1) ADDR(39) | |
39 ALU_PUSH_BEFORE: ADDR(442) CNT(19) | |
81 x: LDS_READ2_RET QAB, 2100, 52 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
82 x: LDS_READ2_RET QAB, 4148, 2096 | |
z: ADD_64 R1.z, PV81.w, R4.y | |
w: ADD_64 R1.w, PV81.z, R8.x | |
83 x: LDS_READ2_RET QAB, 48, 4144 | |
84 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
85 y: MOV R0.y, QA[81] | |
86 y: MOV R1.y, QB[81].pop VEC_120 | |
87 x: MOV R1.x, QB[82].pop VEC_120 | |
y: MOV R4.y, QA[82].pop | |
88 x: MOV R3.x, QA[83] | |
89 x: MOV R8.x, QB[83].pop VEC_120 | |
90 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
40 JUMP POP_CNT(1) ADDR(43) | |
41 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
42 POP (1) ADDR(43) | |
43 ALU_PUSH_BEFORE: ADDR(461) CNT(19) | |
91 x: LDS_READ2_RET QAB, 2108, 60 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
92 x: LDS_READ2_RET QAB, 4156, 2104 | |
z: ADD_64 R1.z, PV91.w, R4.y | |
w: ADD_64 R1.w, PV91.z, R8.x | |
93 x: LDS_READ2_RET QAB, 56, 4152 | |
94 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
95 y: MOV R0.y, QA[91] | |
96 y: MOV R1.y, QB[91].pop VEC_120 | |
97 x: MOV R1.x, QB[92].pop VEC_120 | |
y: MOV R4.y, QA[92].pop | |
98 x: MOV R3.x, QA[93] | |
99 x: MOV R8.x, QB[93].pop VEC_120 | |
100 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
44 JUMP POP_CNT(1) ADDR(47) | |
45 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
46 POP (1) ADDR(47) | |
47 ALU_PUSH_BEFORE: ADDR(480) CNT(19) | |
101 x: LDS_READ2_RET QAB, 2116, 68 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
102 x: LDS_READ2_RET QAB, 4164, 2112 | |
z: ADD_64 R1.z, PV101.w, R4.y | |
w: ADD_64 R1.w, PV101.z, R8.x | |
103 x: LDS_READ2_RET QAB, 64, 4160 | |
104 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
105 y: MOV R0.y, QA[101] | |
106 y: MOV R1.y, QB[101].pop VEC_120 | |
107 x: MOV R1.x, QB[102].pop VEC_120 | |
y: MOV R4.y, QA[102].pop | |
108 x: MOV R3.x, QA[103] | |
109 x: MOV R8.x, QB[103].pop VEC_120 | |
110 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
48 JUMP POP_CNT(1) ADDR(51) | |
49 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
50 POP (1) ADDR(51) | |
51 ALU_PUSH_BEFORE: ADDR(499) CNT(19) | |
111 x: LDS_READ2_RET QAB, 2124, 76 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
112 x: LDS_READ2_RET QAB, 4172, 2120 | |
z: ADD_64 R1.z, PV111.w, R4.y | |
w: ADD_64 R1.w, PV111.z, R8.x | |
113 x: LDS_READ2_RET QAB, 72, 4168 | |
114 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
115 y: MOV R0.y, QA[111] | |
116 y: MOV R1.y, QB[111].pop VEC_120 | |
117 x: MOV R1.x, QB[112].pop VEC_120 | |
y: MOV R4.y, QA[112].pop | |
118 x: MOV R3.x, QA[113] | |
119 x: MOV R8.x, QB[113].pop VEC_120 | |
120 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
52 JUMP POP_CNT(1) ADDR(55) | |
53 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
54 POP (1) ADDR(55) | |
55 ALU_PUSH_BEFORE: ADDR(518) CNT(19) | |
121 x: LDS_READ2_RET QAB, 2132, 84 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
122 x: LDS_READ2_RET QAB, 4180, 2128 | |
z: ADD_64 R1.z, PV121.w, R4.y | |
w: ADD_64 R1.w, PV121.z, R8.x | |
123 x: LDS_READ2_RET QAB, 80, 4176 | |
124 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
125 y: MOV R0.y, QA[121] | |
126 y: MOV R1.y, QB[121].pop VEC_120 | |
127 x: MOV R1.x, QB[122].pop VEC_120 | |
y: MOV R4.y, QA[122].pop | |
128 x: MOV R3.x, QA[123] | |
129 x: MOV R8.x, QB[123].pop VEC_120 | |
130 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
56 JUMP POP_CNT(1) ADDR(59) | |
57 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
58 POP (1) ADDR(59) | |
59 ALU_PUSH_BEFORE: ADDR(537) CNT(19) | |
131 x: LDS_READ2_RET QAB, 2140, 92 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
132 x: LDS_READ2_RET QAB, 4188, 2136 | |
z: ADD_64 R1.z, PV131.w, R4.y | |
w: ADD_64 R1.w, PV131.z, R8.x | |
133 x: LDS_READ2_RET QAB, 88, 4184 | |
134 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
135 y: MOV R0.y, QA[131] | |
136 y: MOV R1.y, QB[131].pop VEC_120 | |
137 x: MOV R1.x, QB[132].pop VEC_120 | |
y: MOV R4.y, QA[132].pop | |
138 x: MOV R3.x, QA[133] | |
139 x: MOV R8.x, QB[133].pop VEC_120 | |
140 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
60 JUMP POP_CNT(1) ADDR(63) | |
61 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
62 POP (1) ADDR(63) | |
63 ALU_PUSH_BEFORE: ADDR(556) CNT(19) | |
141 x: LDS_READ2_RET QAB, 2148, 100 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
142 x: LDS_READ2_RET QAB, 4196, 2144 | |
z: ADD_64 R1.z, PV141.w, R4.y | |
w: ADD_64 R1.w, PV141.z, R8.x | |
143 x: LDS_READ2_RET QAB, 96, 4192 | |
144 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
145 y: MOV R0.y, QA[141] | |
146 y: MOV R1.y, QB[141].pop VEC_120 | |
147 x: MOV R1.x, QB[142].pop VEC_120 | |
y: MOV R4.y, QA[142].pop | |
148 x: MOV R3.x, QA[143] | |
149 x: MOV R8.x, QB[143].pop VEC_120 | |
150 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
64 JUMP POP_CNT(1) ADDR(67) | |
65 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
66 POP (1) ADDR(67) | |
67 ALU_PUSH_BEFORE: ADDR(575) CNT(19) | |
151 x: LDS_READ2_RET QAB, 2156, 108 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
152 x: LDS_READ2_RET QAB, 4204, 2152 | |
z: ADD_64 R1.z, PV151.w, R4.y | |
w: ADD_64 R1.w, PV151.z, R8.x | |
153 x: LDS_READ2_RET QAB, 104, 4200 | |
154 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
155 y: MOV R0.y, QA[151] | |
156 y: MOV R1.y, QB[151].pop VEC_120 | |
157 x: MOV R1.x, QB[152].pop VEC_120 | |
y: MOV R4.y, QA[152].pop | |
158 x: MOV R3.x, QA[153] | |
159 x: MOV R8.x, QB[153].pop VEC_120 | |
160 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
68 JUMP POP_CNT(1) ADDR(71) | |
69 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
70 POP (1) ADDR(71) | |
71 ALU_PUSH_BEFORE: ADDR(594) CNT(19) | |
161 x: LDS_READ2_RET QAB, 2164, 116 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
162 x: LDS_READ2_RET QAB, 4212, 2160 | |
z: ADD_64 R1.z, PV161.w, R4.y | |
w: ADD_64 R1.w, PV161.z, R8.x | |
163 x: LDS_READ2_RET QAB, 112, 4208 | |
164 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
165 y: MOV R0.y, QA[161] | |
166 y: MOV R1.y, QB[161].pop VEC_120 | |
167 x: MOV R1.x, QB[162].pop VEC_120 | |
y: MOV R4.y, QA[162].pop | |
168 x: MOV R3.x, QA[163] | |
169 x: MOV R8.x, QB[163].pop VEC_120 | |
170 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
72 JUMP POP_CNT(1) ADDR(75) | |
73 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
74 POP (1) ADDR(75) | |
75 ALU_PUSH_BEFORE: ADDR(613) CNT(19) | |
171 x: LDS_READ2_RET QAB, 2172, 124 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
172 x: LDS_READ2_RET QAB, 4220, 2168 | |
z: ADD_64 R1.z, PV171.w, R4.y | |
w: ADD_64 R1.w, PV171.z, R8.x | |
173 x: LDS_READ2_RET QAB, 120, 4216 | |
174 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
175 y: MOV R0.y, QA[171] | |
176 y: MOV R1.y, QB[171].pop VEC_120 | |
177 x: MOV R1.x, QB[172].pop VEC_120 | |
y: MOV R4.y, QA[172].pop | |
178 x: MOV R3.x, QA[173] | |
179 x: MOV R8.x, QB[173].pop VEC_120 | |
180 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
76 JUMP POP_CNT(1) ADDR(79) | |
77 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
78 POP (1) ADDR(79) | |
79 ALU_PUSH_BEFORE: ADDR(632) CNT(19) | |
181 x: LDS_READ2_RET QAB, 2180, 132 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
182 x: LDS_READ2_RET QAB, 4228, 2176 | |
z: ADD_64 R1.z, PV181.w, R4.y | |
w: ADD_64 R1.w, PV181.z, R8.x | |
183 x: LDS_READ2_RET QAB, 128, 4224 | |
184 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
185 y: MOV R0.y, QA[181] | |
186 y: MOV R1.y, QB[181].pop VEC_120 | |
187 x: MOV R1.x, QB[182].pop VEC_120 | |
y: MOV R4.y, QA[182].pop | |
188 x: MOV R3.x, QA[183] | |
189 x: MOV R8.x, QB[183].pop VEC_120 | |
190 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
80 JUMP POP_CNT(1) ADDR(83) | |
81 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
82 POP (1) ADDR(83) | |
83 ALU_PUSH_BEFORE: ADDR(651) CNT(19) | |
191 x: LDS_READ2_RET QAB, 2188, 140 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
192 x: LDS_READ2_RET QAB, 4236, 2184 | |
z: ADD_64 R1.z, PV191.w, R4.y | |
w: ADD_64 R1.w, PV191.z, R8.x | |
193 x: LDS_READ2_RET QAB, 136, 4232 | |
194 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
195 y: MOV R0.y, QA[191] | |
196 y: MOV R1.y, QB[191].pop VEC_120 | |
197 x: MOV R1.x, QB[192].pop VEC_120 | |
y: MOV R4.y, QA[192].pop | |
198 x: MOV R3.x, QA[193] | |
199 x: MOV R8.x, QB[193].pop VEC_120 | |
200 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
84 JUMP POP_CNT(1) ADDR(87) | |
85 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
86 POP (1) ADDR(87) | |
87 ALU_PUSH_BEFORE: ADDR(670) CNT(19) | |
201 x: LDS_READ2_RET QAB, 2196, 148 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
202 x: LDS_READ2_RET QAB, 4244, 2192 | |
z: ADD_64 R1.z, PV201.w, R4.y | |
w: ADD_64 R1.w, PV201.z, R8.x | |
203 x: LDS_READ2_RET QAB, 144, 4240 | |
204 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
205 y: MOV R0.y, QA[201] | |
206 y: MOV R1.y, QB[201].pop VEC_120 | |
207 x: MOV R1.x, QB[202].pop VEC_120 | |
y: MOV R4.y, QA[202].pop | |
208 x: MOV R3.x, QA[203] | |
209 x: MOV R8.x, QB[203].pop VEC_120 | |
210 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
88 JUMP POP_CNT(1) ADDR(91) | |
89 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
90 POP (1) ADDR(91) | |
91 ALU_PUSH_BEFORE: ADDR(689) CNT(19) | |
211 x: LDS_READ2_RET QAB, 2204, 156 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
212 x: LDS_READ2_RET QAB, 4252, 2200 | |
z: ADD_64 R1.z, PV211.w, R4.y | |
w: ADD_64 R1.w, PV211.z, R8.x | |
213 x: LDS_READ2_RET QAB, 152, 4248 | |
214 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
215 y: MOV R0.y, QA[211] | |
216 y: MOV R1.y, QB[211].pop VEC_120 | |
217 x: MOV R1.x, QB[212].pop VEC_120 | |
y: MOV R4.y, QA[212].pop | |
218 x: MOV R3.x, QA[213] | |
219 x: MOV R8.x, QB[213].pop VEC_120 | |
220 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
92 JUMP POP_CNT(1) ADDR(95) | |
93 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
94 POP (1) ADDR(95) | |
95 ALU_PUSH_BEFORE: ADDR(708) CNT(19) | |
221 x: LDS_READ2_RET QAB, 2212, 164 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
222 x: LDS_READ2_RET QAB, 4260, 2208 | |
z: ADD_64 R1.z, PV221.w, R4.y | |
w: ADD_64 R1.w, PV221.z, R8.x | |
223 x: LDS_READ2_RET QAB, 160, 4256 | |
224 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
225 y: MOV R0.y, QA[221] | |
226 y: MOV R1.y, QB[221].pop VEC_120 | |
227 x: MOV R1.x, QB[222].pop VEC_120 | |
y: MOV R4.y, QA[222].pop | |
228 x: MOV R3.x, QA[223] | |
229 x: MOV R8.x, QB[223].pop VEC_120 | |
230 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
96 JUMP POP_CNT(1) ADDR(99) | |
97 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
98 POP (1) ADDR(99) | |
99 ALU_PUSH_BEFORE: ADDR(727) CNT(19) | |
231 x: LDS_READ2_RET QAB, 2220, 172 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
232 x: LDS_READ2_RET QAB, 4268, 2216 | |
z: ADD_64 R1.z, PV231.w, R4.y | |
w: ADD_64 R1.w, PV231.z, R8.x | |
233 x: LDS_READ2_RET QAB, 168, 4264 | |
234 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
235 y: MOV R0.y, QA[231] | |
236 y: MOV R1.y, QB[231].pop VEC_120 | |
237 x: MOV R1.x, QB[232].pop VEC_120 | |
y: MOV R4.y, QA[232].pop | |
238 x: MOV R3.x, QA[233] | |
239 x: MOV R8.x, QB[233].pop VEC_120 | |
240 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
100 JUMP POP_CNT(1) ADDR(103) | |
101 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
102 POP (1) ADDR(103) | |
103 ALU_PUSH_BEFORE: ADDR(746) CNT(19) | |
241 x: LDS_READ2_RET QAB, 2228, 180 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
242 x: LDS_READ2_RET QAB, 4276, 2224 | |
z: ADD_64 R1.z, PV241.w, R4.y | |
w: ADD_64 R1.w, PV241.z, R8.x | |
243 x: LDS_READ2_RET QAB, 176, 4272 | |
244 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
245 y: MOV R0.y, QA[241] | |
246 y: MOV R1.y, QB[241].pop VEC_120 | |
247 x: MOV R1.x, QB[242].pop VEC_120 | |
y: MOV R4.y, QA[242].pop | |
248 x: MOV R3.x, QA[243] | |
249 x: MOV R8.x, QB[243].pop VEC_120 | |
250 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
104 JUMP POP_CNT(1) ADDR(107) | |
105 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
106 POP (1) ADDR(107) | |
107 ALU_PUSH_BEFORE: ADDR(765) CNT(19) | |
251 x: LDS_READ2_RET QAB, 2236, 188 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
252 x: LDS_READ2_RET QAB, 4284, 2232 | |
z: ADD_64 R1.z, PV251.w, R4.y | |
w: ADD_64 R1.w, PV251.z, R8.x | |
253 x: LDS_READ2_RET QAB, 184, 4280 | |
254 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
255 y: MOV R0.y, QA[251] | |
256 y: MOV R1.y, QB[251].pop VEC_120 | |
257 x: MOV R1.x, QB[252].pop VEC_120 | |
y: MOV R4.y, QA[252].pop | |
258 x: MOV R3.x, QA[253] | |
259 x: MOV R8.x, QB[253].pop VEC_120 | |
260 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
108 JUMP POP_CNT(1) ADDR(111) | |
109 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
110 POP (1) ADDR(111) | |
111 ALU_PUSH_BEFORE: ADDR(784) CNT(19) | |
261 x: LDS_READ2_RET QAB, 2244, 196 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
262 x: LDS_READ2_RET QAB, 4292, 2240 | |
z: ADD_64 R1.z, PV261.w, R4.y | |
w: ADD_64 R1.w, PV261.z, R8.x | |
263 x: LDS_READ2_RET QAB, 192, 4288 | |
264 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
265 y: MOV R0.y, QA[261] | |
266 y: MOV R1.y, QB[261].pop VEC_120 | |
267 x: MOV R1.x, QB[262].pop VEC_120 | |
y: MOV R4.y, QA[262].pop | |
268 x: MOV R3.x, QA[263] | |
269 x: MOV R8.x, QB[263].pop VEC_120 | |
270 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
112 JUMP POP_CNT(1) ADDR(115) | |
113 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
114 POP (1) ADDR(115) | |
115 ALU_PUSH_BEFORE: ADDR(803) CNT(19) | |
271 x: LDS_READ2_RET QAB, 2252, 204 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
272 x: LDS_READ2_RET QAB, 4300, 2248 | |
z: ADD_64 R1.z, PV271.w, R4.y | |
w: ADD_64 R1.w, PV271.z, R8.x | |
273 x: LDS_READ2_RET QAB, 200, 4296 | |
274 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
275 y: MOV R0.y, QA[271] | |
276 y: MOV R1.y, QB[271].pop VEC_120 | |
277 x: MOV R1.x, QB[272].pop VEC_120 | |
y: MOV R4.y, QA[272].pop | |
278 x: MOV R3.x, QA[273] | |
279 x: MOV R8.x, QB[273].pop VEC_120 | |
280 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
116 JUMP POP_CNT(1) ADDR(119) | |
117 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
118 POP (1) ADDR(119) | |
119 ALU_PUSH_BEFORE: ADDR(822) CNT(19) | |
281 x: LDS_READ2_RET QAB, 2260, 212 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
282 x: LDS_READ2_RET QAB, 4308, 2256 | |
z: ADD_64 R1.z, PV281.w, R4.y | |
w: ADD_64 R1.w, PV281.z, R8.x | |
283 x: LDS_READ2_RET QAB, 208, 4304 | |
284 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
285 y: MOV R0.y, QA[281] | |
286 y: MOV R1.y, QB[281].pop VEC_120 | |
287 x: MOV R1.x, QB[282].pop VEC_120 | |
y: MOV R4.y, QA[282].pop | |
288 x: MOV R3.x, QA[283] | |
289 x: MOV R8.x, QB[283].pop VEC_120 | |
290 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
120 JUMP POP_CNT(1) ADDR(123) | |
121 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
122 POP (1) ADDR(123) | |
123 ALU_PUSH_BEFORE: ADDR(841) CNT(19) | |
291 x: LDS_READ2_RET QAB, 2268, 220 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
292 x: LDS_READ2_RET QAB, 4316, 2264 | |
z: ADD_64 R1.z, PV291.w, R4.y | |
w: ADD_64 R1.w, PV291.z, R8.x | |
293 x: LDS_READ2_RET QAB, 216, 4312 | |
294 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
295 y: MOV R0.y, QA[291] | |
296 y: MOV R1.y, QB[291].pop VEC_120 | |
297 x: MOV R1.x, QB[292].pop VEC_120 | |
y: MOV R4.y, QA[292].pop | |
298 x: MOV R3.x, QA[293] | |
299 x: MOV R8.x, QB[293].pop VEC_120 | |
300 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
124 JUMP POP_CNT(1) ADDR(127) | |
125 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
126 POP (1) ADDR(127) | |
127 ALU_PUSH_BEFORE: ADDR(860) CNT(19) | |
301 x: LDS_READ2_RET QAB, 2276, 228 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
302 x: LDS_READ2_RET QAB, 4324, 2272 | |
z: ADD_64 R1.z, PV301.w, R4.y | |
w: ADD_64 R1.w, PV301.z, R8.x | |
303 x: LDS_READ2_RET QAB, 224, 4320 | |
304 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
305 y: MOV R0.y, QA[301] | |
306 y: MOV R1.y, QB[301].pop VEC_120 | |
307 x: MOV R1.x, QB[302].pop VEC_120 | |
y: MOV R4.y, QA[302].pop | |
308 x: MOV R3.x, QA[303] | |
309 x: MOV R8.x, QB[303].pop VEC_120 | |
310 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
128 JUMP POP_CNT(1) ADDR(131) | |
129 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
130 POP (1) ADDR(131) | |
131 ALU_PUSH_BEFORE: ADDR(879) CNT(19) | |
311 x: LDS_READ2_RET QAB, 2284, 236 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
312 x: LDS_READ2_RET QAB, 4332, 2280 | |
z: ADD_64 R1.z, PV311.w, R4.y | |
w: ADD_64 R1.w, PV311.z, R8.x | |
313 x: LDS_READ2_RET QAB, 232, 4328 | |
314 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
315 y: MOV R0.y, QA[311] | |
316 y: MOV R1.y, QB[311].pop VEC_120 | |
317 x: MOV R1.x, QB[312].pop VEC_120 | |
y: MOV R4.y, QA[312].pop | |
318 x: MOV R3.x, QA[313] | |
319 x: MOV R8.x, QB[313].pop VEC_120 | |
320 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
132 JUMP POP_CNT(1) ADDR(135) | |
133 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
134 POP (1) ADDR(135) | |
135 ALU_PUSH_BEFORE: ADDR(898) CNT(19) | |
321 x: LDS_READ2_RET QAB, 2292, 244 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
322 x: LDS_READ2_RET QAB, 4340, 2288 | |
z: ADD_64 R1.z, PV321.w, R4.y | |
w: ADD_64 R1.w, PV321.z, R8.x | |
323 x: LDS_READ2_RET QAB, 240, 4336 | |
324 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
325 y: MOV R0.y, QA[321] | |
326 y: MOV R1.y, QB[321].pop VEC_120 | |
327 x: MOV R1.x, QB[322].pop VEC_120 | |
y: MOV R4.y, QA[322].pop | |
328 x: MOV R3.x, QA[323] | |
329 x: MOV R8.x, QB[323].pop VEC_120 | |
330 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
136 JUMP POP_CNT(1) ADDR(139) | |
137 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
138 POP (1) ADDR(139) | |
139 ALU_PUSH_BEFORE: ADDR(917) CNT(19) | |
331 x: LDS_READ2_RET QAB, 2300, 252 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
332 x: LDS_READ2_RET QAB, 4348, 2296 | |
z: ADD_64 R1.z, PV331.w, R4.y | |
w: ADD_64 R1.w, PV331.z, R8.x | |
333 x: LDS_READ2_RET QAB, 248, 4344 | |
334 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
335 y: MOV R0.y, QA[331] | |
336 y: MOV R1.y, QB[331].pop VEC_120 | |
337 x: MOV R1.x, QB[332].pop VEC_120 | |
y: MOV R4.y, QA[332].pop | |
338 x: MOV R3.x, QA[333] | |
339 x: MOV R8.x, QB[333].pop VEC_120 | |
340 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
140 JUMP POP_CNT(1) ADDR(143) | |
141 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
142 POP (1) ADDR(143) | |
143 ALU_PUSH_BEFORE: ADDR(936) CNT(19) | |
341 x: LDS_READ2_RET QAB, 2308, 260 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
342 x: LDS_READ2_RET QAB, 4356, 2304 | |
z: ADD_64 R1.z, PV341.w, R4.y | |
w: ADD_64 R1.w, PV341.z, R8.x | |
343 x: LDS_READ2_RET QAB, 256, 4352 | |
344 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
345 y: MOV R0.y, QA[341] | |
346 y: MOV R1.y, QB[341].pop VEC_120 | |
347 x: MOV R1.x, QB[342].pop VEC_120 | |
y: MOV R4.y, QA[342].pop | |
348 x: MOV R3.x, QA[343] | |
349 x: MOV R8.x, QB[343].pop VEC_120 | |
350 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
144 JUMP POP_CNT(1) ADDR(147) | |
145 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
146 POP (1) ADDR(147) | |
147 ALU_PUSH_BEFORE: ADDR(955) CNT(19) | |
351 x: LDS_READ2_RET QAB, 2316, 268 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
352 x: LDS_READ2_RET QAB, 4364, 2312 | |
z: ADD_64 R1.z, PV351.w, R4.y | |
w: ADD_64 R1.w, PV351.z, R8.x | |
353 x: LDS_READ2_RET QAB, 264, 4360 | |
354 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
355 y: MOV R0.y, QA[351] | |
356 y: MOV R1.y, QB[351].pop VEC_120 | |
357 x: MOV R1.x, QB[352].pop VEC_120 | |
y: MOV R4.y, QA[352].pop | |
358 x: MOV R3.x, QA[353] | |
359 x: MOV R8.x, QB[353].pop VEC_120 | |
360 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
148 JUMP POP_CNT(1) ADDR(151) | |
149 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
150 POP (1) ADDR(151) | |
151 ALU_PUSH_BEFORE: ADDR(974) CNT(19) | |
361 x: LDS_READ2_RET QAB, 2324, 276 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
362 x: LDS_READ2_RET QAB, 4372, 2320 | |
z: ADD_64 R1.z, PV361.w, R4.y | |
w: ADD_64 R1.w, PV361.z, R8.x | |
363 x: LDS_READ2_RET QAB, 272, 4368 | |
364 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
365 y: MOV R0.y, QA[361] | |
366 y: MOV R1.y, QB[361].pop VEC_120 | |
367 x: MOV R1.x, QB[362].pop VEC_120 | |
y: MOV R4.y, QA[362].pop | |
368 x: MOV R3.x, QA[363] | |
369 x: MOV R8.x, QB[363].pop VEC_120 | |
370 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
152 JUMP POP_CNT(1) ADDR(155) | |
153 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
154 POP (1) ADDR(155) | |
155 ALU_PUSH_BEFORE: ADDR(993) CNT(19) | |
371 x: LDS_READ2_RET QAB, 2332, 284 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
372 x: LDS_READ2_RET QAB, 4380, 2328 | |
z: ADD_64 R1.z, PV371.w, R4.y | |
w: ADD_64 R1.w, PV371.z, R8.x | |
373 x: LDS_READ2_RET QAB, 280, 4376 | |
374 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
375 y: MOV R0.y, QA[371] | |
376 y: MOV R1.y, QB[371].pop VEC_120 | |
377 x: MOV R1.x, QB[372].pop VEC_120 | |
y: MOV R4.y, QA[372].pop | |
378 x: MOV R3.x, QA[373] | |
379 x: MOV R8.x, QB[373].pop VEC_120 | |
380 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
156 JUMP POP_CNT(1) ADDR(159) | |
157 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
158 POP (1) ADDR(159) | |
159 ALU_PUSH_BEFORE: ADDR(1012) CNT(19) | |
381 x: LDS_READ2_RET QAB, 2340, 292 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
382 x: LDS_READ2_RET QAB, 4388, 2336 | |
z: ADD_64 R1.z, PV381.w, R4.y | |
w: ADD_64 R1.w, PV381.z, R8.x | |
383 x: LDS_READ2_RET QAB, 288, 4384 | |
384 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
385 y: MOV R0.y, QA[381] | |
386 y: MOV R1.y, QB[381].pop VEC_120 | |
387 x: MOV R1.x, QB[382].pop VEC_120 | |
y: MOV R4.y, QA[382].pop | |
388 x: MOV R3.x, QA[383] | |
389 x: MOV R8.x, QB[383].pop VEC_120 | |
390 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
160 JUMP POP_CNT(1) ADDR(163) | |
161 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
162 POP (1) ADDR(163) | |
163 ALU_PUSH_BEFORE: ADDR(1031) CNT(19) | |
391 x: LDS_READ2_RET QAB, 2348, 300 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
392 x: LDS_READ2_RET QAB, 4396, 2344 | |
z: ADD_64 R1.z, PV391.w, R4.y | |
w: ADD_64 R1.w, PV391.z, R8.x | |
393 x: LDS_READ2_RET QAB, 296, 4392 | |
394 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
395 y: MOV R0.y, QA[391] | |
396 y: MOV R1.y, QB[391].pop VEC_120 | |
397 x: MOV R1.x, QB[392].pop VEC_120 | |
y: MOV R4.y, QA[392].pop | |
398 x: MOV R3.x, QA[393] | |
399 x: MOV R8.x, QB[393].pop VEC_120 | |
400 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
164 JUMP POP_CNT(1) ADDR(167) | |
165 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
166 POP (1) ADDR(167) | |
167 ALU_PUSH_BEFORE: ADDR(1050) CNT(19) | |
401 x: LDS_READ2_RET QAB, 2356, 308 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
402 x: LDS_READ2_RET QAB, 4404, 2352 | |
z: ADD_64 R1.z, PV401.w, R4.y | |
w: ADD_64 R1.w, PV401.z, R8.x | |
403 x: LDS_READ2_RET QAB, 304, 4400 | |
404 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
405 y: MOV R0.y, QA[401] | |
406 y: MOV R1.y, QB[401].pop VEC_120 | |
407 x: MOV R1.x, QB[402].pop VEC_120 | |
y: MOV R4.y, QA[402].pop | |
408 x: MOV R3.x, QA[403] | |
409 x: MOV R8.x, QB[403].pop VEC_120 | |
410 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
168 JUMP POP_CNT(1) ADDR(171) | |
169 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
170 POP (1) ADDR(171) | |
171 ALU_PUSH_BEFORE: ADDR(1069) CNT(19) | |
411 x: LDS_READ2_RET QAB, 2364, 316 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
412 x: LDS_READ2_RET QAB, 4412, 2360 | |
z: ADD_64 R1.z, PV411.w, R4.y | |
w: ADD_64 R1.w, PV411.z, R8.x | |
413 x: LDS_READ2_RET QAB, 312, 4408 | |
414 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
415 y: MOV R0.y, QA[411] | |
416 y: MOV R1.y, QB[411].pop VEC_120 | |
417 x: MOV R1.x, QB[412].pop VEC_120 | |
y: MOV R4.y, QA[412].pop | |
418 x: MOV R3.x, QA[413] | |
419 x: MOV R8.x, QB[413].pop VEC_120 | |
420 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
172 JUMP POP_CNT(1) ADDR(175) | |
173 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
174 POP (1) ADDR(175) | |
175 ALU_PUSH_BEFORE: ADDR(1088) CNT(19) | |
421 x: LDS_READ2_RET QAB, 2372, 324 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
422 x: LDS_READ2_RET QAB, 4420, 2368 | |
z: ADD_64 R1.z, PV421.w, R4.y | |
w: ADD_64 R1.w, PV421.z, R8.x | |
423 x: LDS_READ2_RET QAB, 320, 4416 | |
424 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
425 y: MOV R0.y, QA[421] | |
426 y: MOV R1.y, QB[421].pop VEC_120 | |
427 x: MOV R1.x, QB[422].pop VEC_120 | |
y: MOV R4.y, QA[422].pop | |
428 x: MOV R3.x, QA[423] | |
429 x: MOV R8.x, QB[423].pop VEC_120 | |
430 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
176 JUMP POP_CNT(1) ADDR(179) | |
177 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
178 POP (1) ADDR(179) | |
179 ALU_PUSH_BEFORE: ADDR(1107) CNT(19) | |
431 x: LDS_READ2_RET QAB, 2380, 332 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
432 x: LDS_READ2_RET QAB, 4428, 2376 | |
z: ADD_64 R1.z, PV431.w, R4.y | |
w: ADD_64 R1.w, PV431.z, R8.x | |
433 x: LDS_READ2_RET QAB, 328, 4424 | |
434 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
435 y: MOV R0.y, QA[431] | |
436 y: MOV R1.y, QB[431].pop VEC_120 | |
437 x: MOV R1.x, QB[432].pop VEC_120 | |
y: MOV R4.y, QA[432].pop | |
438 x: MOV R3.x, QA[433] | |
439 x: MOV R8.x, QB[433].pop VEC_120 | |
440 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
180 JUMP POP_CNT(1) ADDR(183) | |
181 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
182 POP (1) ADDR(183) | |
183 ALU_PUSH_BEFORE: ADDR(1126) CNT(19) | |
441 x: LDS_READ2_RET QAB, 2388, 340 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
442 x: LDS_READ2_RET QAB, 4436, 2384 | |
z: ADD_64 R1.z, PV441.w, R4.y | |
w: ADD_64 R1.w, PV441.z, R8.x | |
443 x: LDS_READ2_RET QAB, 336, 4432 | |
444 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
445 y: MOV R0.y, QA[441] | |
446 y: MOV R1.y, QB[441].pop VEC_120 | |
447 x: MOV R1.x, QB[442].pop VEC_120 | |
y: MOV R4.y, QA[442].pop | |
448 x: MOV R3.x, QA[443] | |
449 x: MOV R8.x, QB[443].pop VEC_120 | |
450 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
184 JUMP POP_CNT(1) ADDR(187) | |
185 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
186 POP (1) ADDR(187) | |
187 ALU_PUSH_BEFORE: ADDR(1145) CNT(19) | |
451 x: LDS_READ2_RET QAB, 2396, 348 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
452 x: LDS_READ2_RET QAB, 4444, 2392 | |
z: ADD_64 R1.z, PV451.w, R4.y | |
w: ADD_64 R1.w, PV451.z, R8.x | |
453 x: LDS_READ2_RET QAB, 344, 4440 | |
454 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
455 y: MOV R0.y, QA[451] | |
456 y: MOV R1.y, QB[451].pop VEC_120 | |
457 x: MOV R1.x, QB[452].pop VEC_120 | |
y: MOV R4.y, QA[452].pop | |
458 x: MOV R3.x, QA[453] | |
459 x: MOV R8.x, QB[453].pop VEC_120 | |
460 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
188 JUMP POP_CNT(1) ADDR(191) | |
189 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
190 POP (1) ADDR(191) | |
191 ALU_PUSH_BEFORE: ADDR(1164) CNT(19) | |
461 x: LDS_READ2_RET QAB, 2404, 356 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
462 x: LDS_READ2_RET QAB, 4452, 2400 | |
z: ADD_64 R1.z, PV461.w, R4.y | |
w: ADD_64 R1.w, PV461.z, R8.x | |
463 x: LDS_READ2_RET QAB, 352, 4448 | |
464 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
465 y: MOV R0.y, QA[461] | |
466 y: MOV R1.y, QB[461].pop VEC_120 | |
467 x: MOV R1.x, QB[462].pop VEC_120 | |
y: MOV R4.y, QA[462].pop | |
468 x: MOV R3.x, QA[463] | |
469 x: MOV R8.x, QB[463].pop VEC_120 | |
470 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
192 JUMP POP_CNT(1) ADDR(195) | |
193 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
194 POP (1) ADDR(195) | |
195 ALU_PUSH_BEFORE: ADDR(1183) CNT(19) | |
471 x: LDS_READ2_RET QAB, 2412, 364 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
472 x: LDS_READ2_RET QAB, 4460, 2408 | |
z: ADD_64 R1.z, PV471.w, R4.y | |
w: ADD_64 R1.w, PV471.z, R8.x | |
473 x: LDS_READ2_RET QAB, 360, 4456 | |
474 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
475 y: MOV R0.y, QA[471] | |
476 y: MOV R1.y, QB[471].pop VEC_120 | |
477 x: MOV R1.x, QB[472].pop VEC_120 | |
y: MOV R4.y, QA[472].pop | |
478 x: MOV R3.x, QA[473] | |
479 x: MOV R8.x, QB[473].pop VEC_120 | |
480 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
196 JUMP POP_CNT(1) ADDR(199) | |
197 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
198 POP (1) ADDR(199) | |
199 ALU_PUSH_BEFORE: ADDR(1202) CNT(19) | |
481 x: LDS_READ2_RET QAB, 2420, 372 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
482 x: LDS_READ2_RET QAB, 4468, 2416 | |
z: ADD_64 R1.z, PV481.w, R4.y | |
w: ADD_64 R1.w, PV481.z, R8.x | |
483 x: LDS_READ2_RET QAB, 368, 4464 | |
484 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
485 y: MOV R0.y, QA[481] | |
486 y: MOV R1.y, QB[481].pop VEC_120 | |
487 x: MOV R1.x, QB[482].pop VEC_120 | |
y: MOV R4.y, QA[482].pop | |
488 x: MOV R3.x, QA[483] | |
489 x: MOV R8.x, QB[483].pop VEC_120 | |
490 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
200 JUMP POP_CNT(1) ADDR(203) | |
201 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
202 POP (1) ADDR(203) | |
203 ALU_PUSH_BEFORE: ADDR(1221) CNT(19) | |
491 x: LDS_READ2_RET QAB, 2428, 380 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
492 x: LDS_READ2_RET QAB, 4476, 2424 | |
z: ADD_64 R1.z, PV491.w, R4.y | |
w: ADD_64 R1.w, PV491.z, R8.x | |
493 x: LDS_READ2_RET QAB, 376, 4472 | |
494 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
495 y: MOV R0.y, QA[491] | |
496 y: MOV R1.y, QB[491].pop VEC_120 | |
497 x: MOV R1.x, QB[492].pop VEC_120 | |
y: MOV R4.y, QA[492].pop | |
498 x: MOV R3.x, QA[493] | |
499 x: MOV R8.x, QB[493].pop VEC_120 | |
500 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
204 JUMP POP_CNT(1) ADDR(207) | |
205 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
206 POP (1) ADDR(207) | |
207 ALU_PUSH_BEFORE: ADDR(1240) CNT(19) | |
501 x: LDS_READ2_RET QAB, 2436, 388 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
502 x: LDS_READ2_RET QAB, 4484, 2432 | |
z: ADD_64 R1.z, PV501.w, R4.y | |
w: ADD_64 R1.w, PV501.z, R8.x | |
503 x: LDS_READ2_RET QAB, 384, 4480 | |
504 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
505 y: MOV R0.y, QA[501] | |
506 y: MOV R1.y, QB[501].pop VEC_120 | |
507 x: MOV R1.x, QB[502].pop VEC_120 | |
y: MOV R4.y, QA[502].pop | |
508 x: MOV R3.x, QA[503] | |
509 x: MOV R8.x, QB[503].pop VEC_120 | |
510 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
208 JUMP POP_CNT(1) ADDR(211) | |
209 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
210 POP (1) ADDR(211) | |
211 ALU_PUSH_BEFORE: ADDR(1259) CNT(19) | |
511 x: LDS_READ2_RET QAB, 2444, 396 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
512 x: LDS_READ2_RET QAB, 4492, 2440 | |
z: ADD_64 R1.z, PV511.w, R4.y | |
w: ADD_64 R1.w, PV511.z, R8.x | |
513 x: LDS_READ2_RET QAB, 392, 4488 | |
514 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
515 y: MOV R0.y, QA[511] | |
516 y: MOV R1.y, QB[511].pop VEC_120 | |
517 x: MOV R1.x, QB[512].pop VEC_120 | |
y: MOV R4.y, QA[512].pop | |
518 x: MOV R3.x, QA[513] | |
519 x: MOV R8.x, QB[513].pop VEC_120 | |
520 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
212 JUMP POP_CNT(1) ADDR(215) | |
213 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
214 POP (1) ADDR(215) | |
215 ALU_PUSH_BEFORE: ADDR(1278) CNT(19) | |
521 x: LDS_READ2_RET QAB, 2452, 404 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
522 x: LDS_READ2_RET QAB, 4500, 2448 | |
z: ADD_64 R1.z, PV521.w, R4.y | |
w: ADD_64 R1.w, PV521.z, R8.x | |
523 x: LDS_READ2_RET QAB, 400, 4496 | |
524 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
525 y: MOV R0.y, QA[521] | |
526 y: MOV R1.y, QB[521].pop VEC_120 | |
527 x: MOV R1.x, QB[522].pop VEC_120 | |
y: MOV R4.y, QA[522].pop | |
528 x: MOV R3.x, QA[523] | |
529 x: MOV R8.x, QB[523].pop VEC_120 | |
530 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
216 JUMP POP_CNT(1) ADDR(219) | |
217 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
218 POP (1) ADDR(219) | |
219 ALU: ADDR(1297) CNT(18) | |
531 x: LDS_READ2_RET QAB, 2460, 412 | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R3.x, R1.x | |
532 x: LDS_READ2_RET QAB, 4508, 2456 | |
z: ADD_64 R1.z, PV531.w, R4.y | |
w: ADD_64 R1.w, PV531.z, R8.x | |
533 x: LDS_READ2_RET QAB, 408, 4504 | |
534 x: ADD_64 R6.x, R6.y, R1.w | |
y: ADD_64 R6.y, R6.x, R1.z | |
535 y: MOV R0.y, QA[531] | |
536 y: MOV R1.y, QB[531].pop VEC_120 | |
537 x: MOV R1.x, QB[532].pop VEC_120 | |
y: MOV R4.y, QA[532].pop | |
538 x: MOV R3.x, QA[533] | |
539 x: MOV R8.x, QB[533].pop VEC_120 | |
220 PUSH ADDR(225) | |
221 ALU: ADDR(1315) CNT(1) | |
540 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
222 JUMP POP_CNT(1) ADDR(225) | |
223 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
224 POP (1) ADDR(225) | |
225 ALU: ADDR(1316) CNT(6) | |
541 x: ADD_64 ____, R1.y, R0.y | |
y: ADD_64 ____, R3.x, R1.x | |
542 z: ADD_64 ____, PV541.y, R4.y | |
w: ADD_64 ____, PV541.x, R8.x | |
543 x: ADD_64 R6.x, R6.y, PV542.w | |
y: ADD_64 R6.y, R6.x, PV542.z | |
226 WAIT_ACK: Outstanding_acks <= 0 | |
227 ALU: ADDR(1322) CNT(1) | |
544 x: GROUP_BARRIER ____ | |
228 ENDLOOP i2 PASS_JUMP_ADDR(8) | |
229 ALU_PUSH_BEFORE: ADDR(1323) CNT(2) | |
545 y: SETE_INT R4.y, R0.z, 0.0f | |
546 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
230 JUMP POP_CNT(1) ADDR(233) | |
231 MEM_RAT_INC_UINT_ACK: RAT(11)[R2], R5, MARK VPM | |
232 POP (1) ADDR(233) | |
233 ALU: ADDR(1325) CNT(14) | |
547 y: LSHL ____, R0.x, 3 | |
548 x: LDS_WRITE_REL (offset: 1) ____, PV547.y, R7.x, R2.y | |
y: ADD_INT ____, PV547.y, 2048 | |
z: ADD_INT R0.z, PV547.y, 4096 | |
549 x: LDS_WRITE ____, PV548.y, R7.x | |
y: ADD_INT R0.y, PV548.z, 4 | |
w: ADD_INT ____, PV548.y, 4 | |
550 x: LDS_WRITE ____, PV549.w, R2.y | |
551 x: LDS_WRITE ____, R0.z, R7.x | |
552 x: LDS_XCHG_RET QA, R0.y, R2.y | |
553 y: MOV ____, QA[552].pop | |
234 WAIT_ACK: Outstanding_acks <= 0 | |
235 ALU: ADDR(1339) CNT(1) | |
554 x: GROUP_BARRIER ____ | |
236 ALU: ADDR(1340) CNT(1) | |
555 w: MOV R0.w, 0.0f | |
237 LOOP_NO_AL i1 FAIL_JUMP_ADDR(270) | |
238 ALU: ADDR(1341) CNT(20) | |
556 y: LSHL R2.y, R0.w, 3 | |
557 x: LDS_READ_RET QA, PV556.y | |
y: ADD_INT R0.y, PV556.y, 4100 | |
z: ADD_INT ____, PV556.y, 2052 | |
w: ADD_INT ____, PV556.y, 4 | |
558 x: LDS_READ2_RET QAB, PV557.z, PV557.w | |
z: ADD_INT R0.z, 4096, R2.y | |
w: ADD_INT ____, 2048, R2.y | |
559 x: LDS_READ2_RET QAB, R0.y, PV558.w | |
560 x: LDS_READ_RET QA, R0.z | |
561 x: MOV R0.x, QA[557].pop | |
562 y: MOV R0.y, QA[558] | |
563 y: MOV R1.y, QB[558].pop VEC_120 | |
564 x: MOV R1.x, QB[559].pop VEC_120 | |
y: MOV R5.y, QA[559].pop | |
565 x: MOV R2.x, QA[560].pop | |
239 PUSH ADDR(244) | |
240 ALU: ADDR(1361) CNT(1) | |
566 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
241 JUMP POP_CNT(1) ADDR(244) | |
242 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
243 POP (1) ADDR(244) | |
244 ALU: ADDR(1362) CNT(29) | |
567 x: ADD_INT ____, 2056, R2.y | |
y: ADD_INT ____, 8, R2.y | |
z: ADD_INT R1.z, R0.w, 1 | |
w: ADD_INT R0.w, 4104, R2.y | |
568 x: LDS_READ2_RET QAB, PV567.x, PV567.y | |
w: LSHL ____, PV567.z, 3 | |
569 x: LDS_READ_RET QA, R0.w | |
y: ADD_INT ____, PV568.w, 4 | |
z: ADD_INT R0.z, PV568.w, 4100 | |
w: ADD_INT ____, PV568.w, 2052 | |
570 x: LDS_READ2_RET QAB, PV569.w, PV569.y | |
z: ADD_64 ____, R1.y, R0.y | |
w: ADD_64 ____, R0.x, R1.x | |
571 x: LDS_READ_RET QA, R0.z | |
z: ADD_64 ____, PV570.w, R5.y | |
w: ADD_64 ____, PV570.z, R2.x | |
572 x: ADD_64 R1.x, R6.y, PV571.w | |
y: ADD_64 R1.y, R6.x, PV571.z | |
573 x: MOV R0.x, QA[568] | |
574 x: MOV R2.x, QB[568].pop VEC_120 | |
575 x: MOV R3.x, QA[569].pop | |
576 y: MOV R0.y, QA[570] | |
577 y: MOV R5.y, QB[570].pop VEC_120 | |
578 y: MOV R6.y, QA[571].pop | |
245 PUSH ADDR(250) | |
246 ALU: ADDR(1391) CNT(1) | |
579 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
247 JUMP POP_CNT(1) ADDR(250) | |
248 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
249 POP (1) ADDR(250) | |
250 ALU: ADDR(1392) CNT(29) | |
580 x: ADD_INT ____, 2064, R2.y | |
y: ADD_INT ____, 16, R2.y | |
z: ADD_INT R1.z, R1.z, 1 | |
w: ADD_INT R0.w, 4112, R2.y | |
581 x: LDS_READ2_RET QAB, PV580.x, PV580.y | |
w: LSHL ____, PV580.z, 3 | |
582 x: LDS_READ_RET QA, R0.w | |
y: ADD_INT ____, PV581.w, 4 | |
z: ADD_INT R0.z, PV581.w, 4100 | |
w: ADD_INT ____, PV581.w, 2052 | |
583 x: LDS_READ2_RET QAB, PV582.w, PV582.y | |
z: ADD_64 ____, R5.y, R0.y | |
w: ADD_64 ____, R2.x, R0.x | |
584 x: LDS_READ_RET QA, R0.z | |
z: ADD_64 ____, PV583.w, R6.y | |
w: ADD_64 ____, PV583.z, R3.x | |
585 x: ADD_64 R1.x, R1.y, PV584.w | |
y: ADD_64 R1.y, R1.x, PV584.z | |
586 x: MOV R0.x, QA[581] | |
587 x: MOV R2.x, QB[581].pop VEC_120 | |
588 x: MOV R3.x, QA[582].pop | |
589 y: MOV R0.y, QA[583] | |
590 y: MOV R5.y, QB[583].pop VEC_120 | |
591 y: MOV R6.y, QA[584].pop | |
251 PUSH ADDR(256) | |
252 ALU: ADDR(1421) CNT(1) | |
592 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
253 JUMP POP_CNT(1) ADDR(256) | |
254 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
255 POP (1) ADDR(256) | |
256 ALU: ADDR(1422) CNT(29) | |
593 x: ADD_INT ____, 2072, R2.y | |
y: ADD_INT ____, 24, R2.y | |
z: ADD_INT R1.z, R1.z, 1 | |
w: ADD_INT R0.w, 4120, R2.y | |
594 x: LDS_READ2_RET QAB, PV593.x, PV593.y | |
w: LSHL ____, PV593.z, 3 | |
595 x: LDS_READ_RET QA, R0.w | |
y: ADD_INT ____, PV594.w, 4 | |
z: ADD_INT R0.z, PV594.w, 4100 | |
w: ADD_INT ____, PV594.w, 2052 | |
596 x: LDS_READ2_RET QAB, PV595.w, PV595.y | |
z: ADD_64 ____, R5.y, R0.y | |
w: ADD_64 ____, R2.x, R0.x | |
597 x: LDS_READ_RET QA, R0.z | |
z: ADD_64 ____, PV596.w, R6.y | |
w: ADD_64 ____, PV596.z, R3.x | |
598 x: ADD_64 R1.x, R1.y, PV597.w | |
y: ADD_64 R1.y, R1.x, PV597.z | |
599 x: MOV R0.x, QA[594] | |
600 x: MOV R2.x, QB[594].pop VEC_120 | |
601 x: MOV R3.x, QA[595].pop | |
602 y: MOV R0.y, QA[596] | |
603 y: MOV R5.y, QB[596].pop VEC_120 | |
604 y: MOV R6.y, QA[597].pop | |
257 PUSH ADDR(262) | |
258 ALU: ADDR(1451) CNT(1) | |
605 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
259 JUMP POP_CNT(1) ADDR(262) | |
260 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
261 POP (1) ADDR(262) | |
262 ALU: ADDR(1452) CNT(29) | |
606 x: ADD_INT ____, 2080, R2.y | |
y: ADD_INT ____, 32, R2.y | |
z: ADD_INT R1.z, R1.z, 1 | |
w: ADD_INT R0.w, 4128, R2.y | |
607 x: LDS_READ2_RET QAB, PV606.x, PV606.y | |
w: LSHL ____, PV606.z, 3 | |
608 x: LDS_READ_RET QA, R0.w | |
y: ADD_INT ____, PV607.w, 4 | |
z: ADD_INT R0.z, PV607.w, 4100 | |
w: ADD_INT ____, PV607.w, 2052 | |
609 x: LDS_READ2_RET QAB, PV608.w, PV608.y | |
z: ADD_64 ____, R5.y, R0.y | |
w: ADD_64 ____, R2.x, R0.x | |
610 x: LDS_READ_RET QA, R0.z | |
z: ADD_64 ____, PV609.w, R6.y | |
w: ADD_64 ____, PV609.z, R3.x | |
611 x: ADD_64 R1.x, R1.y, PV610.w | |
y: ADD_64 R1.y, R1.x, PV610.z | |
612 x: MOV R0.x, QA[607] | |
613 x: MOV R2.x, QB[607].pop VEC_120 | |
614 x: MOV R3.x, QA[608].pop | |
615 y: MOV R0.y, QA[609] | |
616 y: MOV R2.y, QB[609].pop VEC_120 | |
617 y: MOV R5.y, QA[610].pop | |
263 PUSH ADDR(268) | |
264 ALU: ADDR(1481) CNT(1) | |
618 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
265 JUMP POP_CNT(1) ADDR(268) | |
266 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
267 POP (1) ADDR(268) | |
268 ALU: ADDR(1482) CNT(7) | |
619 x: ADD_64 ____, R2.y, R0.y | |
y: ADD_64 ____, R2.x, R0.x | |
w: ADD_INT R0.w, R1.z, 1 | |
620 z: ADD_64 ____, PV619.y, R5.y | |
w: ADD_64 ____, PV619.x, R3.x | |
621 x: ADD_64 R6.x, R1.y, PV620.w | |
y: ADD_64 R6.y, R1.x, PV620.z | |
269 ENDLOOP i1 PASS_JUMP_ADDR(238) | |
270 ALU: ADDR(1489) CNT(20) | |
622 z: LSHL R0.z, R0.w, 3 | |
623 x: LDS_READ_RET QA, PV622.z | |
y: ADD_INT R0.y, PV622.z, 4100 | |
z: ADD_INT ____, PV622.z, 2052 | |
w: ADD_INT ____, PV622.z, 4 | |
624 x: LDS_READ2_RET QAB, PV623.z, PV623.w | |
z: ADD_INT R0.z, 4096, R0.z | |
w: ADD_INT ____, 2048, R0.z | |
625 x: LDS_READ2_RET QAB, R0.y, PV624.w | |
626 x: LDS_READ_RET QA, R0.z | |
627 x: MOV R0.x, QA[623].pop | |
628 y: MOV R0.y, QA[624] | |
629 y: MOV R1.y, QB[624].pop VEC_120 | |
630 x: MOV R1.x, QB[625].pop VEC_120 | |
y: MOV R2.y, QA[625].pop | |
631 x: MOV R2.x, QA[626].pop | |
271 PUSH ADDR(276) | |
272 ALU: ADDR(1509) CNT(1) | |
632 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED | |
273 JUMP POP_CNT(1) ADDR(276) | |
274 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM | |
275 POP (1) ADDR(276) | |
276 ALU: ADDR(1510) CNT(6) | |
633 x: ADD_64 ____, R1.y, R0.y | |
y: ADD_64 ____, R0.x, R1.x | |
634 z: ADD_64 ____, PV633.y, R2.y | |
w: ADD_64 ____, PV633.x, R2.x | |
635 x: ADD_64 R4.x, R6.y, PV634.w | |
y: ADD_64 R4.y, R6.x, PV634.z | |
277 WAIT_ACK: Outstanding_acks <= 0 | |
278 ALU: ADDR(1516) CNT(1) | |
636 x: GROUP_BARRIER ____ | |
279 ALU: ADDR(1517) CNT(5) KCACHE0(CB1:0-15) | |
637 x: MOV R1.x, R4.y | |
y: ADD_INT ____, KC0[2].x, R3.y | |
638 x: LSHR R0.x, PV637.y, 2 | |
639 x: ADD_INT R2.x, 1, PV638.x | |
280 MEM_RAT_STORE_DWORD__NI: RAT(11)[R0].x___, R4, MARK VPM | |
281 MEM_RAT_STORE_DWORD__NI: RAT(11)[R2].x___, R1, MARK VPM | |
282 END | |
END_OF_PROGRAM | |
; ----------------- CS Data ------------------------ | |
; Input Semantic Mappings | |
; No input mappings | |
GprPoolSize = 0 | |
CodeLen = 12320;Bytes | |
PGM_END_CF = 0; words(64 bit) | |
PGM_END_ALU = 0; words(64 bit) | |
PGM_END_FETCH = 0; words(64 bit) | |
MaxScratchRegsNeeded = 0 | |
;AluPacking = 0.0 | |
;AluClauses = 0 | |
;PowerThrottleRate = 0.0 | |
; texResourceUsage[0] = 0x00000000 | |
; texResourceUsage[1] = 0x00000000 | |
; texResourceUsage[2] = 0x00000000 | |
; texResourceUsage[3] = 0x00000000 | |
; texResourceUsage[4] = 0x00000000 | |
; texResourceUsage[5] = 0x00000000 | |
; texResourceUsage[6] = 0x00000000 | |
; texResourceUsage[7] = 0x00000000 | |
; fetch4ResourceUsage[0] = 0x00000000 | |
; fetch4ResourceUsage[1] = 0x00000000 | |
; fetch4ResourceUsage[2] = 0x00000000 | |
; fetch4ResourceUsage[3] = 0x00000000 | |
; fetch4ResourceUsage[4] = 0x00000000 | |
; fetch4ResourceUsage[5] = 0x00000000 | |
; fetch4ResourceUsage[6] = 0x00000000 | |
; fetch4ResourceUsage[7] = 0x00000000 | |
; texSamplerUsage = 0x00000000 | |
; constBufUsage = 0x00000000 | |
ResourcesAffectAlphaOutput[0] = 0x00000000 | |
ResourcesAffectAlphaOutput[1] = 0x00000000 | |
ResourcesAffectAlphaOutput[2] = 0x00000000 | |
ResourcesAffectAlphaOutput[3] = 0x00000000 | |
ResourcesAffectAlphaOutput[4] = 0x00000000 | |
ResourcesAffectAlphaOutput[5] = 0x00000000 | |
ResourcesAffectAlphaOutput[6] = 0x00000000 | |
ResourcesAffectAlphaOutput[7] = 0x00000000 | |
;SQ_PGM_RESOURCES = 0x30000309 | |
SQ_PGM_RESOURCES:NUM_GPRS = 9 | |
SQ_PGM_RESOURCES:STACK_SIZE = 3 | |
SQ_PGM_RESOURCES:PRIME_CACHE_ENABLE = 1 | |
;SQ_PGM_RESOURCES_2 = 0x000000C0 | |
SQ_LDS_ALLOC:SIZE = 0x00002000 | |
; RatOpIsUsed = 0x800 | |
; RatAtomicOpIsUsed = 0x800 | |
; NumThreadPerGroupFlattened = 256 | |
; NumThreadPerGroup_x = 256 | |
; NumThreadPerGroup_y = 1 | |
; NumThreadPerGroup_z = 1 | |
; SetBufferForNumGroup = true |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment