Skip to content

Instantly share code, notes, and snippets.

@arsenm
Created October 18, 2011 16:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arsenm/1295851 to your computer and use it in GitHub Desktop.
Save arsenm/1295851 to your computer and use it in GitHub Desktop.
AMD iteration bug?
/* Reproducer for a suspected compiler problem ("AMD iteration bug?"): the
 * inner loop of the kernel below is reported to execute fewer times than
 * written.
 *
 * Launch with global size = 6144, local size = 256
 * (i.e. 6144 / 256 = 24 work-groups).
 * Environment where it was observed:
 * Linux x86_64 (kernel 3.0), Catalyst 11.9, SDK 2.5, building for Cayman
 */
/* Enable double precision through whichever extension macro is defined;
 * refuse to build if neither is available. */
#if cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#elif cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#else
#error Missing double precision extension
#endif
/* iterationBug
 *
 * Each work-item reads posX[i], then runs 24 outer passes. In every pass it
 * stages that value into three local arrays, and after a barrier sums all
 * 256 entries of the three arrays into a private accumulator. The
 * accumulator is written to accX[i] at the end.
 *
 * Work-item 0 (global id 0) additionally counts loop iterations:
 *   debug[0] - incremented once per outer pass  (expected total: 24)
 *   debug[1] - incremented once per inner pass  (expected total: 24 * 256)
 *
 * debug: at least two ints; presumably zeroed by the host before launch
 *        (the host code is not part of this listing).
 * posX:  one double per work-item, read once.
 * accX:  one double per work-item, written once.
 *
 * The work-group size is fixed at 256 by the attribute below, and the local
 * array sizes and inner loop bound rely on that value.
 */
__attribute__ ((reqd_work_group_size(256, 1, 1)))
__kernel void iterationBug(__global volatile int* debug,
__global volatile double* restrict posX,
__global volatile double* restrict accX)
{
/* One slot per work-item of the group in each array. */
__local double xs[256];
__local double ys[256];
__local double zs[256];
int i = get_global_id(0);
double px = posX[i];
/* Running sum written to accX[i] after the loops. */
double ax = 0.0;
for (int j = 0; j < 24; ++j)
{
/* If I remove this "j" loop, the inner loop is executed the
* correct number of times */
if (i == 0)
{
/* This should be 24, and it is. */
atomic_inc(&debug[0]);
}
/* All three arrays receive the same value px. */
xs[get_local_id(0)] = px;
ys[get_local_id(0)] = px;
zs[get_local_id(0)] = px;
/* Every work-item's stores must be complete before any work-item
 * starts reading the arrays below. */
barrier(CLK_LOCAL_MEM_FENCE);
/* The loop executes the correct number of times if I unroll it */
//#pragma unroll 8
for (int k = 0; k < 256; ++k)
{
/* Changing the amount of work in here seems to change the
number of iterations which actually happen. If I reduce
the work, it happens correctly.
*/
ax += xs[k] + ys[k] + zs[k];
if (i == 0)
{
/* At the end this should be equal to 24 * 256 (= 6144).
 * However, in this case it is only 1452. */
atomic_inc(&debug[1]);
}
}
/* Keep the next pass's stores from overwriting the arrays while other
 * work-items are still reading them. */
barrier(CLK_LOCAL_MEM_FENCE);
}
accX[i] = ax;
}
mdef(16383)_out(1)_in(2)
; Macro 16383: one output, two inputs.
; Replaces the x component of in0 with in0.x / in1.x and passes the other
; components of in0 through unchanged.
; No use of this macro is visible in func 1041 or func 1061 of this listing.
mov r0, in0
mov r1, in1
; NOTE(review): zeroop(infinity) presumably selects the result for a zero
; divisor - confirm against the IL specification.
div_zeroop(infinity) r0.x___, r0.x, r1.x
mov out0, r0
mend
il_cs_2_0
; Main section of the IL program: declares constant buffer cb0 and a set of
; shared literals, then calls func 1041 (the kernel wrapper) and ends.
dcl_cb cb0[10] ; Constant buffer that holds ABI data
; Of the literals below, func 1041 reads l0 (l0.z, l0.0) and l1 (l1.0).
; l2-l7 are not referenced by func 1041 or func 1061 in this listing.
dcl_literal l0, 4, 1, 2, 3
dcl_literal l1, 0x00FFFFFF, -1, -2, -3
dcl_literal l2, 0x0000FFFF, 0xFFFFFFFE,0x000000FF,0xFFFFFFFC
dcl_literal l3, 24, 16, 8, 0xFFFFFFFF
dcl_literal l4, 0xFFFFFF00, 0xFFFF0000, 0xFF00FFFF, 0xFFFF00FF
dcl_literal l5, 0, 4, 8, 12
dcl_literal l6, 32, 32, 32, 32
dcl_literal l7, 24, 31, 16, 31
; Entry point: hand control to the kernel wrapper.
call 1041;$
endmain
func 1041 ; __OpenCL_iterationBug_kernel
; Kernel wrapper. Declares the thread-group shape and memory resources,
; derives the work-item ids, loads the three kernel arguments from cb1 and
; calls func 1061, which holds the kernel body.
;
; Values consumed by func 1061:
;   r1021.x - index used for posX/accX (get_global_id(0) in the source)
;   r1022.x - vTidInGrp.x              (get_local_id(0) in the source)
;   r1, r2, r3 - debug, posX, accX
; r1013, r1019, r1018 and r1023 are written here but not read by func 1061
; in this listing.
mov r1013, cb0[8].x
mov r1019, l1.0
; Matches reqd_work_group_size(256, 1, 1) on the OpenCL kernel.
dcl_num_thread_per_group 256, 1, 1
; LDS block id 1, size 32768 (units presumably bytes - confirm). func 1061
; only addresses offsets 0..6143 of it.
dcl_lds_id(1) 32768
; UAV 11 is the one func 1061 uses for all global loads, stores and atomics.
dcl_raw_uav_id(11)
dcl_arena_uav_id(8)
mov r0.z, vThreadGrpIdFlat.x
mov r1022.xyz0, vTidInGrp.xyz
mov r1023.xyz0, vThreadGrpId.xyz
; r1021 = group id * cb0[1] + id within group, then + cb0[6].
; NOTE(review): cb0[1] is presumably the work-group size and cb0[6] the
; global offset; only "ABI data" is stated for cb0 - confirm.
imad r1021.xyz0, r1023.xyz0, cb0[1].xyz0, r1022.xyz0
iadd r1021.xyz0, r1021.xyz0, cb0[6].xyz0
iadd r1023.xyz0, r1023.xyz0, cb0[7].xyz0
; r1023.w = flat group id << l0.z (l0.z = 2).
mov r1023.w, r0.z
ishl r1023.w, r1023.w, l0.z
mov r1018.x, l0.0
; Literals used by func 1061 (loop bounds, LDS offsets, shift counts).
dcl_literal l14, 0x00000000, 0x00000000, 0x00000000, 0x00000000; f32:i32 0
dcl_literal l18, 0x00000001, 0x00000001, 0x00000001, 0x00000001; f32:i32 1
dcl_literal l11, 0x00000003, 0x00000003, 0x00000003, 0x00000003; f32:i32 3
dcl_literal l10, 0x00000004, 0x00000004, 0x00000004, 0x00000004; f32:i32 4
dcl_literal l19, 0x0000000b, 0x0000000b, 0x0000000b, 0x0000000b; f32:i32 11
dcl_literal l21, 0x0000000c, 0x0000000c, 0x0000000c, 0x0000000c; f32:i32 12
dcl_literal l13, 0x00000018, 0x00000018, 0x00000018, 0x00000018; f32:i32 24
dcl_literal l20, 0x00000100, 0x00000100, 0x00000100, 0x00000100; f32:i32 256
dcl_literal l16, 0x00000800, 0x00000800, 0x00000800, 0x00000800; f32:i32 2048
dcl_literal l17, 0x00001000, 0x00001000, 0x00001000, 0x00001000; f32:i32 4096
dcl_literal l15, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff; f32:i32 4294967295
dcl_literal l12, 0x00000000, 0x00000000, 0x00000000, 0x00000000; f64:i64 0
; cb1 carries the kernel arguments, one per entry.
dcl_cb cb1[3]
; Kernel arg setup: debug
mov r1, cb1[0]
; Kernel arg setup: posX
mov r2, cb1[1]
; Kernel arg setup: accX
mov r3, cb1[2]
call 1061 ; iterationBug
ret
endfunc ; __OpenCL_iterationBug_kernel
;ARGSTART:__OpenCL_iterationBug_kernel
;version:2:0:68
;device:cayman
;uniqueid:1041
;memory:hwprivate:0
;memory:hwregion:0
;memory:hwlocal:0
;cws:256:1:1
;pointer:debug:i32:1:1:0:uav:11:8
;pointer:posX:double:1:1:16:uav:11:16
;pointer:accX:double:1:1:32:uav:11:16
;function:1:1061
;uavid:11
;ARGEND:__OpenCL_iterationBug_kernel
func 1061 ; iterationBug ; @__OpenCL_iterationBug_kernel
; IL for the body of the OpenCL kernel iterationBug.
;
; Inputs prepared by func 1041:
;   r1 = debug, r2 = posX, r3 = accX
;   r1021.x = global index, r1022.x = id within the work-group
;
; Register roles inside this function:
;   r254.x  accX base address
;   r256.x  address of debug[0]
;   r257.x  address of debug[1] (debug + 4)
;   r258.x  global index i
;   r259.x  3, shift count that turns an index into a byte offset (x8)
;   r255.xy px, the double loaded from posX[i]
;   r260.xy ax, the running sum (starts at 0)
;   r261.x  outer-loop counter, counts down from 24 to 0
;   r271.x  inner-loop index k, counts up from 0 to 256
;   r262 = 0, r263 = 0xffffffff, r264 = 2048, r265 = 4096, r266 = 1, r268 = 256
;   r267 (11) and r269 (12) are loaded but not referenced again here.
;
; LDS layout: three regions of 256 doubles at byte offsets 0, 2048 and 4096
; (presumably xs, ys and zs, in source order).
; BB#0: ; %entry
mov r254, r3
mov r256, r1
; r257.x = debug + 4 -> &debug[1]
mov r257, l10.xxxx
iadd r257.x___, r256.xxxx, r257.xxxx
; r258 = (i, 0, 0, 0)
mov r258, r1021.xyz0
mov r258, r258.x000
; px = posX[i]: address = posX + (i << 3), 8-byte load from UAV 11.
mov r259, l11.xxxx
ishl r260.x___, r258.xxxx, r259.xxxx
iadd r255.x___, r2.xxxx, r260.xxxx
mov r1010.x___, r255.xxxx
uav_raw_load_id(11)_cached r1011.xy__, r1010.xxxx
mov r255.xy__, r1011.xyxy
; ax = 0 (l12 is the f64 zero literal).
mov r260, l12.xyxy
; Loop constants, see the register table above.
mov r261, l13.xxxx
mov r262, l14.xxxx
mov r263, l15.xxxx
mov r264, l16.xxxx
mov r265, l17.xxxx
mov r266, l18.xxxx
mov r267, l19.xxxx
mov r268, l20.xxxx
mov r269, l21.xxxx
; ---- outer loop: "for (j = 0; j < 24; ++j)" as a countdown in r261 ----
whileloop
; if (i == 0) atomic_inc(&debug[0]);
ieq r270.x___, r258.xxxx, r262.xxxx
if_logicalnz r270.xxxx
; NOTE(review): the second operand (r263 = 0xffffffff) is presumably the
; wrap bound of the increment - confirm against the IL specification.
uav_uinc_id(11) r256.xxxx, r263.xxxx
else
endif
; Store px to LDS at offset 0 + local_id * 8.
mov r270, r1022.xyz0
mov r270, r270.x000
ishl r270.x___, r270.xxxx, r259.xxxx
; r271 = 0 serves as the base offset here and also resets k for the inner
; loop of this pass.
mov r271, l14.xxxx
iadd r270.x___, r271.xxxx, r270.xxxx
mov r1011.xy__, r255.xyxy
mov r1010.x___, r270.xxxx
lds_store_vec_id(1) mem.xy__, r1010.xxxx, r1011.xyxy, r1011.xyxy
; Store px to LDS at offset 2048 + local_id * 8.
mov r270, r1022.xyz0
mov r270, r270.x000
ishl r270.x___, r270.xxxx, r259.xxxx
iadd r270.x___, r264.xxxx, r270.xxxx
mov r1011.xy__, r255.xyxy
mov r1010.x___, r270.xxxx
lds_store_vec_id(1) mem.xy__, r1010.xxxx, r1011.xyxy, r1011.xyxy
; Store px to LDS at offset 4096 + local_id * 8.
mov r270, r1022.xyz0
mov r270, r270.x000
ishl r270.x___, r270.xxxx, r259.xxxx
iadd r270.x___, r265.xxxx, r270.xxxx
mov r1011.xy__, r255.xyxy
mov r1010.x___, r270.xxxx
lds_store_vec_id(1) mem.xy__, r1010.xxxx, r1011.xyxy, r1011.xyxy
; Sits where the source has the first barrier(CLK_LOCAL_MEM_FENCE).
fence_threads_memory_lds
; ---- inner loop: "for (k = 0; k < 256; ++k)" with k in r271.x ----
whileloop
; r270.x = k * 8
ishl r270.x___, r271.xxxx, r259.xxxx
; r272.xy = double at LDS offset 2048 + k * 8
iadd r272.x___, r264.xxxx, r270.xxxx
mov r1010.x___, r272.xxxx
lds_load_vec_id(1) r1011.xy__, r1010.xxxx, r1010.xxxx
mov r272.xy__, r1011.xyxy
; r273.xy = double at LDS offset 0 + k * 8
iadd r273.x___, r262.xxxx, r270.xxxx
mov r1010.x___, r273.xxxx
lds_load_vec_id(1) r1011.xy__, r1010.xxxx, r1010.xxxx
mov r273.xy__, r1011.xyxy
; r270.xy = double at LDS offset 4096 + k * 8
iadd r270.x___, r265.xxxx, r270.xxxx
mov r1010.x___, r270.xxxx
lds_load_vec_id(1) r1011.xy__, r1010.xxxx, r1010.xxxx
mov r270.xy__, r1011.xyxy
; if (i == 0) atomic_inc(&debug[1]);
ieq r274.x___, r258.xxxx, r262.xxxx
if_logicalnz r274.xxxx
uav_uinc_id(11) r257.xxxx, r263.xxxx
else
endif
; ax += (value@0 + value@2048) + value@4096
dadd r272.xy__, r273.xyxy, r272.xyxy
dadd r270.xy__, r272.xyxy, r270.xyxy
dadd r260.xy__, r260.xyxy, r270.xyxy
; ++k; leave the inner loop once k == 256.
iadd r271.x___, r271.xxxx, r266.xxxx
ieq r270.x___, r271.xxxx, r268.xxxx
break_logicalnz r270.xxxx
endloop
; Sits where the source has the second barrier(CLK_LOCAL_MEM_FENCE).
fence_threads_memory_lds
; Outer counter += 0xffffffff (i.e. -1); leave the outer loop at 0.
; r271 is reused as the comparison flag and reset at the top of the next pass.
iadd r261.x___, r261.xxxx, r263.xxxx
ieq r271.x___, r261.xxxx, r262.xxxx
break_logicalnz r271.xxxx
endloop
; accX[i] = ax: address = accX + (i << 3), 8-byte store to UAV 11.
; r255 is reused as scratch here; px is no longer needed.
mov r255, l11.xxxx
ishl r255.x___, r258.xxxx, r255.xxxx
iadd r254.x___, r254.xxxx, r255.xxxx
mov r1011.xy__, r260.xyxy
mov r1010.x___, r254.xxxx
uav_raw_store_id(11) mem.xy__, r1010.xxxx, r1011.xyxy
ret
endfunc ; iterationBug
;ARGSTART:iterationBug
;uniqueid:1061
;memory:hwregion:0
;memory:hwlocal:6144
;ARGEND:iterationBug
mdef(232)_out(1)_in(2)
; Macro 232: one output, two inputs, result in out0.xy.
; NOTE(review): this looks like a software double-precision divide
; (in0.xy / in1.xy) built from a reciprocal estimate plus refinement; it is
; not named in the listing, so confirm before relying on that reading.
; No use of this macro is visible in func 1041 or func 1061 of this listing.
; Operands are treated as doubles held in .xy, with .y the high 32 bits.
mov r0, in0
mov r1, in1
dcl_literal l9, 12, 2, 0x7ff00000, 0x00100000
; XOR of the two high words; reduced to a sign-only value a few lines below.
ixor r2.x, r0.y, r1.y
; Classify each operand by the magnitude of its high word:
;   >= 0x7ff00000 (r3.x / r3.y), < 0x00100000 (r3.z / r3.w).
ige r3.x, r0.y_abs, l9.z
ige r3.y, r1.y_abs, l9.z
ilt r3.z, r0.y_abs, l9.w
ilt r3.w, r1.y_abs, l9.w
; NOTE(review): assuming _abs clears bit 31, this leaves only the sign bit
; of the XOR in r2.x.
ixor r2.x, r2.x_abs, r2.x
; Decompose both operands; later code uses .zw as the fraction and .y as an
; integer exponent.
dfrexp r20, r0
dfrexp r21, r1
; r0.z / r1.z = 1 when ((high << 12) | low) == 0 for that operand.
ishl r0.z, r0.y, l9.x
ishl r1.z, r1.y, l9.x
ior r0.z, r0.z, r0.x
ior r1.z, r1.z, r1.x
ieq r0.z, r0.z, r0.0
ieq r1.z, r1.z, r0.0
; Work with the absolute value of both fractions.
mov r20.w, r20.wwww_abs
mov r21.w, r21.wwww_abs
dcl_literal l1, 0x00000000, 0x3ff00000, 0x00000001, 0
dcl_literal l2, 0x16f0068e, 0x40075048, 0x00000000, 0xc0000000
dcl_literal l3, 0x00040000, 0x00000300, 0, 0
; Reciprocal of the second fraction, then two refinement steps of the form
; r = r * -(d * r + l2.zw), where l2.zw holds the double -2.0.
drcp_zeroop(zero) r22.xy, r21.zw
dmad r23.xy, r21.zw, r22.xy, l2.zw
dmul r22.xy, r22.xy, r23.xy_neg(yw)
dmad r23.xy, r21.zw, r22.xy, l2.zw
dmul r22.xy, r22.xy, r23.xy_neg(yw)
dcl_literal l6, 0x80000000, 0x7fffffff, 0xf8000000, 0xffffffff
; First quotient estimate q = r * n, corrected once with the residual
; n - d * q.
dmul r10.xy, r22.xy, r20.zw
dmad r23.xy, r21.zw, r10.xy_neg(yw), r20.zw
dmad r10.xy, r22.xy, r23.xy, r10.xy
; Further correction terms from the reciprocal error (l1.xy = 1.0) and the
; remaining residual.
dmad r23.xy, r21.zw, r22.xy_neg(yw), l1.xy
dmul r23.xy, r23.xy, r22.xy
dmad r24.xy, r21.zw, r10.xy_neg(yw), r20.zw
dmul r25.xy, r23.xy, r24.xy
dmad r23.xy, r22.xy, r24.xy, r25.xy
ior r23.y, r23.y, l3.x
; Exponent handling: r20.x = exponent(in0) - exponent(in1), applied in two
; parts; the first part is capped at l3.y (0x300).
inegate r21.y, r21.y
iadd r20.x, r20.y, r21.y
imin r20.y, r20.x, l3.y
inegate r21.y, r20.y
iadd r20.x, r20.x, r21.y
dldexp r11.xy, r10.xy, r20.y
dldexp r12.xy, r11.xy, r21.y
dadd r10.xy, r10.xy, r12.xy_neg(yw)
dadd r10.xy, r10.xy, r23.xy
dldexp r12.xy, l1.xy, r20.y
dmad r10.xy, r10.xy, r12.xy, r11.xy
dldexp r10.xy, r10.xy, r20.x
; Combine the classification flags of each operand into a small code
; (r4.x for in0, r4.y for in1), scaled by 4.
imad r4.x, r3.x, l9.y, r3.z
imad r4.y, r3.y, l9.y, r3.w
imad r4.x, r4.x, l9.y, r0.z
imad r4.y, r4.y, l9.y, r1.z
ishl r4.x, r4.x, l9.y
ishl r4.y, r4.y, l9.y
dcl_literal l7, 0x80008000, 0x66ff66ff, 0x26f326f3, 52
dcl_literal l8, 0x0048c000, 0x00123000, 0x80000000, 31
; Use the codes to index packed lookup constants (l8, l7) and turn the
; selected bits into masks via arithmetic shift right by 31.
; NOTE(review): presumably this selects the special-case result for
; zero / infinity / NaN operands - confirm.
ushr r4.z, l8.x, r4.x
ushr r4.w, l8.y, r4.y
ior r4.w, r4.w, r4.z
ishl r5.x, l7.x, r4.w
ishl r5.y, l7.y, r4.w
ishl r5.z, l7.z, r4.w
ishr r5.x, r5.x, l8.w
ishr r5.y, r5.y, l8.w
ishr r5.z, r5.z, l8.w
ishl r5.y, r5.y, l7.w
; Apply the masks to the computed quotient, then XOR in the sign from r2.x.
and r10.xy, r10.xy, r5.xx
ior r10.y, r10.y, r5.y_abs
ior r10.x, r10.x, r5.z
ixor r10.y, r10.y_abs, r2.x
mov r0.xy, r10.xy
mov out0, r0
mend
mdef(129)_out(1)_in(1)
; Macro 129: one output, one input, result in out0.xy.
; NOTE(review): this looks like a refined double-precision square root of
; in0.xy (dsqrt estimate followed by correction steps); it is not named in
; the listing, so confirm before relying on that reading.
; No use of this macro is visible in func 1041 or func 1061 of this listing.
; NOTE(review): the "0" swizzle component is presumably the constant zero.
mov r0, in0
dcl_literal l1, 0x00000001, 0x3ff00000, 0x000fffff, 0x3ff80000
dcl_literal l2, 0xfff80000, 0x3fe00000, -1, 0
dcl_literal l3, 0x10000000, 256, -128, 0x7FF00000
; Input classification:
;   r10.x - high word is negative as a signed integer
;   r10.y - input compares equal to 0.0
;   r10.z - high word is below 0x10000000 (unsigned)
;   r10.w - input equals the double with high word 0x7FF00000, or is 0.0
ilt r10.x, r0.y, l2.0
deq r10.y, r0.xy, l2.00
ult r10.z, r0.y, l3.x
deq r10.w, r0.xy, l3.0w
ior r10.w, r10.w, r10.y
; For small inputs scale by 2^256 before the root and by 2^-128 afterwards;
; otherwise both exponent adjustments are 0.
cmov_logical r4.x, r10.z, l3.y, l3.0
cmov_logical r10.z, r10.z, l3.z, l3.0
dldexp r5.xy, r0.xy, r4.x
; Initial estimates y = dsqrt(x) and r = 1 / y, each with the low word
; cleared.
dsqrt r1.xy, r5.xy
mov r1.x, l2.0
drcp_zeroop(infinity) r3.xy, r1.xy
mov r3.x, l2.0
; Correction step: y = y - ((y * y - x) * 2^-1) * r   (l2.z = -1).
dmad r4.xy, r1.xy, r1.xy, r5.xy_neg(yw)
dldexp r4.xy, r4.xy, l2.z
dmad r1.xy, r4.xy, r3.xy_neg(yw), r1.xy
; Refine the reciprocal: r = r + r * (1.0 - y * r).
dmad r4.xy, r1.xy_neg(yw), r3.xy, l1.0y
dmad r3.xy, r3.xy, r4.xy, r3.xy
; Two more correction steps of the same form as above.
dmad r4.xy, r1.xy, r1.xy, r5.xy_neg(yw)
dldexp r4.xy, r4.xy, l2.z
dmad r1.xy, r4.xy, r3.xy_neg(yw), r1.xy
dmad r4.xy, r1.xy, r1.xy, r5.xy_neg(yw)
dldexp r4.xy, r4.xy, l2.z
dmad r1.xy, r4.xy, r3.xy_neg(yw), r1.xy
; Undo the input scaling.
dldexp r1.xy, r1.xy, r10.z
; Negative high word -> result with high word 0xfff80000 (l2.0x).
cmov_logical r1.xy, r10.xx, l2.0x, r1.xy
; Zero or 0x7FF00000 input -> return the input unchanged.
cmov_logical r0.xy, r10.ww, r0.xy, r1.xy
mov out0, r0
mend
end
ShaderType = IL_SHADER_COMPUTE
TargetChip = c
; ------------- SC_SRCSHADER Dump ------------------
SC_SHADERSTATE: u32NumIntVSConst = 0
SC_SHADERSTATE: u32NumIntPSConst = 0
SC_SHADERSTATE: u32NumIntGSConst = 0
SC_SHADERSTATE: u32NumBoolVSConst = 0
SC_SHADERSTATE: u32NumBoolPSConst = 0
SC_SHADERSTATE: u32NumBoolGSConst = 0
SC_SHADERSTATE: u32NumFloatVSConst = 0
SC_SHADERSTATE: u32NumFloatPSConst = 0
SC_SHADERSTATE: u32NumFloatGSConst = 0
fConstantsAvailable = 1025537139
iConstantsAvailable = 2573
bConstantsAvailable = 1634494817
u32SCOptions[0] = 0x01A00000 SCOption_IGNORE_SAMPLE_L_BUG SCOption_FLOAT_DO_NOT_DIST SCOption_FLOAT_DO_NOT_REASSOC
u32SCOptions[1] = 0x00202000 SCOption_R600_ERROR_ON_DOUBLE_MEMEXP SCOption_SET_VPM_FOR_SCATTER
u32SCOptions[2] = 0x00020041 SCOption_R800_UAV_NONARRAY_FIXUP SCOption_R800_UAV_NONUAV_SYNC_WORKAROUND_BUG216513_1 SCOption_R900_BRANCH_IN_NESTED_LOOPS_WORKAROUND_BUG281276
; -------- Disassembly --------------------
00 ALU: ADDR(288) CNT(24) KCACHE0(CB0:0-15) KCACHE1(CB1:0-15)
0 x: MULLO_INT ____, R1.x, KC0[1].x
y: MULLO_INT ____, R1.x, KC0[1].x
z: MULLO_INT ____, R1.x, KC0[1].x
w: MULLO_INT ____, R1.x, KC0[1].x
1 x: MBCNT_32LO_ACCUM_PREV_INT ____, -1
y: MBCNT_32HI_INT ____, -1
z: MULADD_UINT24 R127.z, SE_ID, 256, WAVE_ID
w: ADD_INT ____, R0.x, PV0.y
2 x: MULADD_UINT24 R3.x, PV1.z, 64, PV1.x
z: ADD_INT R0.z, PV1.w, KC0[6].x
w: ADD_INT R1.w, KC1[0].x, 4
3 x: MOV R5.x, -1
y: LSHL R3.y, PV2.z, 3
4 x: MOV R6.x, 0.0f
y: MOV R1.y, R3.x
z: ADD_INT ____, KC1[1].x, PV3.y
5 x: LSHR R1.x, PV4.z, 2
y: ADD_INT R2.y, R3.x, 32768
6 x: ADD_INT R2.x, PV5.x, 1
y: MOV R6.y, 0.0f
01 MEM_RAT_NOP_RTN: RAT(11)[R1], R1, MARK VPM
02 MEM_RAT_NOP_RTN_ACK: RAT(11)[R2], R2, MARK VPM
03 ALU: ADDR(312) CNT(4) KCACHE0(CB1:0-15)
7 x: LSHR R2.x, KC0[0].x, 2
8 x: LSHR R4.x, R1.w, 2
04 WAIT_ACK: Outstanding_acks <= 0
05 TEX: ADDR(1536) CNT(2)
9 VFETCH R7.x___, R3.x, fc165
FETCH_TYPE(NO_INDEX_OFFSET)
10 VFETCH R3.x___, R2.y, fc165
FETCH_TYPE(NO_INDEX_OFFSET)
06 ALU: ADDR(316) CNT(1)
11 y: MOV R2.y, R3.x
07 LOOP_NO_AL i2 FAIL_JUMP_ADDR(229)
08 ALU_PUSH_BEFORE: ADDR(317) CNT(2)
12 w: SETE_INT R0.w, R0.z, 0.0f
13 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
09 JUMP POP_CNT(1) ADDR(12)
10 MEM_RAT_INC_UINT_ACK: RAT(11)[R2], R5, MARK VPM
11 POP (1) ADDR(12)
12 ALU: ADDR(319) CNT(14)
14 y: LSHL ____, R0.x, 3
15 x: LDS_WRITE_REL (offset: 1) ____, PV14.y, R7.x, R2.y
y: ADD_INT ____, PV14.y, 2048
z: ADD_INT R1.z, PV14.y, 4096
16 x: LDS_WRITE ____, PV15.y, R7.x
y: ADD_INT R1.y, PV15.z, 4
w: ADD_INT ____, PV15.y, 4
17 x: LDS_WRITE ____, PV16.w, R2.y
18 x: LDS_WRITE ____, R1.z, R7.x
19 x: LDS_XCHG_RET QA, R1.y, R2.y
20 y: MOV ____, QA[19].pop
13 WAIT_ACK: Outstanding_acks <= 0
14 ALU: ADDR(333) CNT(1)
21 x: GROUP_BARRIER ____
15 ALU_PUSH_BEFORE: ADDR(334) CNT(13)
22 x: LDS_READ2_RET QAB, 2052, 4
23 x: LDS_READ2_RET QAB, 4100, 2048
24 x: LDS_READ2_RET QAB, 0.0f, 4096
25 y: MOV R0.y, QA[22]
26 y: MOV R1.y, QB[22].pop VEC_120
27 x: MOV R1.x, QB[23].pop VEC_120
y: MOV R4.y, QA[23].pop
28 x: MOV R3.x, QA[24]
29 x: MOV R8.x, QB[24].pop VEC_120
30 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
16 JUMP POP_CNT(1) ADDR(19)
17 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
18 POP (1) ADDR(19)
19 ALU_PUSH_BEFORE: ADDR(347) CNT(19)
31 x: LDS_READ2_RET QAB, 2060, 12
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
32 x: LDS_READ2_RET QAB, 4108, 2056
z: ADD_64 R1.z, PV31.w, R4.y
w: ADD_64 R1.w, PV31.z, R8.x
33 x: LDS_READ2_RET QAB, 8, 4104
34 x: ADD_64 R8.x, R6.y, R1.w
y: ADD_64 R8.y, R6.x, R1.z
35 y: MOV R0.y, QA[31]
36 y: MOV R1.y, QB[31].pop VEC_120
37 x: MOV R1.x, QB[32].pop VEC_120
y: MOV R4.y, QA[32].pop
38 x: MOV R3.x, QA[33]
39 x: MOV R6.x, QB[33].pop VEC_120
40 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
20 JUMP POP_CNT(1) ADDR(23)
21 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
22 POP (1) ADDR(23)
23 ALU_PUSH_BEFORE: ADDR(366) CNT(19)
41 x: LDS_READ2_RET QAB, 2068, 20
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
42 x: LDS_READ2_RET QAB, 4116, 2064
z: ADD_64 R1.z, PV41.w, R4.y
w: ADD_64 R1.w, PV41.z, R6.x
43 x: LDS_READ2_RET QAB, 16, 4112
44 x: ADD_64 R8.x, R8.y, R1.w
y: ADD_64 R8.y, R8.x, R1.z
45 y: MOV R0.y, QA[41]
46 y: MOV R1.y, QB[41].pop VEC_120
47 x: MOV R1.x, QB[42].pop VEC_120
y: MOV R4.y, QA[42].pop
48 x: MOV R3.x, QA[43]
49 x: MOV R6.x, QB[43].pop VEC_120
50 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
24 JUMP POP_CNT(1) ADDR(27)
25 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
26 POP (1) ADDR(27)
27 ALU_PUSH_BEFORE: ADDR(385) CNT(19)
51 x: LDS_READ2_RET QAB, 2076, 28
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
52 x: LDS_READ2_RET QAB, 4124, 2072
z: ADD_64 R1.z, PV51.w, R4.y
w: ADD_64 R1.w, PV51.z, R6.x
53 x: LDS_READ2_RET QAB, 24, 4120
54 x: ADD_64 R8.x, R8.y, R1.w
y: ADD_64 R8.y, R8.x, R1.z
55 y: MOV R0.y, QA[51]
56 y: MOV R1.y, QB[51].pop VEC_120
57 x: MOV R1.x, QB[52].pop VEC_120
y: MOV R4.y, QA[52].pop
58 x: MOV R3.x, QA[53]
59 x: MOV R6.x, QB[53].pop VEC_120
60 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
28 JUMP POP_CNT(1) ADDR(31)
29 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
30 POP (1) ADDR(31)
31 ALU_PUSH_BEFORE: ADDR(404) CNT(19)
61 x: LDS_READ2_RET QAB, 2084, 36
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
62 x: LDS_READ2_RET QAB, 4132, 2080
z: ADD_64 R1.z, PV61.w, R4.y
w: ADD_64 R1.w, PV61.z, R6.x
63 x: LDS_READ2_RET QAB, 32, 4128
64 x: ADD_64 R8.x, R8.y, R1.w
y: ADD_64 R8.y, R8.x, R1.z
65 y: MOV R0.y, QA[61]
66 y: MOV R1.y, QB[61].pop VEC_120
67 x: MOV R1.x, QB[62].pop VEC_120
y: MOV R4.y, QA[62].pop
68 x: MOV R3.x, QA[63]
69 x: MOV R6.x, QB[63].pop VEC_120
70 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
32 JUMP POP_CNT(1) ADDR(35)
33 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
34 POP (1) ADDR(35)
35 ALU_PUSH_BEFORE: ADDR(423) CNT(19)
71 x: LDS_READ2_RET QAB, 2092, 44
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
72 x: LDS_READ2_RET QAB, 4140, 2088
z: ADD_64 R1.z, PV71.w, R4.y
w: ADD_64 R1.w, PV71.z, R6.x
73 x: LDS_READ2_RET QAB, 40, 4136
74 x: ADD_64 R6.x, R8.y, R1.w
y: ADD_64 R6.y, R8.x, R1.z
75 y: MOV R0.y, QA[71]
76 y: MOV R1.y, QB[71].pop VEC_120
77 x: MOV R1.x, QB[72].pop VEC_120
y: MOV R4.y, QA[72].pop
78 x: MOV R3.x, QA[73]
79 x: MOV R8.x, QB[73].pop VEC_120
80 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
36 JUMP POP_CNT(1) ADDR(39)
37 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
38 POP (1) ADDR(39)
39 ALU_PUSH_BEFORE: ADDR(442) CNT(19)
81 x: LDS_READ2_RET QAB, 2100, 52
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
82 x: LDS_READ2_RET QAB, 4148, 2096
z: ADD_64 R1.z, PV81.w, R4.y
w: ADD_64 R1.w, PV81.z, R8.x
83 x: LDS_READ2_RET QAB, 48, 4144
84 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
85 y: MOV R0.y, QA[81]
86 y: MOV R1.y, QB[81].pop VEC_120
87 x: MOV R1.x, QB[82].pop VEC_120
y: MOV R4.y, QA[82].pop
88 x: MOV R3.x, QA[83]
89 x: MOV R8.x, QB[83].pop VEC_120
90 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
40 JUMP POP_CNT(1) ADDR(43)
41 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
42 POP (1) ADDR(43)
43 ALU_PUSH_BEFORE: ADDR(461) CNT(19)
91 x: LDS_READ2_RET QAB, 2108, 60
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
92 x: LDS_READ2_RET QAB, 4156, 2104
z: ADD_64 R1.z, PV91.w, R4.y
w: ADD_64 R1.w, PV91.z, R8.x
93 x: LDS_READ2_RET QAB, 56, 4152
94 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
95 y: MOV R0.y, QA[91]
96 y: MOV R1.y, QB[91].pop VEC_120
97 x: MOV R1.x, QB[92].pop VEC_120
y: MOV R4.y, QA[92].pop
98 x: MOV R3.x, QA[93]
99 x: MOV R8.x, QB[93].pop VEC_120
100 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
44 JUMP POP_CNT(1) ADDR(47)
45 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
46 POP (1) ADDR(47)
47 ALU_PUSH_BEFORE: ADDR(480) CNT(19)
101 x: LDS_READ2_RET QAB, 2116, 68
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
102 x: LDS_READ2_RET QAB, 4164, 2112
z: ADD_64 R1.z, PV101.w, R4.y
w: ADD_64 R1.w, PV101.z, R8.x
103 x: LDS_READ2_RET QAB, 64, 4160
104 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
105 y: MOV R0.y, QA[101]
106 y: MOV R1.y, QB[101].pop VEC_120
107 x: MOV R1.x, QB[102].pop VEC_120
y: MOV R4.y, QA[102].pop
108 x: MOV R3.x, QA[103]
109 x: MOV R8.x, QB[103].pop VEC_120
110 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
48 JUMP POP_CNT(1) ADDR(51)
49 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
50 POP (1) ADDR(51)
51 ALU_PUSH_BEFORE: ADDR(499) CNT(19)
111 x: LDS_READ2_RET QAB, 2124, 76
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
112 x: LDS_READ2_RET QAB, 4172, 2120
z: ADD_64 R1.z, PV111.w, R4.y
w: ADD_64 R1.w, PV111.z, R8.x
113 x: LDS_READ2_RET QAB, 72, 4168
114 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
115 y: MOV R0.y, QA[111]
116 y: MOV R1.y, QB[111].pop VEC_120
117 x: MOV R1.x, QB[112].pop VEC_120
y: MOV R4.y, QA[112].pop
118 x: MOV R3.x, QA[113]
119 x: MOV R8.x, QB[113].pop VEC_120
120 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
52 JUMP POP_CNT(1) ADDR(55)
53 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
54 POP (1) ADDR(55)
55 ALU_PUSH_BEFORE: ADDR(518) CNT(19)
121 x: LDS_READ2_RET QAB, 2132, 84
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
122 x: LDS_READ2_RET QAB, 4180, 2128
z: ADD_64 R1.z, PV121.w, R4.y
w: ADD_64 R1.w, PV121.z, R8.x
123 x: LDS_READ2_RET QAB, 80, 4176
124 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
125 y: MOV R0.y, QA[121]
126 y: MOV R1.y, QB[121].pop VEC_120
127 x: MOV R1.x, QB[122].pop VEC_120
y: MOV R4.y, QA[122].pop
128 x: MOV R3.x, QA[123]
129 x: MOV R8.x, QB[123].pop VEC_120
130 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
56 JUMP POP_CNT(1) ADDR(59)
57 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
58 POP (1) ADDR(59)
59 ALU_PUSH_BEFORE: ADDR(537) CNT(19)
131 x: LDS_READ2_RET QAB, 2140, 92
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
132 x: LDS_READ2_RET QAB, 4188, 2136
z: ADD_64 R1.z, PV131.w, R4.y
w: ADD_64 R1.w, PV131.z, R8.x
133 x: LDS_READ2_RET QAB, 88, 4184
134 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
135 y: MOV R0.y, QA[131]
136 y: MOV R1.y, QB[131].pop VEC_120
137 x: MOV R1.x, QB[132].pop VEC_120
y: MOV R4.y, QA[132].pop
138 x: MOV R3.x, QA[133]
139 x: MOV R8.x, QB[133].pop VEC_120
140 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
60 JUMP POP_CNT(1) ADDR(63)
61 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
62 POP (1) ADDR(63)
63 ALU_PUSH_BEFORE: ADDR(556) CNT(19)
141 x: LDS_READ2_RET QAB, 2148, 100
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
142 x: LDS_READ2_RET QAB, 4196, 2144
z: ADD_64 R1.z, PV141.w, R4.y
w: ADD_64 R1.w, PV141.z, R8.x
143 x: LDS_READ2_RET QAB, 96, 4192
144 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
145 y: MOV R0.y, QA[141]
146 y: MOV R1.y, QB[141].pop VEC_120
147 x: MOV R1.x, QB[142].pop VEC_120
y: MOV R4.y, QA[142].pop
148 x: MOV R3.x, QA[143]
149 x: MOV R8.x, QB[143].pop VEC_120
150 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
64 JUMP POP_CNT(1) ADDR(67)
65 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
66 POP (1) ADDR(67)
67 ALU_PUSH_BEFORE: ADDR(575) CNT(19)
151 x: LDS_READ2_RET QAB, 2156, 108
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
152 x: LDS_READ2_RET QAB, 4204, 2152
z: ADD_64 R1.z, PV151.w, R4.y
w: ADD_64 R1.w, PV151.z, R8.x
153 x: LDS_READ2_RET QAB, 104, 4200
154 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
155 y: MOV R0.y, QA[151]
156 y: MOV R1.y, QB[151].pop VEC_120
157 x: MOV R1.x, QB[152].pop VEC_120
y: MOV R4.y, QA[152].pop
158 x: MOV R3.x, QA[153]
159 x: MOV R8.x, QB[153].pop VEC_120
160 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
68 JUMP POP_CNT(1) ADDR(71)
69 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
70 POP (1) ADDR(71)
71 ALU_PUSH_BEFORE: ADDR(594) CNT(19)
161 x: LDS_READ2_RET QAB, 2164, 116
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
162 x: LDS_READ2_RET QAB, 4212, 2160
z: ADD_64 R1.z, PV161.w, R4.y
w: ADD_64 R1.w, PV161.z, R8.x
163 x: LDS_READ2_RET QAB, 112, 4208
164 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
165 y: MOV R0.y, QA[161]
166 y: MOV R1.y, QB[161].pop VEC_120
167 x: MOV R1.x, QB[162].pop VEC_120
y: MOV R4.y, QA[162].pop
168 x: MOV R3.x, QA[163]
169 x: MOV R8.x, QB[163].pop VEC_120
170 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
72 JUMP POP_CNT(1) ADDR(75)
73 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
74 POP (1) ADDR(75)
75 ALU_PUSH_BEFORE: ADDR(613) CNT(19)
171 x: LDS_READ2_RET QAB, 2172, 124
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
172 x: LDS_READ2_RET QAB, 4220, 2168
z: ADD_64 R1.z, PV171.w, R4.y
w: ADD_64 R1.w, PV171.z, R8.x
173 x: LDS_READ2_RET QAB, 120, 4216
174 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
175 y: MOV R0.y, QA[171]
176 y: MOV R1.y, QB[171].pop VEC_120
177 x: MOV R1.x, QB[172].pop VEC_120
y: MOV R4.y, QA[172].pop
178 x: MOV R3.x, QA[173]
179 x: MOV R8.x, QB[173].pop VEC_120
180 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
76 JUMP POP_CNT(1) ADDR(79)
77 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
78 POP (1) ADDR(79)
79 ALU_PUSH_BEFORE: ADDR(632) CNT(19)
181 x: LDS_READ2_RET QAB, 2180, 132
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
182 x: LDS_READ2_RET QAB, 4228, 2176
z: ADD_64 R1.z, PV181.w, R4.y
w: ADD_64 R1.w, PV181.z, R8.x
183 x: LDS_READ2_RET QAB, 128, 4224
184 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
185 y: MOV R0.y, QA[181]
186 y: MOV R1.y, QB[181].pop VEC_120
187 x: MOV R1.x, QB[182].pop VEC_120
y: MOV R4.y, QA[182].pop
188 x: MOV R3.x, QA[183]
189 x: MOV R8.x, QB[183].pop VEC_120
190 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
80 JUMP POP_CNT(1) ADDR(83)
81 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
82 POP (1) ADDR(83)
83 ALU_PUSH_BEFORE: ADDR(651) CNT(19)
191 x: LDS_READ2_RET QAB, 2188, 140
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
192 x: LDS_READ2_RET QAB, 4236, 2184
z: ADD_64 R1.z, PV191.w, R4.y
w: ADD_64 R1.w, PV191.z, R8.x
193 x: LDS_READ2_RET QAB, 136, 4232
194 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
195 y: MOV R0.y, QA[191]
196 y: MOV R1.y, QB[191].pop VEC_120
197 x: MOV R1.x, QB[192].pop VEC_120
y: MOV R4.y, QA[192].pop
198 x: MOV R3.x, QA[193]
199 x: MOV R8.x, QB[193].pop VEC_120
200 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
84 JUMP POP_CNT(1) ADDR(87)
85 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
86 POP (1) ADDR(87)
87 ALU_PUSH_BEFORE: ADDR(670) CNT(19)
201 x: LDS_READ2_RET QAB, 2196, 148
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
202 x: LDS_READ2_RET QAB, 4244, 2192
z: ADD_64 R1.z, PV201.w, R4.y
w: ADD_64 R1.w, PV201.z, R8.x
203 x: LDS_READ2_RET QAB, 144, 4240
204 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
205 y: MOV R0.y, QA[201]
206 y: MOV R1.y, QB[201].pop VEC_120
207 x: MOV R1.x, QB[202].pop VEC_120
y: MOV R4.y, QA[202].pop
208 x: MOV R3.x, QA[203]
209 x: MOV R8.x, QB[203].pop VEC_120
210 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
88 JUMP POP_CNT(1) ADDR(91)
89 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
90 POP (1) ADDR(91)
91 ALU_PUSH_BEFORE: ADDR(689) CNT(19)
211 x: LDS_READ2_RET QAB, 2204, 156
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
212 x: LDS_READ2_RET QAB, 4252, 2200
z: ADD_64 R1.z, PV211.w, R4.y
w: ADD_64 R1.w, PV211.z, R8.x
213 x: LDS_READ2_RET QAB, 152, 4248
214 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
215 y: MOV R0.y, QA[211]
216 y: MOV R1.y, QB[211].pop VEC_120
217 x: MOV R1.x, QB[212].pop VEC_120
y: MOV R4.y, QA[212].pop
218 x: MOV R3.x, QA[213]
219 x: MOV R8.x, QB[213].pop VEC_120
220 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
92 JUMP POP_CNT(1) ADDR(95)
93 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
94 POP (1) ADDR(95)
95 ALU_PUSH_BEFORE: ADDR(708) CNT(19)
221 x: LDS_READ2_RET QAB, 2212, 164
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
222 x: LDS_READ2_RET QAB, 4260, 2208
z: ADD_64 R1.z, PV221.w, R4.y
w: ADD_64 R1.w, PV221.z, R8.x
223 x: LDS_READ2_RET QAB, 160, 4256
224 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
225 y: MOV R0.y, QA[221]
226 y: MOV R1.y, QB[221].pop VEC_120
227 x: MOV R1.x, QB[222].pop VEC_120
y: MOV R4.y, QA[222].pop
228 x: MOV R3.x, QA[223]
229 x: MOV R8.x, QB[223].pop VEC_120
230 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
96 JUMP POP_CNT(1) ADDR(99)
97 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
98 POP (1) ADDR(99)
99 ALU_PUSH_BEFORE: ADDR(727) CNT(19)
231 x: LDS_READ2_RET QAB, 2220, 172
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
232 x: LDS_READ2_RET QAB, 4268, 2216
z: ADD_64 R1.z, PV231.w, R4.y
w: ADD_64 R1.w, PV231.z, R8.x
233 x: LDS_READ2_RET QAB, 168, 4264
234 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
235 y: MOV R0.y, QA[231]
236 y: MOV R1.y, QB[231].pop VEC_120
237 x: MOV R1.x, QB[232].pop VEC_120
y: MOV R4.y, QA[232].pop
238 x: MOV R3.x, QA[233]
239 x: MOV R8.x, QB[233].pop VEC_120
240 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
100 JUMP POP_CNT(1) ADDR(103)
101 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
102 POP (1) ADDR(103)
103 ALU_PUSH_BEFORE: ADDR(746) CNT(19)
241 x: LDS_READ2_RET QAB, 2228, 180
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
242 x: LDS_READ2_RET QAB, 4276, 2224
z: ADD_64 R1.z, PV241.w, R4.y
w: ADD_64 R1.w, PV241.z, R8.x
243 x: LDS_READ2_RET QAB, 176, 4272
244 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
245 y: MOV R0.y, QA[241]
246 y: MOV R1.y, QB[241].pop VEC_120
247 x: MOV R1.x, QB[242].pop VEC_120
y: MOV R4.y, QA[242].pop
248 x: MOV R3.x, QA[243]
249 x: MOV R8.x, QB[243].pop VEC_120
250 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
104 JUMP POP_CNT(1) ADDR(107)
105 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
106 POP (1) ADDR(107)
107 ALU_PUSH_BEFORE: ADDR(765) CNT(19)
251 x: LDS_READ2_RET QAB, 2236, 188
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
252 x: LDS_READ2_RET QAB, 4284, 2232
z: ADD_64 R1.z, PV251.w, R4.y
w: ADD_64 R1.w, PV251.z, R8.x
253 x: LDS_READ2_RET QAB, 184, 4280
254 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
255 y: MOV R0.y, QA[251]
256 y: MOV R1.y, QB[251].pop VEC_120
257 x: MOV R1.x, QB[252].pop VEC_120
y: MOV R4.y, QA[252].pop
258 x: MOV R3.x, QA[253]
259 x: MOV R8.x, QB[253].pop VEC_120
260 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
108 JUMP POP_CNT(1) ADDR(111)
109 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
110 POP (1) ADDR(111)
111 ALU_PUSH_BEFORE: ADDR(784) CNT(19)
261 x: LDS_READ2_RET QAB, 2244, 196
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
262 x: LDS_READ2_RET QAB, 4292, 2240
z: ADD_64 R1.z, PV261.w, R4.y
w: ADD_64 R1.w, PV261.z, R8.x
263 x: LDS_READ2_RET QAB, 192, 4288
264 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
265 y: MOV R0.y, QA[261]
266 y: MOV R1.y, QB[261].pop VEC_120
267 x: MOV R1.x, QB[262].pop VEC_120
y: MOV R4.y, QA[262].pop
268 x: MOV R3.x, QA[263]
269 x: MOV R8.x, QB[263].pop VEC_120
270 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
112 JUMP POP_CNT(1) ADDR(115)
113 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
114 POP (1) ADDR(115)
115 ALU_PUSH_BEFORE: ADDR(803) CNT(19)
271 x: LDS_READ2_RET QAB, 2252, 204
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
272 x: LDS_READ2_RET QAB, 4300, 2248
z: ADD_64 R1.z, PV271.w, R4.y
w: ADD_64 R1.w, PV271.z, R8.x
273 x: LDS_READ2_RET QAB, 200, 4296
274 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
275 y: MOV R0.y, QA[271]
276 y: MOV R1.y, QB[271].pop VEC_120
277 x: MOV R1.x, QB[272].pop VEC_120
y: MOV R4.y, QA[272].pop
278 x: MOV R3.x, QA[273]
279 x: MOV R8.x, QB[273].pop VEC_120
280 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
116 JUMP POP_CNT(1) ADDR(119)
117 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
118 POP (1) ADDR(119)
119 ALU_PUSH_BEFORE: ADDR(822) CNT(19)
281 x: LDS_READ2_RET QAB, 2260, 212
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
282 x: LDS_READ2_RET QAB, 4308, 2256
z: ADD_64 R1.z, PV281.w, R4.y
w: ADD_64 R1.w, PV281.z, R8.x
283 x: LDS_READ2_RET QAB, 208, 4304
284 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
285 y: MOV R0.y, QA[281]
286 y: MOV R1.y, QB[281].pop VEC_120
287 x: MOV R1.x, QB[282].pop VEC_120
y: MOV R4.y, QA[282].pop
288 x: MOV R3.x, QA[283]
289 x: MOV R8.x, QB[283].pop VEC_120
290 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
120 JUMP POP_CNT(1) ADDR(123)
121 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
122 POP (1) ADDR(123)
123 ALU_PUSH_BEFORE: ADDR(841) CNT(19)
291 x: LDS_READ2_RET QAB, 2268, 220
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
292 x: LDS_READ2_RET QAB, 4316, 2264
z: ADD_64 R1.z, PV291.w, R4.y
w: ADD_64 R1.w, PV291.z, R8.x
293 x: LDS_READ2_RET QAB, 216, 4312
294 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
295 y: MOV R0.y, QA[291]
296 y: MOV R1.y, QB[291].pop VEC_120
297 x: MOV R1.x, QB[292].pop VEC_120
y: MOV R4.y, QA[292].pop
298 x: MOV R3.x, QA[293]
299 x: MOV R8.x, QB[293].pop VEC_120
300 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
124 JUMP POP_CNT(1) ADDR(127)
125 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
126 POP (1) ADDR(127)
127 ALU_PUSH_BEFORE: ADDR(860) CNT(19)
301 x: LDS_READ2_RET QAB, 2276, 228
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
302 x: LDS_READ2_RET QAB, 4324, 2272
z: ADD_64 R1.z, PV301.w, R4.y
w: ADD_64 R1.w, PV301.z, R8.x
303 x: LDS_READ2_RET QAB, 224, 4320
304 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
305 y: MOV R0.y, QA[301]
306 y: MOV R1.y, QB[301].pop VEC_120
307 x: MOV R1.x, QB[302].pop VEC_120
y: MOV R4.y, QA[302].pop
308 x: MOV R3.x, QA[303]
309 x: MOV R8.x, QB[303].pop VEC_120
310 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
128 JUMP POP_CNT(1) ADDR(131)
129 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
130 POP (1) ADDR(131)
131 ALU_PUSH_BEFORE: ADDR(879) CNT(19)
311 x: LDS_READ2_RET QAB, 2284, 236
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
312 x: LDS_READ2_RET QAB, 4332, 2280
z: ADD_64 R1.z, PV311.w, R4.y
w: ADD_64 R1.w, PV311.z, R8.x
313 x: LDS_READ2_RET QAB, 232, 4328
314 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
315 y: MOV R0.y, QA[311]
316 y: MOV R1.y, QB[311].pop VEC_120
317 x: MOV R1.x, QB[312].pop VEC_120
y: MOV R4.y, QA[312].pop
318 x: MOV R3.x, QA[313]
319 x: MOV R8.x, QB[313].pop VEC_120
320 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
132 JUMP POP_CNT(1) ADDR(135)
133 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
134 POP (1) ADDR(135)
135 ALU_PUSH_BEFORE: ADDR(898) CNT(19)
321 x: LDS_READ2_RET QAB, 2292, 244
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
322 x: LDS_READ2_RET QAB, 4340, 2288
z: ADD_64 R1.z, PV321.w, R4.y
w: ADD_64 R1.w, PV321.z, R8.x
323 x: LDS_READ2_RET QAB, 240, 4336
324 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
325 y: MOV R0.y, QA[321]
326 y: MOV R1.y, QB[321].pop VEC_120
327 x: MOV R1.x, QB[322].pop VEC_120
y: MOV R4.y, QA[322].pop
328 x: MOV R3.x, QA[323]
329 x: MOV R8.x, QB[323].pop VEC_120
330 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
136 JUMP POP_CNT(1) ADDR(139)
137 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
138 POP (1) ADDR(139)
139 ALU_PUSH_BEFORE: ADDR(917) CNT(19)
331 x: LDS_READ2_RET QAB, 2300, 252
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
332 x: LDS_READ2_RET QAB, 4348, 2296
z: ADD_64 R1.z, PV331.w, R4.y
w: ADD_64 R1.w, PV331.z, R8.x
333 x: LDS_READ2_RET QAB, 248, 4344
334 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
335 y: MOV R0.y, QA[331]
336 y: MOV R1.y, QB[331].pop VEC_120
337 x: MOV R1.x, QB[332].pop VEC_120
y: MOV R4.y, QA[332].pop
338 x: MOV R3.x, QA[333]
339 x: MOV R8.x, QB[333].pop VEC_120
340 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
140 JUMP POP_CNT(1) ADDR(143)
141 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
142 POP (1) ADDR(143)
143 ALU_PUSH_BEFORE: ADDR(936) CNT(19)
341 x: LDS_READ2_RET QAB, 2308, 260
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
342 x: LDS_READ2_RET QAB, 4356, 2304
z: ADD_64 R1.z, PV341.w, R4.y
w: ADD_64 R1.w, PV341.z, R8.x
343 x: LDS_READ2_RET QAB, 256, 4352
344 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
345 y: MOV R0.y, QA[341]
346 y: MOV R1.y, QB[341].pop VEC_120
347 x: MOV R1.x, QB[342].pop VEC_120
y: MOV R4.y, QA[342].pop
348 x: MOV R3.x, QA[343]
349 x: MOV R8.x, QB[343].pop VEC_120
350 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
144 JUMP POP_CNT(1) ADDR(147)
145 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
146 POP (1) ADDR(147)
147 ALU_PUSH_BEFORE: ADDR(955) CNT(19)
351 x: LDS_READ2_RET QAB, 2316, 268
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
352 x: LDS_READ2_RET QAB, 4364, 2312
z: ADD_64 R1.z, PV351.w, R4.y
w: ADD_64 R1.w, PV351.z, R8.x
353 x: LDS_READ2_RET QAB, 264, 4360
354 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
355 y: MOV R0.y, QA[351]
356 y: MOV R1.y, QB[351].pop VEC_120
357 x: MOV R1.x, QB[352].pop VEC_120
y: MOV R4.y, QA[352].pop
358 x: MOV R3.x, QA[353]
359 x: MOV R8.x, QB[353].pop VEC_120
360 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
148 JUMP POP_CNT(1) ADDR(151)
149 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
150 POP (1) ADDR(151)
151 ALU_PUSH_BEFORE: ADDR(974) CNT(19)
361 x: LDS_READ2_RET QAB, 2324, 276
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
362 x: LDS_READ2_RET QAB, 4372, 2320
z: ADD_64 R1.z, PV361.w, R4.y
w: ADD_64 R1.w, PV361.z, R8.x
363 x: LDS_READ2_RET QAB, 272, 4368
364 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
365 y: MOV R0.y, QA[361]
366 y: MOV R1.y, QB[361].pop VEC_120
367 x: MOV R1.x, QB[362].pop VEC_120
y: MOV R4.y, QA[362].pop
368 x: MOV R3.x, QA[363]
369 x: MOV R8.x, QB[363].pop VEC_120
370 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
152 JUMP POP_CNT(1) ADDR(155)
153 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
154 POP (1) ADDR(155)
155 ALU_PUSH_BEFORE: ADDR(993) CNT(19)
371 x: LDS_READ2_RET QAB, 2332, 284
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
372 x: LDS_READ2_RET QAB, 4380, 2328
z: ADD_64 R1.z, PV371.w, R4.y
w: ADD_64 R1.w, PV371.z, R8.x
373 x: LDS_READ2_RET QAB, 280, 4376
374 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
375 y: MOV R0.y, QA[371]
376 y: MOV R1.y, QB[371].pop VEC_120
377 x: MOV R1.x, QB[372].pop VEC_120
y: MOV R4.y, QA[372].pop
378 x: MOV R3.x, QA[373]
379 x: MOV R8.x, QB[373].pop VEC_120
380 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
156 JUMP POP_CNT(1) ADDR(159)
157 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
158 POP (1) ADDR(159)
159 ALU_PUSH_BEFORE: ADDR(1012) CNT(19)
381 x: LDS_READ2_RET QAB, 2340, 292
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
382 x: LDS_READ2_RET QAB, 4388, 2336
z: ADD_64 R1.z, PV381.w, R4.y
w: ADD_64 R1.w, PV381.z, R8.x
383 x: LDS_READ2_RET QAB, 288, 4384
384 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
385 y: MOV R0.y, QA[381]
386 y: MOV R1.y, QB[381].pop VEC_120
387 x: MOV R1.x, QB[382].pop VEC_120
y: MOV R4.y, QA[382].pop
388 x: MOV R3.x, QA[383]
389 x: MOV R8.x, QB[383].pop VEC_120
390 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
160 JUMP POP_CNT(1) ADDR(163)
161 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
162 POP (1) ADDR(163)
163 ALU_PUSH_BEFORE: ADDR(1031) CNT(19)
391 x: LDS_READ2_RET QAB, 2348, 300
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
392 x: LDS_READ2_RET QAB, 4396, 2344
z: ADD_64 R1.z, PV391.w, R4.y
w: ADD_64 R1.w, PV391.z, R8.x
393 x: LDS_READ2_RET QAB, 296, 4392
394 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
395 y: MOV R0.y, QA[391]
396 y: MOV R1.y, QB[391].pop VEC_120
397 x: MOV R1.x, QB[392].pop VEC_120
y: MOV R4.y, QA[392].pop
398 x: MOV R3.x, QA[393]
399 x: MOV R8.x, QB[393].pop VEC_120
400 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
164 JUMP POP_CNT(1) ADDR(167)
165 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
166 POP (1) ADDR(167)
167 ALU_PUSH_BEFORE: ADDR(1050) CNT(19)
401 x: LDS_READ2_RET QAB, 2356, 308
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
402 x: LDS_READ2_RET QAB, 4404, 2352
z: ADD_64 R1.z, PV401.w, R4.y
w: ADD_64 R1.w, PV401.z, R8.x
403 x: LDS_READ2_RET QAB, 304, 4400
404 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
405 y: MOV R0.y, QA[401]
406 y: MOV R1.y, QB[401].pop VEC_120
407 x: MOV R1.x, QB[402].pop VEC_120
y: MOV R4.y, QA[402].pop
408 x: MOV R3.x, QA[403]
409 x: MOV R8.x, QB[403].pop VEC_120
410 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
168 JUMP POP_CNT(1) ADDR(171)
169 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
170 POP (1) ADDR(171)
171 ALU_PUSH_BEFORE: ADDR(1069) CNT(19)
411 x: LDS_READ2_RET QAB, 2364, 316
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
412 x: LDS_READ2_RET QAB, 4412, 2360
z: ADD_64 R1.z, PV411.w, R4.y
w: ADD_64 R1.w, PV411.z, R8.x
413 x: LDS_READ2_RET QAB, 312, 4408
414 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
415 y: MOV R0.y, QA[411]
416 y: MOV R1.y, QB[411].pop VEC_120
417 x: MOV R1.x, QB[412].pop VEC_120
y: MOV R4.y, QA[412].pop
418 x: MOV R3.x, QA[413]
419 x: MOV R8.x, QB[413].pop VEC_120
420 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
172 JUMP POP_CNT(1) ADDR(175)
173 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
174 POP (1) ADDR(175)
175 ALU_PUSH_BEFORE: ADDR(1088) CNT(19)
421 x: LDS_READ2_RET QAB, 2372, 324
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
422 x: LDS_READ2_RET QAB, 4420, 2368
z: ADD_64 R1.z, PV421.w, R4.y
w: ADD_64 R1.w, PV421.z, R8.x
423 x: LDS_READ2_RET QAB, 320, 4416
424 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
425 y: MOV R0.y, QA[421]
426 y: MOV R1.y, QB[421].pop VEC_120
427 x: MOV R1.x, QB[422].pop VEC_120
y: MOV R4.y, QA[422].pop
428 x: MOV R3.x, QA[423]
429 x: MOV R8.x, QB[423].pop VEC_120
430 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
176 JUMP POP_CNT(1) ADDR(179)
177 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
178 POP (1) ADDR(179)
179 ALU_PUSH_BEFORE: ADDR(1107) CNT(19)
431 x: LDS_READ2_RET QAB, 2380, 332
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
432 x: LDS_READ2_RET QAB, 4428, 2376
z: ADD_64 R1.z, PV431.w, R4.y
w: ADD_64 R1.w, PV431.z, R8.x
433 x: LDS_READ2_RET QAB, 328, 4424
434 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
435 y: MOV R0.y, QA[431]
436 y: MOV R1.y, QB[431].pop VEC_120
437 x: MOV R1.x, QB[432].pop VEC_120
y: MOV R4.y, QA[432].pop
438 x: MOV R3.x, QA[433]
439 x: MOV R8.x, QB[433].pop VEC_120
440 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
180 JUMP POP_CNT(1) ADDR(183)
181 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
182 POP (1) ADDR(183)
183 ALU_PUSH_BEFORE: ADDR(1126) CNT(19)
441 x: LDS_READ2_RET QAB, 2388, 340
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
442 x: LDS_READ2_RET QAB, 4436, 2384
z: ADD_64 R1.z, PV441.w, R4.y
w: ADD_64 R1.w, PV441.z, R8.x
443 x: LDS_READ2_RET QAB, 336, 4432
444 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
445 y: MOV R0.y, QA[441]
446 y: MOV R1.y, QB[441].pop VEC_120
447 x: MOV R1.x, QB[442].pop VEC_120
y: MOV R4.y, QA[442].pop
448 x: MOV R3.x, QA[443]
449 x: MOV R8.x, QB[443].pop VEC_120
450 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
184 JUMP POP_CNT(1) ADDR(187)
185 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
186 POP (1) ADDR(187)
187 ALU_PUSH_BEFORE: ADDR(1145) CNT(19)
451 x: LDS_READ2_RET QAB, 2396, 348
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
452 x: LDS_READ2_RET QAB, 4444, 2392
z: ADD_64 R1.z, PV451.w, R4.y
w: ADD_64 R1.w, PV451.z, R8.x
453 x: LDS_READ2_RET QAB, 344, 4440
454 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
455 y: MOV R0.y, QA[451]
456 y: MOV R1.y, QB[451].pop VEC_120
457 x: MOV R1.x, QB[452].pop VEC_120
y: MOV R4.y, QA[452].pop
458 x: MOV R3.x, QA[453]
459 x: MOV R8.x, QB[453].pop VEC_120
460 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
188 JUMP POP_CNT(1) ADDR(191)
189 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
190 POP (1) ADDR(191)
191 ALU_PUSH_BEFORE: ADDR(1164) CNT(19)
461 x: LDS_READ2_RET QAB, 2404, 356
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
462 x: LDS_READ2_RET QAB, 4452, 2400
z: ADD_64 R1.z, PV461.w, R4.y
w: ADD_64 R1.w, PV461.z, R8.x
463 x: LDS_READ2_RET QAB, 352, 4448
464 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
465 y: MOV R0.y, QA[461]
466 y: MOV R1.y, QB[461].pop VEC_120
467 x: MOV R1.x, QB[462].pop VEC_120
y: MOV R4.y, QA[462].pop
468 x: MOV R3.x, QA[463]
469 x: MOV R8.x, QB[463].pop VEC_120
470 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
192 JUMP POP_CNT(1) ADDR(195)
193 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
194 POP (1) ADDR(195)
195 ALU_PUSH_BEFORE: ADDR(1183) CNT(19)
471 x: LDS_READ2_RET QAB, 2412, 364
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
472 x: LDS_READ2_RET QAB, 4460, 2408
z: ADD_64 R1.z, PV471.w, R4.y
w: ADD_64 R1.w, PV471.z, R8.x
473 x: LDS_READ2_RET QAB, 360, 4456
474 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
475 y: MOV R0.y, QA[471]
476 y: MOV R1.y, QB[471].pop VEC_120
477 x: MOV R1.x, QB[472].pop VEC_120
y: MOV R4.y, QA[472].pop
478 x: MOV R3.x, QA[473]
479 x: MOV R8.x, QB[473].pop VEC_120
480 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
196 JUMP POP_CNT(1) ADDR(199)
197 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
198 POP (1) ADDR(199)
199 ALU_PUSH_BEFORE: ADDR(1202) CNT(19)
481 x: LDS_READ2_RET QAB, 2420, 372
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
482 x: LDS_READ2_RET QAB, 4468, 2416
z: ADD_64 R1.z, PV481.w, R4.y
w: ADD_64 R1.w, PV481.z, R8.x
483 x: LDS_READ2_RET QAB, 368, 4464
484 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
485 y: MOV R0.y, QA[481]
486 y: MOV R1.y, QB[481].pop VEC_120
487 x: MOV R1.x, QB[482].pop VEC_120
y: MOV R4.y, QA[482].pop
488 x: MOV R3.x, QA[483]
489 x: MOV R8.x, QB[483].pop VEC_120
490 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
200 JUMP POP_CNT(1) ADDR(203)
201 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
202 POP (1) ADDR(203)
203 ALU_PUSH_BEFORE: ADDR(1221) CNT(19)
491 x: LDS_READ2_RET QAB, 2428, 380
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
492 x: LDS_READ2_RET QAB, 4476, 2424
z: ADD_64 R1.z, PV491.w, R4.y
w: ADD_64 R1.w, PV491.z, R8.x
493 x: LDS_READ2_RET QAB, 376, 4472
494 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
495 y: MOV R0.y, QA[491]
496 y: MOV R1.y, QB[491].pop VEC_120
497 x: MOV R1.x, QB[492].pop VEC_120
y: MOV R4.y, QA[492].pop
498 x: MOV R3.x, QA[493]
499 x: MOV R8.x, QB[493].pop VEC_120
500 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
204 JUMP POP_CNT(1) ADDR(207)
205 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
206 POP (1) ADDR(207)
207 ALU_PUSH_BEFORE: ADDR(1240) CNT(19)
501 x: LDS_READ2_RET QAB, 2436, 388
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
502 x: LDS_READ2_RET QAB, 4484, 2432
z: ADD_64 R1.z, PV501.w, R4.y
w: ADD_64 R1.w, PV501.z, R8.x
503 x: LDS_READ2_RET QAB, 384, 4480
504 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
505 y: MOV R0.y, QA[501]
506 y: MOV R1.y, QB[501].pop VEC_120
507 x: MOV R1.x, QB[502].pop VEC_120
y: MOV R4.y, QA[502].pop
508 x: MOV R3.x, QA[503]
509 x: MOV R8.x, QB[503].pop VEC_120
510 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
208 JUMP POP_CNT(1) ADDR(211)
209 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
210 POP (1) ADDR(211)
211 ALU_PUSH_BEFORE: ADDR(1259) CNT(19)
511 x: LDS_READ2_RET QAB, 2444, 396
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
512 x: LDS_READ2_RET QAB, 4492, 2440
z: ADD_64 R1.z, PV511.w, R4.y
w: ADD_64 R1.w, PV511.z, R8.x
513 x: LDS_READ2_RET QAB, 392, 4488
514 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
515 y: MOV R0.y, QA[511]
516 y: MOV R1.y, QB[511].pop VEC_120
517 x: MOV R1.x, QB[512].pop VEC_120
y: MOV R4.y, QA[512].pop
518 x: MOV R3.x, QA[513]
519 x: MOV R8.x, QB[513].pop VEC_120
520 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
212 JUMP POP_CNT(1) ADDR(215)
213 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
214 POP (1) ADDR(215)
215 ALU_PUSH_BEFORE: ADDR(1278) CNT(19)
521 x: LDS_READ2_RET QAB, 2452, 404
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
522 x: LDS_READ2_RET QAB, 4500, 2448
z: ADD_64 R1.z, PV521.w, R4.y
w: ADD_64 R1.w, PV521.z, R8.x
523 x: LDS_READ2_RET QAB, 400, 4496
524 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
525 y: MOV R0.y, QA[521]
526 y: MOV R1.y, QB[521].pop VEC_120
527 x: MOV R1.x, QB[522].pop VEC_120
y: MOV R4.y, QA[522].pop
528 x: MOV R3.x, QA[523]
529 x: MOV R8.x, QB[523].pop VEC_120
530 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
216 JUMP POP_CNT(1) ADDR(219)
217 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
218 POP (1) ADDR(219)
219 ALU: ADDR(1297) CNT(18)
531 x: LDS_READ2_RET QAB, 2460, 412
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R3.x, R1.x
532 x: LDS_READ2_RET QAB, 4508, 2456
z: ADD_64 R1.z, PV531.w, R4.y
w: ADD_64 R1.w, PV531.z, R8.x
533 x: LDS_READ2_RET QAB, 408, 4504
534 x: ADD_64 R6.x, R6.y, R1.w
y: ADD_64 R6.y, R6.x, R1.z
535 y: MOV R0.y, QA[531]
536 y: MOV R1.y, QB[531].pop VEC_120
537 x: MOV R1.x, QB[532].pop VEC_120
y: MOV R4.y, QA[532].pop
538 x: MOV R3.x, QA[533]
539 x: MOV R8.x, QB[533].pop VEC_120
220 PUSH ADDR(225)
221 ALU: ADDR(1315) CNT(1)
540 x: PREDNE_INT ____, R0.w, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
222 JUMP POP_CNT(1) ADDR(225)
223 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
224 POP (1) ADDR(225)
225 ALU: ADDR(1316) CNT(6)
541 x: ADD_64 ____, R1.y, R0.y
y: ADD_64 ____, R3.x, R1.x
542 z: ADD_64 ____, PV541.y, R4.y
w: ADD_64 ____, PV541.x, R8.x
543 x: ADD_64 R6.x, R6.y, PV542.w
y: ADD_64 R6.y, R6.x, PV542.z
226 WAIT_ACK: Outstanding_acks <= 0
227 ALU: ADDR(1322) CNT(1)
544 x: GROUP_BARRIER ____
228 ENDLOOP i2 PASS_JUMP_ADDR(8)
229 ALU_PUSH_BEFORE: ADDR(1323) CNT(2)
545 y: SETE_INT R4.y, R0.z, 0.0f
546 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
230 JUMP POP_CNT(1) ADDR(233)
231 MEM_RAT_INC_UINT_ACK: RAT(11)[R2], R5, MARK VPM
232 POP (1) ADDR(233)
233 ALU: ADDR(1325) CNT(14)
547 y: LSHL ____, R0.x, 3
548 x: LDS_WRITE_REL (offset: 1) ____, PV547.y, R7.x, R2.y
y: ADD_INT ____, PV547.y, 2048
z: ADD_INT R0.z, PV547.y, 4096
549 x: LDS_WRITE ____, PV548.y, R7.x
y: ADD_INT R0.y, PV548.z, 4
w: ADD_INT ____, PV548.y, 4
550 x: LDS_WRITE ____, PV549.w, R2.y
551 x: LDS_WRITE ____, R0.z, R7.x
552 x: LDS_XCHG_RET QA, R0.y, R2.y
553 y: MOV ____, QA[552].pop
234 WAIT_ACK: Outstanding_acks <= 0
235 ALU: ADDR(1339) CNT(1)
554 x: GROUP_BARRIER ____
236 ALU: ADDR(1340) CNT(1)
555 w: MOV R0.w, 0.0f
237 LOOP_NO_AL i1 FAIL_JUMP_ADDR(270)
238 ALU: ADDR(1341) CNT(20)
556 y: LSHL R2.y, R0.w, 3
557 x: LDS_READ_RET QA, PV556.y
y: ADD_INT R0.y, PV556.y, 4100
z: ADD_INT ____, PV556.y, 2052
w: ADD_INT ____, PV556.y, 4
558 x: LDS_READ2_RET QAB, PV557.z, PV557.w
z: ADD_INT R0.z, 4096, R2.y
w: ADD_INT ____, 2048, R2.y
559 x: LDS_READ2_RET QAB, R0.y, PV558.w
560 x: LDS_READ_RET QA, R0.z
561 x: MOV R0.x, QA[557].pop
562 y: MOV R0.y, QA[558]
563 y: MOV R1.y, QB[558].pop VEC_120
564 x: MOV R1.x, QB[559].pop VEC_120
y: MOV R5.y, QA[559].pop
565 x: MOV R2.x, QA[560].pop
239 PUSH ADDR(244)
240 ALU: ADDR(1361) CNT(1)
566 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
241 JUMP POP_CNT(1) ADDR(244)
242 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
243 POP (1) ADDR(244)
244 ALU: ADDR(1362) CNT(29)
567 x: ADD_INT ____, 2056, R2.y
y: ADD_INT ____, 8, R2.y
z: ADD_INT R1.z, R0.w, 1
w: ADD_INT R0.w, 4104, R2.y
568 x: LDS_READ2_RET QAB, PV567.x, PV567.y
w: LSHL ____, PV567.z, 3
569 x: LDS_READ_RET QA, R0.w
y: ADD_INT ____, PV568.w, 4
z: ADD_INT R0.z, PV568.w, 4100
w: ADD_INT ____, PV568.w, 2052
570 x: LDS_READ2_RET QAB, PV569.w, PV569.y
z: ADD_64 ____, R1.y, R0.y
w: ADD_64 ____, R0.x, R1.x
571 x: LDS_READ_RET QA, R0.z
z: ADD_64 ____, PV570.w, R5.y
w: ADD_64 ____, PV570.z, R2.x
572 x: ADD_64 R1.x, R6.y, PV571.w
y: ADD_64 R1.y, R6.x, PV571.z
573 x: MOV R0.x, QA[568]
574 x: MOV R2.x, QB[568].pop VEC_120
575 x: MOV R3.x, QA[569].pop
576 y: MOV R0.y, QA[570]
577 y: MOV R5.y, QB[570].pop VEC_120
578 y: MOV R6.y, QA[571].pop
245 PUSH ADDR(250)
246 ALU: ADDR(1391) CNT(1)
579 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
247 JUMP POP_CNT(1) ADDR(250)
248 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
249 POP (1) ADDR(250)
250 ALU: ADDR(1392) CNT(29)
580 x: ADD_INT ____, 2064, R2.y
y: ADD_INT ____, 16, R2.y
z: ADD_INT R1.z, R1.z, 1
w: ADD_INT R0.w, 4112, R2.y
581 x: LDS_READ2_RET QAB, PV580.x, PV580.y
w: LSHL ____, PV580.z, 3
582 x: LDS_READ_RET QA, R0.w
y: ADD_INT ____, PV581.w, 4
z: ADD_INT R0.z, PV581.w, 4100
w: ADD_INT ____, PV581.w, 2052
583 x: LDS_READ2_RET QAB, PV582.w, PV582.y
z: ADD_64 ____, R5.y, R0.y
w: ADD_64 ____, R2.x, R0.x
584 x: LDS_READ_RET QA, R0.z
z: ADD_64 ____, PV583.w, R6.y
w: ADD_64 ____, PV583.z, R3.x
585 x: ADD_64 R1.x, R1.y, PV584.w
y: ADD_64 R1.y, R1.x, PV584.z
586 x: MOV R0.x, QA[581]
587 x: MOV R2.x, QB[581].pop VEC_120
588 x: MOV R3.x, QA[582].pop
589 y: MOV R0.y, QA[583]
590 y: MOV R5.y, QB[583].pop VEC_120
591 y: MOV R6.y, QA[584].pop
251 PUSH ADDR(256)
252 ALU: ADDR(1421) CNT(1)
592 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
253 JUMP POP_CNT(1) ADDR(256)
254 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
255 POP (1) ADDR(256)
256 ALU: ADDR(1422) CNT(29)
593 x: ADD_INT ____, 2072, R2.y
y: ADD_INT ____, 24, R2.y
z: ADD_INT R1.z, R1.z, 1
w: ADD_INT R0.w, 4120, R2.y
594 x: LDS_READ2_RET QAB, PV593.x, PV593.y
w: LSHL ____, PV593.z, 3
595 x: LDS_READ_RET QA, R0.w
y: ADD_INT ____, PV594.w, 4
z: ADD_INT R0.z, PV594.w, 4100
w: ADD_INT ____, PV594.w, 2052
596 x: LDS_READ2_RET QAB, PV595.w, PV595.y
z: ADD_64 ____, R5.y, R0.y
w: ADD_64 ____, R2.x, R0.x
597 x: LDS_READ_RET QA, R0.z
z: ADD_64 ____, PV596.w, R6.y
w: ADD_64 ____, PV596.z, R3.x
598 x: ADD_64 R1.x, R1.y, PV597.w
y: ADD_64 R1.y, R1.x, PV597.z
599 x: MOV R0.x, QA[594]
600 x: MOV R2.x, QB[594].pop VEC_120
601 x: MOV R3.x, QA[595].pop
602 y: MOV R0.y, QA[596]
603 y: MOV R5.y, QB[596].pop VEC_120
604 y: MOV R6.y, QA[597].pop
257 PUSH ADDR(262)
258 ALU: ADDR(1451) CNT(1)
605 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
259 JUMP POP_CNT(1) ADDR(262)
260 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
261 POP (1) ADDR(262)
262 ALU: ADDR(1452) CNT(29)
606 x: ADD_INT ____, 2080, R2.y
y: ADD_INT ____, 32, R2.y
z: ADD_INT R1.z, R1.z, 1
w: ADD_INT R0.w, 4128, R2.y
607 x: LDS_READ2_RET QAB, PV606.x, PV606.y
w: LSHL ____, PV606.z, 3
608 x: LDS_READ_RET QA, R0.w
y: ADD_INT ____, PV607.w, 4
z: ADD_INT R0.z, PV607.w, 4100
w: ADD_INT ____, PV607.w, 2052
609 x: LDS_READ2_RET QAB, PV608.w, PV608.y
z: ADD_64 ____, R5.y, R0.y
w: ADD_64 ____, R2.x, R0.x
610 x: LDS_READ_RET QA, R0.z
z: ADD_64 ____, PV609.w, R6.y
w: ADD_64 ____, PV609.z, R3.x
611 x: ADD_64 R1.x, R1.y, PV610.w
y: ADD_64 R1.y, R1.x, PV610.z
612 x: MOV R0.x, QA[607]
613 x: MOV R2.x, QB[607].pop VEC_120
614 x: MOV R3.x, QA[608].pop
615 y: MOV R0.y, QA[609]
616 y: MOV R2.y, QB[609].pop VEC_120
617 y: MOV R5.y, QA[610].pop
263 PUSH ADDR(268)
264 ALU: ADDR(1481) CNT(1)
618 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
265 JUMP POP_CNT(1) ADDR(268)
266 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
267 POP (1) ADDR(268)
268 ALU: ADDR(1482) CNT(7)
619 x: ADD_64 ____, R2.y, R0.y
y: ADD_64 ____, R2.x, R0.x
w: ADD_INT R0.w, R1.z, 1
620 z: ADD_64 ____, PV619.y, R5.y
w: ADD_64 ____, PV619.x, R3.x
621 x: ADD_64 R6.x, R1.y, PV620.w
y: ADD_64 R6.y, R1.x, PV620.z
269 ENDLOOP i1 PASS_JUMP_ADDR(238)
270 ALU: ADDR(1489) CNT(20)
622 z: LSHL R0.z, R0.w, 3
623 x: LDS_READ_RET QA, PV622.z
y: ADD_INT R0.y, PV622.z, 4100
z: ADD_INT ____, PV622.z, 2052
w: ADD_INT ____, PV622.z, 4
624 x: LDS_READ2_RET QAB, PV623.z, PV623.w
z: ADD_INT R0.z, 4096, R0.z
w: ADD_INT ____, 2048, R0.z
625 x: LDS_READ2_RET QAB, R0.y, PV624.w
626 x: LDS_READ_RET QA, R0.z
627 x: MOV R0.x, QA[623].pop
628 y: MOV R0.y, QA[624]
629 y: MOV R1.y, QB[624].pop VEC_120
630 x: MOV R1.x, QB[625].pop VEC_120
y: MOV R2.y, QA[625].pop
631 x: MOV R2.x, QA[626].pop
271 PUSH ADDR(276)
272 ALU: ADDR(1509) CNT(1)
632 x: PREDNE_INT ____, R4.y, 0.0f UPDATE_EXEC_MASK UPDATE_PRED
273 JUMP POP_CNT(1) ADDR(276)
274 MEM_RAT_INC_UINT_ACK: RAT(11)[R4], R5, MARK VPM
275 POP (1) ADDR(276)
276 ALU: ADDR(1510) CNT(6)
633 x: ADD_64 ____, R1.y, R0.y
y: ADD_64 ____, R0.x, R1.x
634 z: ADD_64 ____, PV633.y, R2.y
w: ADD_64 ____, PV633.x, R2.x
635 x: ADD_64 R4.x, R6.y, PV634.w
y: ADD_64 R4.y, R6.x, PV634.z
277 WAIT_ACK: Outstanding_acks <= 0
278 ALU: ADDR(1516) CNT(1)
636 x: GROUP_BARRIER ____
279 ALU: ADDR(1517) CNT(5) KCACHE0(CB1:0-15)
637 x: MOV R1.x, R4.y
y: ADD_INT ____, KC0[2].x, R3.y
638 x: LSHR R0.x, PV637.y, 2
639 x: ADD_INT R2.x, 1, PV638.x
280 MEM_RAT_STORE_DWORD__NI: RAT(11)[R0].x___, R4, MARK VPM
281 MEM_RAT_STORE_DWORD__NI: RAT(11)[R2].x___, R1, MARK VPM
282 END
END_OF_PROGRAM
; ----------------- CS Data ------------------------
; Input Semantic Mappings
; No input mappings
GprPoolSize = 0
CodeLen = 12320;Bytes
PGM_END_CF = 0; words(64 bit)
PGM_END_ALU = 0; words(64 bit)
PGM_END_FETCH = 0; words(64 bit)
MaxScratchRegsNeeded = 0
;AluPacking = 0.0
;AluClauses = 0
;PowerThrottleRate = 0.0
; texResourceUsage[0] = 0x00000000
; texResourceUsage[1] = 0x00000000
; texResourceUsage[2] = 0x00000000
; texResourceUsage[3] = 0x00000000
; texResourceUsage[4] = 0x00000000
; texResourceUsage[5] = 0x00000000
; texResourceUsage[6] = 0x00000000
; texResourceUsage[7] = 0x00000000
; fetch4ResourceUsage[0] = 0x00000000
; fetch4ResourceUsage[1] = 0x00000000
; fetch4ResourceUsage[2] = 0x00000000
; fetch4ResourceUsage[3] = 0x00000000
; fetch4ResourceUsage[4] = 0x00000000
; fetch4ResourceUsage[5] = 0x00000000
; fetch4ResourceUsage[6] = 0x00000000
; fetch4ResourceUsage[7] = 0x00000000
; texSamplerUsage = 0x00000000
; constBufUsage = 0x00000000
ResourcesAffectAlphaOutput[0] = 0x00000000
ResourcesAffectAlphaOutput[1] = 0x00000000
ResourcesAffectAlphaOutput[2] = 0x00000000
ResourcesAffectAlphaOutput[3] = 0x00000000
ResourcesAffectAlphaOutput[4] = 0x00000000
ResourcesAffectAlphaOutput[5] = 0x00000000
ResourcesAffectAlphaOutput[6] = 0x00000000
ResourcesAffectAlphaOutput[7] = 0x00000000
;SQ_PGM_RESOURCES = 0x30000309
SQ_PGM_RESOURCES:NUM_GPRS = 9
SQ_PGM_RESOURCES:STACK_SIZE = 3
SQ_PGM_RESOURCES:PRIME_CACHE_ENABLE = 1
;SQ_PGM_RESOURCES_2 = 0x000000C0
SQ_LDS_ALLOC:SIZE = 0x00002000
; RatOpIsUsed = 0x800
; RatAtomicOpIsUsed = 0x800
; NumThreadPerGroupFlattened = 256
; NumThreadPerGroup_x = 256
; NumThreadPerGroup_y = 1
; NumThreadPerGroup_z = 1
; SetBufferForNumGroup = true
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment