Status: Using GLEW 2.2.0
GL version: 3.1
GL Shading language version: OpenGL ES GLSL ES 3.10
Vendor: Panfrost
Renderer: Mali-G52 r1 (Panfrost) OpenGL ES 3.1 Mesa 22.3.4
Invocations size: 256
Work group size: 256 256 256
Work group count: 65535 65535 65535
Local size: 256
compute shader ----------
#define KERNEL compute_sp_v1
#define LOCAL_SIZE_X 256
#define DATATYPE float
#line 64
// Avoiding auto-vectorize by using vector-width locked dependent code
layout(local_size_x = LOCAL_SIZE_X) in;
#undef MAD_4
#undef MAD_16
#undef MAD_64
#define mad(a,b,c) (a*b+c)
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x);
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y);
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y);
struct vec8 {
    vec4 d0, d1;
};
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7))
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x))
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1))
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1))
struct vec16 {
    vec8 d0, d1;
};
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15))
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x));
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1)))
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1)))
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c))
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c))
layout(location = 1) uniform DATATYPE _A;
#define SCALE 1e-10
layout(std430, binding = 0) restrict writeonly buffer outbuffer {
    DATATYPE ptr[];
};
#line 111
void compute_sp_v1()
{
    uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u;
    DATATYPE x = _A;
    DATATYPE y = DATATYPE(float(id) * SCALE);
    for(int i=0; i<128; i++)
    {
        MAD_16(x, y);
    }
    ptr[id] = y;
}
void main() {compute_sp_v1();}
----------
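For orientation, each MAD_4(x, y) expands to four serially dependent mad calls, so MAD_16 issues 16 of them back to back. Over the 128-iteration loop that is 2048 mads, i.e. 4096 FLOPs per invocation; the 16 chained FMA.f32 instructions per loop body in the NIR and Bifrost dumps below are exactly this expansion.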
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0xa4c5306d, 0xa0569abe, 0xd12df6c4, 0xb7754e85, 0x74e5b882} | |
name: GLSL2 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp float[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp float _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_4 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_5 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_6 = ishl ssa_4.y, ssa_5 | |
vec1 32 ssa_7 = iadd ssa_4.x, ssa_6 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_78 = insert_u16 ssa_4.z, ssa_1 | |
vec1 32 ssa_10 = iadd ssa_7, ssa_78 | |
vec1 32 ssa_12 = u2f32 ssa_10 | |
vec1 32 ssa_2 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_13 = fmul ssa_12, ssa_2 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_3 = load_const (0x00000080 = 0.000000) | |
vec1 32 ssa_11 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_14 = phi block_0: ssa_13, block_4: ssa_61 | |
vec1 32 ssa_15 = phi block_0: ssa_11, block_4: ssa_62 | |
vec1 32 ssa_16 = phi block_0: ssa_0, block_4: ssa_50 | |
vec1 32 ssa_17 = ige32 ssa_16, ssa_3 | |
/* succs: block_2 block_3 */ | |
if ssa_17 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_76 = ffma ssa_14, ssa_15, ssa_14 | |
vec1 32 ssa_75 = ffma ssa_76, ssa_14, ssa_76 | |
vec1 32 ssa_74 = ffma ssa_75, ssa_76, ssa_75 | |
vec1 32 ssa_73 = ffma ssa_74, ssa_75, ssa_74 | |
vec1 32 ssa_72 = ffma ssa_73, ssa_74, ssa_73 | |
vec1 32 ssa_71 = ffma ssa_72, ssa_73, ssa_72 | |
vec1 32 ssa_70 = ffma ssa_71, ssa_72, ssa_71 | |
vec1 32 ssa_69 = ffma ssa_70, ssa_71, ssa_70 | |
vec1 32 ssa_68 = ffma ssa_69, ssa_70, ssa_69 | |
vec1 32 ssa_67 = ffma ssa_68, ssa_69, ssa_68 | |
vec1 32 ssa_66 = ffma ssa_67, ssa_68, ssa_67 | |
vec1 32 ssa_65 = ffma ssa_66, ssa_67, ssa_66 | |
vec1 32 ssa_64 = ffma ssa_65, ssa_66, ssa_65 | |
vec1 32 ssa_63 = ffma ssa_64, ssa_65, ssa_64 | |
vec1 32 ssa_62 = ffma ssa_63, ssa_64, ssa_63 | |
vec1 32 ssa_61 = ffma ssa_62, ssa_63, ssa_62 | |
vec1 32 ssa_50 = iadd ssa_16, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_51 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_52 = ishl ssa_10, ssa_51 | |
vec1 64 ssa_53 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_54 = unpack_64_2x32_split_x ssa_53 | |
vec1 32 ssa_55 = unpack_64_2x32_split_y ssa_53 | |
vec1 32 ssa_56 = iadd ssa_54, ssa_52 | |
vec1 32 ssa_57 = ult32 ssa_56, ssa_54 | |
vec1 32 ssa_58 = b2i32 ssa_57 | |
vec1 32 ssa_59 = iadd ssa_58, ssa_55 | |
vec1 64 ssa_60 = pack_64_2x32_split ssa_56, ssa_59 | |
intrinsic store_global (ssa_14, ssa_60) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
block0 { | |
81 = MOV.i32 r62 | |
80 = MOV.i32 r61 | |
79 = MOV.i32 r60 | |
6 = LSHIFT_OR.i32 80, #0x0, #0x8.b0 | |
7 = IADD.s32 79, 6 | |
78 = MKVEC.v2i16 #0x0.h00, 81.h00 | |
10 = IADD.s32 7, 78 | |
12 = U32_TO_F32 10 | |
13 = FMA.f32 12, #0x2edbe6ff, #0x0.neg | |
} -> block1 | |
block1 { | |
14 = PHI 13, 61 | |
15 = PHI u1, 62 | |
16 = PHI #0x0, 50 | |
17 = ICMP.s32.m1.ge 16, #0x80 | |
BRANCHZ.i16.eq 17.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
76 = FMA.f32 14, 15, 14 | |
75 = FMA.f32 76, 14, 76 | |
74 = FMA.f32 75, 76, 75 | |
73 = FMA.f32 74, 75, 74 | |
72 = FMA.f32 73, 74, 73 | |
71 = FMA.f32 72, 73, 72 | |
70 = FMA.f32 71, 72, 71 | |
69 = FMA.f32 70, 71, 70 | |
68 = FMA.f32 69, 70, 69 | |
67 = FMA.f32 68, 69, 68 | |
66 = FMA.f32 67, 68, 67 | |
65 = FMA.f32 66, 67, 66 | |
64 = FMA.f32 65, 66, 65 | |
63 = FMA.f32 64, 65, 64 | |
62 = FMA.f32 63, 64, 63 | |
61 = FMA.f32 62, 63, 62 | |
50 = IADD.s32 16, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
52 = LSHIFT_OR.i32 10, #0x0, #0x2.b0 | |
56 = IADD.s32 u0, 52 | |
58 = ICMP.u32.i1.lt 56, u0 | |
59 = IADD.s32 58, u0[1] | |
STORE.i32 14, 56, 59, byte_offset:0 | |
} from block2 | |
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = U32_TO_F32 r0 | |
r1 = FMA.f32 r1, #0x2edbe6ff, #0x0.neg | |
r2 = MOV.i32 u1 | |
r3 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r4 = ICMP.s32.m1.ge r3, #0x80 | |
BRANCHZ.i16.eq r4.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r2 = FMA.f32 r1, r2, r1 | |
r1 = FMA.f32 r2, r1, r2 | |
r2 = FMA.f32 r1, r2, r1 | |
r1 = FMA.f32 r2, r1, r2 | |
r2 = FMA.f32 r1, r2, r1 | |
r1 = FMA.f32 r2, r1, r2 | |
r2 = FMA.f32 r1, r2, r1 | |
r1 = FMA.f32 r2, r1, r2 | |
r2 = FMA.f32 r1, r2, r1 | |
r1 = FMA.f32 r2, r1, r2 | |
r2 = FMA.f32 r1, r2, r1 | |
r1 = FMA.f32 r2, r1, r2 | |
r2 = FMA.f32 r1, r2, r1 | |
r1 = FMA.f32 r2, r1, r2 | |
r2 = FMA.f32 r1, r2, r1 | |
r1 = FMA.f32 r2, r1, r2 | |
r3 = IADD.s32 r3, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
block0 { | |
id(0) nbb r_uncond | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.x.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
* NOP | |
+ _.h00 = U32_TO_F32 t1 | |
* r1 = FMA.f32 t1, fau.y, t.neg | |
+ NOP | |
* NOP | |
+ r2 = MOV.i32 fau.x | |
* NOP | |
+ r3 = MOV.i32 fau.x | |
2edbe6ff00000008 | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r3, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000080 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* r2 = FMA.f32 r1, r2, r1 | |
+ NOP | |
* r1 = FMA.f32 t0, r1, t0 | |
+ NOP | |
* r2 = FMA.f32 t0, r2, t0 | |
+ NOP | |
* r1 = FMA.f32 t0, r1, t0 | |
+ NOP | |
* r2 = FMA.f32 t0, r2, t0 | |
+ NOP | |
* r1 = FMA.f32 t0, r1, t0 | |
+ NOP | |
* r2 = FMA.f32 t0, r2, t0 | |
+ NOP | |
* r1 = FMA.f32 t0, r1, t0 | |
+ NOP | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* r2 = FMA.f32 r1, r2, r1 | |
+ NOP | |
* r1 = FMA.f32 t0, r1, t0 | |
+ NOP | |
* r2 = FMA.f32 t0, r2, t0 | |
+ NOP | |
* r1 = FMA.f32 t0, r1, t0 | |
+ NOP | |
* r2 = FMA.f32 t0, r2, t0 | |
+ NOP | |
* r1 = FMA.f32 t0, r1, t0 | |
+ NOP | |
* r2 = FMA.f32 t0, r2, t0 | |
+ r3 = IADD.s32 r3, fau.x | |
* r1 = FMA.f32 t0, r1, t0 | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) wait(0 ) nbb r_uncond | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ NOP | |
* NOP | |
+ r0 = IADD.s32 fau.x, t0 | |
* NOP | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* NOP | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
slot 0 reads: r1 | |
clause_0: | |
ds(0) nbb r_uncond ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
*NOP t0 | |
+U32_TO_F32 t1, t1 | |
*FMA.f32 r1:t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 r2:t1, u1.w0 | |
*NOP t0 | |
+MOV.i32 r3:t1, #0.x | |
} | |
clause_5: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r3, 0x00000080 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_10 | |
} | |
clause_8: | |
ds(0) nbb next_store dwb(0) | |
{ | |
*NOP t0 | |
+JUMP t1, clause_23 | |
} | |
clause_10: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r2:t0, r1, r2, r1 | |
+NOP t1 | |
*FMA.f32 r1:t0, t0, r1, t0 | |
+NOP t1 | |
*FMA.f32 r2:t0, t0, r2, t0 | |
+NOP t1 | |
*FMA.f32 r1:t0, t0, r1, t0 | |
+NOP t1 | |
*FMA.f32 r2:t0, t0, r2, t0 | |
+NOP t1 | |
*FMA.f32 r1:t0, t0, r1, t0 | |
+NOP t1 | |
*FMA.f32 r2:t0, t0, r2, t0 | |
+NOP t1 | |
*FMA.f32 r1:t0, t0, r1, t0 | |
+NOP t1 | |
} | |
clause_16: | |
ds(0) nbb r_uncond | |
{ | |
*FMA.f32 r2:t0, r1, r2, r1 | |
+NOP t1 | |
*FMA.f32 r1:t0, t0, r1, t0 | |
+NOP t1 | |
*FMA.f32 r2:t0, t0, r2, t0 | |
+NOP t1 | |
*FMA.f32 r1:t0, t0, r1, t0 | |
+NOP t1 | |
*FMA.f32 r2:t0, t0, r2, t0 | |
+NOP t1 | |
*FMA.f32 r1:t0, t0, r1, t0 | |
+NOP t1 | |
*FMA.f32 r2:t0, t0, r2, t0 | |
+IADD.s32 r3:t1, r3, 0x00000001 /* 0.000000 */ | |
*FMA.f32 r1:t0, t0, r1, t0 | |
+JUMP t1, clause_5 | |
} | |
clause_23: | |
ds(0) eos store | |
{ | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*NOP t0 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*NOP t0 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
shader: MESA_SHADER_FRAGMENT | |
source_sha1: {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000} | |
name: GLSL0 | |
inputs: 0 | |
outputs: 1 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var uniform INTERP_MODE_NONE vec4 gl_CurrentAttribFrag2MESA (0, 0, 0) | |
decl_var shader_out INTERP_MODE_NONE vec4 gl_FragData[0] (FRAG_RESULT_DATA0.xyzw, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec4 32 ssa_1 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=16) | |
intrinsic store_output (ssa_1, ssa_0) (base=0, wrmask=xyzw /*15*/, component=0, src_type=float32 /*160*/, io location=4 slots=1 /*132*/, xfb() /*0*/, xfb2() /*0*/) /* gl_FragData[0] */ | |
/* succs: block_1 */ | |
block block_1: | |
} | |
block0 { | |
6 = MOV.i32 r60 | |
1 = COLLECT.i32 u0, u0[1], u1, u1[1] | |
7 = ATEST 6, u1[1], atest-param | |
8 = BLEND.f32 1, 7, blend_descriptor_0, blend_descriptor_0[1], _.h00, sr_count:4, sr_count_2:2 | |
} | |
block0 { | |
r3 = MOV.i32 u1[1] | |
r60 = ATEST r60, r3, atest-param | |
r2 = MOV.i32 u1 | |
r0 = MOV.i32 u0 | |
r1 = MOV.i32 u0[1] | |
r48 = BLEND.f32 r0, r60, blend_descriptor_0, blend_descriptor_0[1], _.h00, sr_count:4, sr_count_2:2 | |
} | |
block0 { | |
id(0) wait(6 ) nbb td | |
* NOP | |
+ r3 = MOV.i32 fau.y | |
* NOP | |
+ r60 = ATEST r60, t1, fau.x | |
id(0) wait(0 6 7 ) nbb r_uncond td | |
* NOP | |
+ r2 = MOV.i32 fau.x | |
* r0 = MOV.i32 fau.x | |
+ r1 = MOV.i32 fau.y | |
* NOP | |
+ r48 = BLEND.f32 r0, r60, fau.x, fau.y, _.h00, sr_count:4, sr_count_2:2 | |
} | |
slot 0 reads: r0 r1 r2 r3 | |
clause_0: | |
ds(0) nbb atest td ncph next_blend dwb(0, 6, 7) | |
{ | |
*NOP t0 | |
+MOV.i32 r3:t1, u1.w1 | |
*NOP t0 | |
+ATEST r60:t1, r60, t1, @r60 | |
} | |
clause_2: | |
ds(0) eos blend td | |
{ | |
*NOP t0 | |
+MOV.i32 r2:t1, u1.w0 | |
*MOV.i32 r0:t0, u0.w0 | |
+MOV.i32 r1:t1, u0.w1 | |
*NOP t0 | |
+BLEND r48:t1, r60, blend_descriptor_0.x, blend_descriptor_0.y, @r0 | |
} | |
shader: MESA_SHADER_VERTEX | |
source_sha1: {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000} | |
name: ARB0 | |
inputs: 0 | |
outputs: 1 | |
uniforms: 5 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var uniform INTERP_MODE_NONE vec4[5] state.matrix.mvp.transpose.row[0] (0, 0, 0) | |
decl_var shader_out INTERP_MODE_SMOOTH vec4 out_0 (VARYING_SLOT_POS.xyzw, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[5] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec4 32 ssa_1 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=4, align_offset=0, range_base=0, range=80) | |
vec1 32 ssa_2 = load_const (0x00000040 = 0.000000) | |
vec4 32 ssa_3 = intrinsic load_ubo (ssa_0, ssa_2) (access=0, align_mul=4, align_offset=0, range_base=0, range=80) | |
vec1 32 ssa_4 = fmul ssa_3.x, ssa_1.x | |
vec1 32 ssa_5 = fmul ssa_3.x, ssa_1.y | |
vec1 32 ssa_6 = fmul ssa_3.x, ssa_1.z | |
vec1 32 ssa_7 = fmul ssa_3.x, ssa_1.w | |
vec1 32 ssa_8 = load_const (0x00000010 = 0.000000) | |
vec4 32 ssa_9 = intrinsic load_ubo (ssa_0, ssa_8) (access=0, align_mul=4, align_offset=0, range_base=0, range=80) | |
vec1 32 ssa_10 = ffma ssa_3.y, ssa_9.x, ssa_4 | |
vec1 32 ssa_11 = ffma ssa_3.y, ssa_9.y, ssa_5 | |
vec1 32 ssa_12 = ffma ssa_3.y, ssa_9.z, ssa_6 | |
vec1 32 ssa_13 = ffma ssa_3.y, ssa_9.w, ssa_7 | |
vec1 32 ssa_14 = load_const (0x00000020 = 0.000000) | |
vec4 32 ssa_15 = intrinsic load_ubo (ssa_0, ssa_14) (access=0, align_mul=4, align_offset=0, range_base=0, range=80) | |
vec1 32 ssa_16 = ffma ssa_3.z, ssa_15.x, ssa_10 | |
vec1 32 ssa_17 = ffma ssa_3.z, ssa_15.y, ssa_11 | |
vec1 32 ssa_18 = ffma ssa_3.z, ssa_15.z, ssa_12 | |
vec1 32 ssa_19 = ffma ssa_3.z, ssa_15.w, ssa_13 | |
vec1 32 ssa_20 = load_const (0x00000030 = 0.000000) | |
vec4 32 ssa_21 = intrinsic load_ubo (ssa_0, ssa_20) (access=0, align_mul=4, align_offset=0, range_base=0, range=80) | |
vec1 32 ssa_22 = ffma ssa_3.w, ssa_21.x, ssa_16 | |
vec1 32 ssa_23 = ffma ssa_3.w, ssa_21.y, ssa_17 | |
vec1 32 ssa_24 = ffma ssa_3.w, ssa_21.z, ssa_18 | |
vec1 32 ssa_25 = ffma ssa_3.w, ssa_21.w, ssa_19 | |
vec3 32 ssa_26 = intrinsic load_viewport_scale () () | |
vec3 32 ssa_27 = intrinsic load_viewport_offset () () | |
vec1 32 ssa_28 = frcp ssa_25 | |
vec1 32 ssa_29 = fmul ssa_22, ssa_28 | |
vec1 32 ssa_30 = fmul ssa_23, ssa_28 | |
vec1 32 ssa_31 = fmul ssa_24, ssa_28 | |
vec1 32 ssa_32 = ffma ssa_29, ssa_26.x, ssa_27.x | |
vec1 32 ssa_33 = ffma ssa_30, ssa_26.y, ssa_27.y | |
vec1 32 ssa_34 = ffma ssa_31, ssa_26.z, ssa_27.z | |
vec4 32 ssa_35 = vec4 ssa_32, ssa_33, ssa_34, ssa_28 | |
intrinsic store_output (ssa_35, ssa_0) (base=0, wrmask=xyzw /*15*/, component=0, src_type=float32 /*160*/, io location=0 slots=1 /*128*/, xfb() /*0*/, xfb2() /*0*/) /* out_0 */ | |
/* succs: block_1 */ | |
block block_1: | |
} | |
block0 { | |
63 = MOV.i32 r59 | |
62 = MOV.i32 r58 | |
4 = FMA.f32 u3[1], u3, #0x0.neg | |
5 = FMA.f32 u3[1], u4, #0x0.neg | |
6 = FMA.f32 u3[1], u4[1], #0x0.neg | |
7 = FMA.f32 u3[1], u11, #0x0.neg | |
10 = FMA.f32 u5[1], u5, 4 | |
11 = FMA.f32 u5[1], u6, 5 | |
12 = FMA.f32 u5[1], u6[1], 6 | |
13 = FMA.f32 u5[1], u11[1], 7 | |
16 = FMA.f32 u7[1], u7, 10 | |
17 = FMA.f32 u7[1], u8, 11 | |
18 = FMA.f32 u7[1], u8[1], 12 | |
19 = FMA.f32 u7[1], u12, 13 | |
22 = FMA.f32 u9[1], u9, 16 | |
23 = FMA.f32 u9[1], u10, 17 | |
24 = FMA.f32 u9[1], u10[1], 18 | |
25 = FMA.f32 u9[1], u12[1], 19 | |
28 = FRCP.f32 25 | |
29 = FMA.f32 22, 28, #0x0.neg | |
30 = FMA.f32 23, 28, #0x0.neg | |
31 = FMA.f32 24, 28, #0x0.neg | |
32 = FMA.f32 29, u0, u0[1] | |
33 = FMA.f32 30, u1, u1[1] | |
34 = FMA.f32 31, u2, u2[1] | |
35 = COLLECT.i32 32, 33, 34, 28 | |
ST_CVT.f32.v4 35, 62, 63, #0x105e000 | |
} | |
block0 { | |
r0 = MOV.i32 u11 | |
r0 = FMA.f32 u3[1], r0, #0x0.neg | |
r1 = MOV.i32 u11[1] | |
r0 = FMA.f32 u5[1], r1, r0 | |
r1 = MOV.i32 u4[1] | |
r1 = FMA.f32 u3[1], r1, #0x0.neg | |
r2 = MOV.i32 u6[1] | |
r1 = FMA.f32 u5[1], r2, r1 | |
r2 = MOV.i32 u4 | |
r2 = FMA.f32 u3[1], r2, #0x0.neg | |
r3 = MOV.i32 u6 | |
r2 = FMA.f32 u5[1], r3, r2 | |
r3 = MOV.i32 u12 | |
r0 = FMA.f32 u7[1], r3, r0 | |
r3 = MOV.i32 u8[1] | |
r1 = FMA.f32 u7[1], r3, r1 | |
r3 = MOV.i32 u8 | |
r2 = FMA.f32 u7[1], r3, r2 | |
r3 = MOV.i32 u12[1] | |
r0 = FMA.f32 u9[1], r3, r0 | |
r3 = MOV.i32 u10[1] | |
r1 = FMA.f32 u9[1], r3, r1 | |
r3 = MOV.i32 u10 | |
r2 = FMA.f32 u9[1], r3, r2 | |
r3 = FMA.f32 u3[1], u3, #0x0.neg | |
r3 = FMA.f32 u5[1], u5, r3 | |
r3 = FMA.f32 u7[1], u7, r3 | |
r0 = FRCP.f32 r0 | |
r3 = FMA.f32 u9[1], u9, r3 | |
r1 = FMA.f32 r1, r0, #0x0.neg | |
r2 = FMA.f32 r2, r0, #0x0.neg | |
r3 = FMA.f32 r3, r0, #0x0.neg | |
r1 = FMA.f32 r1, u2, u2[1] | |
r2 = FMA.f32 r2, u1, u1[1] | |
r3 = FMA.f32 r3, u0, u0[1] | |
r4 = MOV.i32 r2 | |
r5 = MOV.i32 r1 | |
r6 = MOV.i32 r0 | |
ST_CVT.f32.v4 r3, r58, r59, #0x105e000 | |
} | |
block0 { | |
id(0) nbb | |
* NOP | |
+ _.h00 = MOV.i32 fau.x | |
* r0 = FMA.f32 fau.y, t1, t.neg | |
+ NOP | |
* NOP | |
+ r1 = MOV.i32 fau.y | |
id(0) nbb | |
* r0 = FMA.f32 fau.y, r1, r0 | |
+ NOP | |
* NOP | |
+ _.h00 = MOV.i32 fau.y | |
* r1 = FMA.f32 fau.y, t1, t.neg | |
+ NOP | |
* NOP | |
+ _.h00 = MOV.i32 fau.y | |
* r1 = FMA.f32 fau.y, t1, r1 | |
+ NOP | |
* NOP | |
+ _.h00 = MOV.i32 fau.x | |
* r2 = FMA.f32 fau.y, t1, t.neg | |
+ NOP | |
* NOP | |
+ r3 = MOV.i32 fau.x | |
id(0) nbb | |
* r2 = FMA.f32 fau.y, r3, r2 | |
+ NOP | |
* NOP | |
+ _.h00 = MOV.i32 fau.x | |
* r0 = FMA.f32 fau.y, t1, r0 | |
+ NOP | |
* NOP | |
+ _.h00 = MOV.i32 fau.y | |
* r1 = FMA.f32 fau.y, t1, r1 | |
+ NOP | |
* NOP | |
+ _.h00 = MOV.i32 fau.x | |
* r2 = FMA.f32 fau.y, t1, r2 | |
+ NOP | |
* NOP | |
+ r3 = MOV.i32 fau.y | |
id(0) nbb | |
* r0 = FMA.f32 fau.y, r3, r0 | |
+ NOP | |
* NOP | |
+ _.h00 = MOV.i32 fau.y | |
* r1 = FMA.f32 fau.y, t1, r1 | |
+ NOP | |
* NOP | |
+ _.h00 = MOV.i32 fau.x | |
* r2 = FMA.f32 fau.y, t1, r2 | |
+ NOP | |
* _.h00 = FMA.f32 fau.y, fau.x, t.neg | |
+ NOP | |
* _.h00 = FMA.f32 fau.y, fau.x, t0 | |
+ NOP | |
* r3 = FMA.f32 fau.y, fau.x, t0 | |
+ NOP | |
id(0) wait(0 ) nbb r_uncond | |
* r3 = FMA.f32 fau.y, fau.x, r3 | |
+ r0 = FRCP.f32 r0 | |
* r1 = FMA.f32 r1, t1, t.neg | |
+ NOP | |
* r2 = FMA.f32 r2, r0, t.neg | |
+ NOP | |
* r3 = FMA.f32 r3, r0, t.neg | |
+ NOP | |
* _.h00 = FMA.f32 r2, fau.x, fau.y | |
+ r4 = MOV.i32 t | |
* _.h00 = FMA.f32 r1, fau.x, fau.y | |
+ r5 = MOV.i32 t | |
* r3 = FMA.f32 r3, fau.x, fau.y | |
+ r6 = MOV.i32 r0 | |
* NOP | |
+ ST_CVT.f32.v4 r3, r58, r59, fau.y | |
105e00000000000 | |
} | |
slot 0 reads: r3 r4 r5 r6 | |
clause_0: | |
ds(0) nbb ncph | |
{ | |
*NOP t0 | |
+MOV.i32 t1, u11.w0 | |
*FMA.f32 r0:t0, u3.w1, t1, #0.neg | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 r1:t1, u11.w1 | |
} | |
clause_3: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r0:t0, u5.w1, r1, r0 | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 t1, u4.w1 | |
*FMA.f32 r1:t0, u3.w1, t1, #0.neg | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 t1, u6.w1 | |
*FMA.f32 r1:t0, u5.w1, t1, r1 | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 t1, u4.w0 | |
*FMA.f32 r2:t0, u3.w1, t1, #0.neg | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 r3:t1, u6.w0 | |
} | |
clause_9: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r2:t0, u5.w1, r3, r2 | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 t1, u12.w0 | |
*FMA.f32 r0:t0, u7.w1, t1, r0 | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 t1, u8.w1 | |
*FMA.f32 r1:t0, u7.w1, t1, r1 | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 t1, u8.w0 | |
*FMA.f32 r2:t0, u7.w1, t1, r2 | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 r3:t1, u12.w1 | |
} | |
clause_15: | |
ds(0) nbb ncph next_store dwb(0) | |
{ | |
*FMA.f32 r0:t0, u9.w1, r3, r0 | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 t1, u10.w1 | |
*FMA.f32 r1:t0, u9.w1, t1, r1 | |
+NOP t1 | |
*NOP t0 | |
+MOV.i32 t1, u10.w0 | |
*FMA.f32 r2:t0, u9.w1, t1, r2 | |
+NOP t1 | |
*FMA.f32 t0, u3.w1, u3.w0, #0.neg | |
+NOP t1 | |
*FMA.f32 t0, u5.w1, u5.w0, t0 | |
+NOP t1 | |
*FMA.f32 r3:t0, u7.w1, u7.w0, t0 | |
+NOP t1 | |
} | |
clause_21: | |
ds(0) eos store | |
{ | |
*FMA.f32 r3:t0, u9.w1, u9.w0, r3 | |
+FRCP.f32 r0:t1, r0 | |
*FMA.f32 r1:t0, r1, t1, #0.neg | |
+NOP t1 | |
*FMA.f32 r2:t0, r2, r0, #0.neg | |
+NOP t1 | |
*FMA.f32 r3:t0, r3, r0, #0.neg | |
+NOP t1 | |
*FMA.f32 t0, r2, u1.w0, u1.w1 | |
+MOV.i32 r4:t1, t | |
*FMA.f32 t0, r1, u2.w0, u2.w1 | |
+MOV.i32 r5:t1, t | |
*FMA.f32 r3:t0, r3, u0.w0, u0.w1 | |
+MOV.i32 r6:t1, r0 | |
*NOP t0 | |
+ST_CVT.v4 t1, r58, r59, 0x0105e000 /* 0.000000 */, @r3 | |
} | |
shader: MESA_SHADER_VERTEX | |
source_sha1: {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000} | |
name: ARB0 | |
inputs: 0 | |
outputs: 1 | |
uniforms: 5 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var uniform INTERP_MODE_NONE vec4[5] state.matrix.mvp.transpose.row[0] (0, 0, 0) | |
decl_var shader_out INTERP_MODE_SMOOTH vec4 out_0 (VARYING_SLOT_POS.xyzw, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[5] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
/* succs: block_1 */ | |
block block_1: | |
} | |
block0 { | |
} | |
block0 { | |
} | |
block0 { | |
} | |
ccdd1a79 compute_sp_v1_float 22.094 GFLOPs 12.150ms
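A quick consistency check on that figure (my arithmetic, reading GFLOPs as GFLOP/s): 22.094 GFLOP/s over 12.150 ms is about 2.68e8 FLOPs, and at 4096 FLOPs per invocation that works out to roughly 65536 invocations, i.e. 256 workgroups of local size 256. The dispatch size itself is not recorded in this dump, so treat it as inferred.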
compute shader ----------
#define KERNEL compute_sp_v2
#define LOCAL_SIZE_X 256
#define DATATYPE float
#line 64
// Avoiding auto-vectorize by using vector-width locked dependent code
layout(local_size_x = LOCAL_SIZE_X) in;
#undef MAD_4
#undef MAD_16
#undef MAD_64
#define mad(a,b,c) (a*b+c)
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x);
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y);
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y);
struct vec8 {
    vec4 d0, d1;
};
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7))
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x))
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1))
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1))
struct vec16 {
    vec8 d0, d1;
};
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15))
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x));
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1)))
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1)))
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c))
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c))
layout(location = 1) uniform DATATYPE _A;
#define SCALE 1e-10
layout(std430, binding = 0) restrict writeonly buffer outbuffer {
    DATATYPE ptr[];
};
#line 128
void compute_sp_v2()
{
    uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u;
    vec2 x = vec2(_A, (_A+DATATYPE(1)));
    vec2 y = vec2((float(id) * SCALE), (float(id) * SCALE));
    for(int i=0; i<64; i++)
    {
        MAD_16(x, y);
    }
    ptr[id] = (y.x) + (y.y);
}
void main() {compute_sp_v2();}
----------
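The vec2 variant performs the same 4096 FLOPs per invocation (64 iterations x 16 mads x 2 components x 2 FLOPs each), but the backend scalarizes it into two independent FMA chains, visible interleaved in block4 below; the extra instruction-level parallelism plausibly explains the modest GFLOP/s gain over v1.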
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0xc009c35c, 0x704fbf5f, 0x33c55587, 0x620877d8, 0x4a008af3} | |
name: GLSL4 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp float[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp float _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_5 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_6 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_7 = ishl ssa_5.y, ssa_6 | |
vec1 32 ssa_8 = iadd ssa_5.x, ssa_7 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_131 = insert_u16 ssa_5.z, ssa_1 | |
vec1 32 ssa_11 = iadd ssa_8, ssa_131 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_12 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
vec1 32 ssa_2 = load_const (0x3f800000 = 1.000000) | |
vec1 32 ssa_13 = fadd ssa_12, ssa_2 | |
vec1 32 ssa_14 = u2f32 ssa_11 | |
vec1 32 ssa_3 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_15 = fmul ssa_14, ssa_3 | |
vec1 32 ssa_4 = load_const (0x00000040 = 0.000000) | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_16 = phi block_0: ssa_15, block_4: ssa_99 | |
vec1 32 ssa_17 = phi block_0: ssa_15, block_4: ssa_98 | |
vec1 32 ssa_18 = phi block_0: ssa_12, block_4: ssa_101 | |
vec1 32 ssa_19 = phi block_0: ssa_13, block_4: ssa_100 | |
vec1 32 ssa_20 = phi block_0: ssa_0, block_4: ssa_86 | |
vec1 32 ssa_21 = ige32 ssa_20, ssa_4 | |
/* succs: block_2 block_3 */ | |
if ssa_21 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_129 = ffma ssa_16, ssa_18, ssa_16 | |
vec1 32 ssa_128 = ffma ssa_17, ssa_19, ssa_17 | |
vec1 32 ssa_127 = ffma ssa_129, ssa_16, ssa_129 | |
vec1 32 ssa_126 = ffma ssa_128, ssa_17, ssa_128 | |
vec1 32 ssa_125 = ffma ssa_127, ssa_129, ssa_127 | |
vec1 32 ssa_124 = ffma ssa_126, ssa_128, ssa_126 | |
vec1 32 ssa_123 = ffma ssa_125, ssa_127, ssa_125 | |
vec1 32 ssa_122 = ffma ssa_124, ssa_126, ssa_124 | |
vec1 32 ssa_121 = ffma ssa_123, ssa_125, ssa_123 | |
vec1 32 ssa_120 = ffma ssa_122, ssa_124, ssa_122 | |
vec1 32 ssa_119 = ffma ssa_121, ssa_123, ssa_121 | |
vec1 32 ssa_118 = ffma ssa_120, ssa_122, ssa_120 | |
vec1 32 ssa_117 = ffma ssa_119, ssa_121, ssa_119 | |
vec1 32 ssa_116 = ffma ssa_118, ssa_120, ssa_118 | |
vec1 32 ssa_115 = ffma ssa_117, ssa_119, ssa_117 | |
vec1 32 ssa_114 = ffma ssa_116, ssa_118, ssa_116 | |
vec1 32 ssa_113 = ffma ssa_115, ssa_117, ssa_115 | |
vec1 32 ssa_112 = ffma ssa_114, ssa_116, ssa_114 | |
vec1 32 ssa_111 = ffma ssa_113, ssa_115, ssa_113 | |
vec1 32 ssa_110 = ffma ssa_112, ssa_114, ssa_112 | |
vec1 32 ssa_109 = ffma ssa_111, ssa_113, ssa_111 | |
vec1 32 ssa_108 = ffma ssa_110, ssa_112, ssa_110 | |
vec1 32 ssa_107 = ffma ssa_109, ssa_111, ssa_109 | |
vec1 32 ssa_106 = ffma ssa_108, ssa_110, ssa_108 | |
vec1 32 ssa_105 = ffma ssa_107, ssa_109, ssa_107 | |
vec1 32 ssa_104 = ffma ssa_106, ssa_108, ssa_106 | |
vec1 32 ssa_103 = ffma ssa_105, ssa_107, ssa_105 | |
vec1 32 ssa_102 = ffma ssa_104, ssa_106, ssa_104 | |
vec1 32 ssa_101 = ffma ssa_103, ssa_105, ssa_103 | |
vec1 32 ssa_100 = ffma ssa_102, ssa_104, ssa_102 | |
vec1 32 ssa_99 = ffma ssa_101, ssa_103, ssa_101 | |
vec1 32 ssa_98 = ffma ssa_100, ssa_102, ssa_100 | |
vec1 32 ssa_86 = iadd ssa_20, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_87 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_88 = ishl ssa_11, ssa_87 | |
vec1 32 ssa_89 = fadd ssa_16, ssa_17 | |
vec1 64 ssa_90 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_91 = unpack_64_2x32_split_x ssa_90 | |
vec1 32 ssa_92 = unpack_64_2x32_split_y ssa_90 | |
vec1 32 ssa_93 = iadd ssa_91, ssa_88 | |
vec1 32 ssa_94 = ult32 ssa_93, ssa_91 | |
vec1 32 ssa_95 = b2i32 ssa_94 | |
vec1 32 ssa_96 = iadd ssa_95, ssa_92 | |
vec1 64 ssa_97 = pack_64_2x32_split ssa_93, ssa_96 | |
intrinsic store_global (ssa_89, ssa_97) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
block0 { | |
134 = MOV.i32 r62 | |
133 = MOV.i32 r61 | |
132 = MOV.i32 r60 | |
7 = LSHIFT_OR.i32 133, #0x0, #0x8.b0 | |
8 = IADD.s32 132, 7 | |
131 = MKVEC.v2i16 #0x0.h00, 134.h00 | |
11 = IADD.s32 8, 131 | |
13 = FADD.f32 u1, #0x3f800000 | |
14 = U32_TO_F32 11 | |
15 = FMA.f32 14, #0x2edbe6ff, #0x0.neg | |
} -> block1 | |
block1 { | |
16 = PHI 15, 99 | |
17 = PHI 15, 98 | |
18 = PHI u1, 101 | |
19 = PHI 13, 100 | |
20 = PHI #0x0, 86 | |
21 = ICMP.s32.m1.ge 20, #0x40 | |
BRANCHZ.i16.eq 21.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
129 = FMA.f32 16, 18, 16 | |
128 = FMA.f32 17, 19, 17 | |
127 = FMA.f32 129, 16, 129 | |
126 = FMA.f32 128, 17, 128 | |
125 = FMA.f32 127, 129, 127 | |
124 = FMA.f32 126, 128, 126 | |
123 = FMA.f32 125, 127, 125 | |
122 = FMA.f32 124, 126, 124 | |
121 = FMA.f32 123, 125, 123 | |
120 = FMA.f32 122, 124, 122 | |
119 = FMA.f32 121, 123, 121 | |
118 = FMA.f32 120, 122, 120 | |
117 = FMA.f32 119, 121, 119 | |
116 = FMA.f32 118, 120, 118 | |
115 = FMA.f32 117, 119, 117 | |
114 = FMA.f32 116, 118, 116 | |
113 = FMA.f32 115, 117, 115 | |
112 = FMA.f32 114, 116, 114 | |
111 = FMA.f32 113, 115, 113 | |
110 = FMA.f32 112, 114, 112 | |
109 = FMA.f32 111, 113, 111 | |
108 = FMA.f32 110, 112, 110 | |
107 = FMA.f32 109, 111, 109 | |
106 = FMA.f32 108, 110, 108 | |
105 = FMA.f32 107, 109, 107 | |
104 = FMA.f32 106, 108, 106 | |
103 = FMA.f32 105, 107, 105 | |
102 = FMA.f32 104, 106, 104 | |
101 = FMA.f32 103, 105, 103 | |
100 = FMA.f32 102, 104, 102 | |
99 = FMA.f32 101, 103, 101 | |
98 = FMA.f32 100, 102, 100 | |
86 = IADD.s32 20, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
88 = LSHIFT_OR.i32 11, #0x0, #0x2.b0 | |
89 = FADD.f32 16, 17 | |
93 = IADD.s32 u0, 88 | |
95 = ICMP.u32.i1.lt 93, u0 | |
96 = IADD.s32 95, u0[1] | |
STORE.i32 89, 93, 96, byte_offset:0 | |
} from block2 | |
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = MOV.i32 #0x3f800000 | |
r1 = FADD.f32 u1, r1 | |
r2 = U32_TO_F32 r0 | |
r2 = FMA.f32 r2, #0x2edbe6ff, #0x0.neg | |
r3 = MOV.i32 r2 | |
r4 = MOV.i32 u1 | |
r5 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r6 = ICMP.s32.m1.ge r5, #0x40 | |
BRANCHZ.i16.eq r6.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r4 = FMA.f32 r2, r4, r2 | |
r1 = FMA.f32 r3, r1, r3 | |
r2 = FMA.f32 r4, r2, r4 | |
r3 = FMA.f32 r1, r3, r1 | |
r4 = FMA.f32 r2, r4, r2 | |
r1 = FMA.f32 r3, r1, r3 | |
r2 = FMA.f32 r4, r2, r4 | |
r3 = FMA.f32 r1, r3, r1 | |
r4 = FMA.f32 r2, r4, r2 | |
r1 = FMA.f32 r3, r1, r3 | |
r2 = FMA.f32 r4, r2, r4 | |
r3 = FMA.f32 r1, r3, r1 | |
r4 = FMA.f32 r2, r4, r2 | |
r1 = FMA.f32 r3, r1, r3 | |
r2 = FMA.f32 r4, r2, r4 | |
r3 = FMA.f32 r1, r3, r1 | |
r4 = FMA.f32 r2, r4, r2 | |
r1 = FMA.f32 r3, r1, r3 | |
r2 = FMA.f32 r4, r2, r4 | |
r3 = FMA.f32 r1, r3, r1 | |
r4 = FMA.f32 r2, r4, r2 | |
r1 = FMA.f32 r3, r1, r3 | |
r2 = FMA.f32 r4, r2, r4 | |
r3 = FMA.f32 r1, r3, r1 | |
r4 = FMA.f32 r2, r4, r2 | |
r1 = FMA.f32 r3, r1, r3 | |
r2 = FMA.f32 r4, r2, r4 | |
r3 = FMA.f32 r1, r3, r1 | |
r4 = FMA.f32 r2, r4, r2 | |
r1 = FMA.f32 r3, r1, r3 | |
r2 = FMA.f32 r4, r2, r4 | |
r3 = FMA.f32 r1, r3, r1 | |
r5 = IADD.s32 r5, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r1 = FADD.f32 r2, r3 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
block0 { | |
id(0) nbb r_uncond | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.y.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
* NOP | |
+ _.h00 = U32_TO_F32 t1 | |
* r2 = FMA.f32 t1, fau.x, t.neg | |
+ _.h00 = MOV.i32 fau.y | |
* NOP | |
+ r1 = FADD.f32 fau.x, t1 | |
* r3 = MOV.i32 r2 | |
+ r4 = MOV.i32 fau.x | |
* NOP | |
+ r5 = MOV.i32 fau.x | |
800000000 3f8000002edbe6ff | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r5, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000040 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* r4 = FMA.f32 r2, r4, r2 | |
+ NOP | |
* r1 = FMA.f32 r3, r1, r3 | |
+ NOP | |
* r2 = FMA.f32 r4, r2, r4 | |
+ NOP | |
* r3 = FMA.f32 r1, r3, r1 | |
+ NOP | |
* r4 = FMA.f32 r2, r4, r2 | |
+ NOP | |
* r1 = FMA.f32 r3, r1, r3 | |
+ NOP | |
* r2 = FMA.f32 r4, r2, r4 | |
+ NOP | |
* r3 = FMA.f32 r1, r3, r1 | |
+ NOP | |
id(0) nbb | |
* r4 = FMA.f32 r2, r4, r2 | |
+ NOP | |
* r1 = FMA.f32 r3, r1, r3 | |
+ NOP | |
* r2 = FMA.f32 r4, r2, r4 | |
+ NOP | |
* r3 = FMA.f32 r1, r3, r1 | |
+ NOP | |
* r4 = FMA.f32 r2, r4, r2 | |
+ NOP | |
* r1 = FMA.f32 r3, r1, r3 | |
+ NOP | |
* r2 = FMA.f32 r4, r2, r4 | |
+ NOP | |
* r3 = FMA.f32 r1, r3, r1 | |
+ NOP | |
id(0) nbb | |
* r4 = FMA.f32 r2, r4, r2 | |
+ NOP | |
* r1 = FMA.f32 r3, r1, r3 | |
+ NOP | |
* r2 = FMA.f32 r4, r2, r4 | |
+ NOP | |
* r3 = FMA.f32 r1, r3, r1 | |
+ NOP | |
* r4 = FMA.f32 r2, r4, r2 | |
+ NOP | |
* r1 = FMA.f32 r3, r1, r3 | |
+ NOP | |
* r2 = FMA.f32 r4, r2, r4 | |
+ NOP | |
* r3 = FMA.f32 r1, r3, r1 | |
+ NOP | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* r4 = FMA.f32 r2, r4, r2 | |
+ NOP | |
* r1 = FMA.f32 r3, r1, r3 | |
+ NOP | |
* r2 = FMA.f32 r4, r2, r4 | |
+ NOP | |
* r3 = FMA.f32 r1, r3, r1 | |
+ NOP | |
* r4 = FMA.f32 r2, r4, r2 | |
+ NOP | |
* r1 = FMA.f32 r3, r1, r3 | |
+ NOP | |
* r2 = FMA.f32 r4, r2, r4 | |
+ r5 = IADD.s32 r5, fau.x | |
* r3 = FMA.f32 r1, r3, r1 | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) wait(0 ) nbb r_uncond | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ NOP | |
* NOP | |
+ r0 = IADD.s32 fau.x, t0 | |
* r1 = FADD.f32 r2, r3 | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* NOP | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
slot 0 reads: r1 | |
clause_0: | |
ds(0) nbb r_uncond ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
*NOP t0 | |
+U32_TO_F32 t1, t1 | |
*FMA.f32 r2:t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+MOV.i32 t1, 0x3f800000 /* 1.000000 */ | |
*NOP t0 | |
+FADD.f32 r1:t1, u1.w0, t1 | |
*MOV.i32 r3:t0, r2 | |
+MOV.i32 r4:t1, u1.w0 | |
*NOP t0 | |
+MOV.i32 r5:t1, #0.x | |
} | |
clause_6: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r5, 0x00000040 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_11 | |
} | |
clause_9: | |
ds(0) nbb next_store dwb(0) | |
{ | |
*NOP t0 | |
+JUMP t1, clause_36 | |
} | |
clause_11: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r4:t0, r2, r4, r2 | |
+NOP t1 | |
*FMA.f32 r1:t0, r3, r1, r3 | |
+NOP t1 | |
*FMA.f32 r2:t0, r4, r2, r4 | |
+NOP t1 | |
*FMA.f32 r3:t0, r1, r3, r1 | |
+NOP t1 | |
*FMA.f32 r4:t0, r2, r4, r2 | |
+NOP t1 | |
*FMA.f32 r1:t0, r3, r1, r3 | |
+NOP t1 | |
*FMA.f32 r2:t0, r4, r2, r4 | |
+NOP t1 | |
*FMA.f32 r3:t0, r1, r3, r1 | |
+NOP t1 | |
} | |
clause_17: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r4:t0, r2, r4, r2 | |
+NOP t1 | |
*FMA.f32 r1:t0, r3, r1, r3 | |
+NOP t1 | |
*FMA.f32 r2:t0, r4, r2, r4 | |
+NOP t1 | |
*FMA.f32 r3:t0, r1, r3, r1 | |
+NOP t1 | |
*FMA.f32 r4:t0, r2, r4, r2 | |
+NOP t1 | |
*FMA.f32 r1:t0, r3, r1, r3 | |
+NOP t1 | |
*FMA.f32 r2:t0, r4, r2, r4 | |
+NOP t1 | |
*FMA.f32 r3:t0, r1, r3, r1 | |
+NOP t1 | |
} | |
clause_23: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r4:t0, r2, r4, r2 | |
+NOP t1 | |
*FMA.f32 r1:t0, r3, r1, r3 | |
+NOP t1 | |
*FMA.f32 r2:t0, r4, r2, r4 | |
+NOP t1 | |
*FMA.f32 r3:t0, r1, r3, r1 | |
+NOP t1 | |
*FMA.f32 r4:t0, r2, r4, r2 | |
+NOP t1 | |
*FMA.f32 r1:t0, r3, r1, r3 | |
+NOP t1 | |
*FMA.f32 r2:t0, r4, r2, r4 | |
+NOP t1 | |
*FMA.f32 r3:t0, r1, r3, r1 | |
+NOP t1 | |
} | |
clause_29: | |
ds(0) nbb r_uncond | |
{ | |
*FMA.f32 r4:t0, r2, r4, r2 | |
+NOP t1 | |
*FMA.f32 r1:t0, r3, r1, r3 | |
+NOP t1 | |
*FMA.f32 r2:t0, r4, r2, r4 | |
+NOP t1 | |
*FMA.f32 r3:t0, r1, r3, r1 | |
+NOP t1 | |
*FMA.f32 r4:t0, r2, r4, r2 | |
+NOP t1 | |
*FMA.f32 r1:t0, r3, r1, r3 | |
+NOP t1 | |
*FMA.f32 r2:t0, r4, r2, r4 | |
+IADD.s32 r5:t1, r5, 0x00000001 /* 0.000000 */ | |
*FMA.f32 r3:t0, r1, r3, r1 | |
+JUMP t1, clause_6 | |
} | |
clause_36: | |
ds(0) eos store | |
{ | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*FADD.f32 r1:t0, r2, r3 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*NOP t0 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
36483168 compute_sp_v2_float 23.489 GFLOPs 11.428ms
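For reference, a minimal host-side sketch of how one of these kernels could be dispatched and timed with plain OpenGL ES 3.1 calls. This is an assumption-laden sketch, not code from the benchmark: prog and ssbo are placeholders for the linked compute program and a 65536-float SSBO, and the 256-workgroup dispatch is inferred from the numbers above rather than taken from the dump.

#define _POSIX_C_SOURCE 199309L
#include <GLES3/gl31.h>
#include <stdio.h>
#include <time.h>

static double now_sec(void)
{
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return ts.tv_sec + ts.tv_nsec * 1e-9;
}

/* prog: linked compute program (hypothetical); ssbo: buffer sized for 65536 floats (hypothetical) */
void run_kernel(GLuint prog, GLuint ssbo)
{
    glUseProgram(prog);
    glUniform1f(1, 1.0f);                         /* _A lives at uniform location 1 */
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo);

    glFinish();                                   /* drain earlier GPU work before timing */
    double t0 = now_sec();
    glDispatchCompute(256, 1, 1);                 /* 256 groups x local size 256 = 65536 invocations (inferred) */
    glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
    glFinish();                                   /* wait for the dispatch to retire */
    double dt = now_sec() - t0;

    double flops = 65536.0 * 4096.0;              /* 4096 FLOPs per invocation, as counted above */
    printf("%.3f GFLOPs %.3fms\n", flops / dt * 1e-9, dt * 1e3);
}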
compute shader ----------
#define KERNEL compute_sp_v4
#define LOCAL_SIZE_X 256
#define DATATYPE float
#line 64
// Avoiding auto-vectorize by using vector-width locked dependent code
layout(local_size_x = LOCAL_SIZE_X) in;
#undef MAD_4
#undef MAD_16
#undef MAD_64
#define mad(a,b,c) (a*b+c)
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x);
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y);
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y);
struct vec8 {
    vec4 d0, d1;
};
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7))
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x))
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1))
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1))
struct vec16 {
    vec8 d0, d1;
};
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15))
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x));
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1)))
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1)))
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c))
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c))
layout(location = 1) uniform DATATYPE _A;
#define SCALE 1e-10
layout(std430, binding = 0) restrict writeonly buffer outbuffer {
    DATATYPE ptr[];
};
#line 145
void compute_sp_v4()
{
    uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u;
    vec4 x = vec4(_A, (_A+DATATYPE(1)), (_A+DATATYPE(2)), (_A+DATATYPE(3)));
    vec4 y = vec4((float(id) * SCALE), (float(id) * SCALE), (float(id) * SCALE), (float(id) * SCALE));
    for(int i=0; i<32; i++)
    {
        MAD_16(x, y);
    }
    ptr[id] = (y.x) + (y.y) + (y.z) + (y.w);
}
void main() {compute_sp_v4();}
----------
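The vec4 variant again totals 4096 FLOPs per invocation (32 iterations x 16 mads x 4 components x 2 FLOPs each); the backend scalarizes it into four interleaved FMA chains (the r8, r1, r2, r3 sequences in block4 below).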
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0x48ee6346, 0xd3c6fdf4, 0x78082e2c, 0xa4f173ed, 0x74ccc824} | |
name: GLSL6 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp float[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp float _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_7 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_8 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_9 = ishl ssa_7.y, ssa_8 | |
vec1 32 ssa_10 = iadd ssa_7.x, ssa_9 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_237 = insert_u16 ssa_7.z, ssa_1 | |
vec1 32 ssa_13 = iadd ssa_10, ssa_237 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_14 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
vec1 32 ssa_2 = load_const (0x3f800000 = 1.000000) | |
vec1 32 ssa_15 = fadd ssa_14, ssa_2 | |
vec1 32 ssa_3 = load_const (0x40000000 = 2.000000) | |
vec1 32 ssa_16 = fadd ssa_14, ssa_3 | |
vec1 32 ssa_4 = load_const (0x40400000 = 3.000000) | |
vec1 32 ssa_17 = fadd ssa_14, ssa_4 | |
vec1 32 ssa_18 = u2f32 ssa_13 | |
vec1 32 ssa_5 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_19 = fmul ssa_18, ssa_5 | |
vec1 32 ssa_6 = load_const (0x00000020 = 0.000000) | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_20 = phi block_0: ssa_19, block_4: ssa_175 | |
vec1 32 ssa_21 = phi block_0: ssa_19, block_4: ssa_174 | |
vec1 32 ssa_22 = phi block_0: ssa_19, block_4: ssa_173 | |
vec1 32 ssa_23 = phi block_0: ssa_19, block_4: ssa_172 | |
vec1 32 ssa_24 = phi block_0: ssa_14, block_4: ssa_179 | |
vec1 32 ssa_25 = phi block_0: ssa_15, block_4: ssa_178 | |
vec1 32 ssa_26 = phi block_0: ssa_16, block_4: ssa_177 | |
vec1 32 ssa_27 = phi block_0: ssa_17, block_4: ssa_176 | |
vec1 32 ssa_28 = phi block_0: ssa_0, block_4: ssa_158 | |
vec1 32 ssa_29 = ige32 ssa_28, ssa_6 | |
/* succs: block_2 block_3 */ | |
if ssa_29 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_235 = ffma ssa_20, ssa_24, ssa_20 | |
vec1 32 ssa_234 = ffma ssa_21, ssa_25, ssa_21 | |
vec1 32 ssa_233 = ffma ssa_22, ssa_26, ssa_22 | |
vec1 32 ssa_232 = ffma ssa_23, ssa_27, ssa_23 | |
vec1 32 ssa_231 = ffma ssa_235, ssa_20, ssa_235 | |
vec1 32 ssa_230 = ffma ssa_234, ssa_21, ssa_234 | |
vec1 32 ssa_229 = ffma ssa_233, ssa_22, ssa_233 | |
vec1 32 ssa_228 = ffma ssa_232, ssa_23, ssa_232 | |
vec1 32 ssa_227 = ffma ssa_231, ssa_235, ssa_231 | |
vec1 32 ssa_226 = ffma ssa_230, ssa_234, ssa_230 | |
vec1 32 ssa_225 = ffma ssa_229, ssa_233, ssa_229 | |
vec1 32 ssa_224 = ffma ssa_228, ssa_232, ssa_228 | |
vec1 32 ssa_223 = ffma ssa_227, ssa_231, ssa_227 | |
vec1 32 ssa_222 = ffma ssa_226, ssa_230, ssa_226 | |
vec1 32 ssa_221 = ffma ssa_225, ssa_229, ssa_225 | |
vec1 32 ssa_220 = ffma ssa_224, ssa_228, ssa_224 | |
vec1 32 ssa_219 = ffma ssa_223, ssa_227, ssa_223 | |
vec1 32 ssa_218 = ffma ssa_222, ssa_226, ssa_222 | |
vec1 32 ssa_217 = ffma ssa_221, ssa_225, ssa_221 | |
vec1 32 ssa_216 = ffma ssa_220, ssa_224, ssa_220 | |
vec1 32 ssa_215 = ffma ssa_219, ssa_223, ssa_219 | |
vec1 32 ssa_214 = ffma ssa_218, ssa_222, ssa_218 | |
vec1 32 ssa_213 = ffma ssa_217, ssa_221, ssa_217 | |
vec1 32 ssa_212 = ffma ssa_216, ssa_220, ssa_216 | |
vec1 32 ssa_211 = ffma ssa_215, ssa_219, ssa_215 | |
vec1 32 ssa_210 = ffma ssa_214, ssa_218, ssa_214 | |
vec1 32 ssa_209 = ffma ssa_213, ssa_217, ssa_213 | |
vec1 32 ssa_208 = ffma ssa_212, ssa_216, ssa_212 | |
vec1 32 ssa_207 = ffma ssa_211, ssa_215, ssa_211 | |
vec1 32 ssa_206 = ffma ssa_210, ssa_214, ssa_210 | |
vec1 32 ssa_205 = ffma ssa_209, ssa_213, ssa_209 | |
vec1 32 ssa_204 = ffma ssa_208, ssa_212, ssa_208 | |
vec1 32 ssa_203 = ffma ssa_207, ssa_211, ssa_207 | |
vec1 32 ssa_202 = ffma ssa_206, ssa_210, ssa_206 | |
vec1 32 ssa_201 = ffma ssa_205, ssa_209, ssa_205 | |
vec1 32 ssa_200 = ffma ssa_204, ssa_208, ssa_204 | |
vec1 32 ssa_199 = ffma ssa_203, ssa_207, ssa_203 | |
vec1 32 ssa_198 = ffma ssa_202, ssa_206, ssa_202 | |
vec1 32 ssa_197 = ffma ssa_201, ssa_205, ssa_201 | |
vec1 32 ssa_196 = ffma ssa_200, ssa_204, ssa_200 | |
vec1 32 ssa_195 = ffma ssa_199, ssa_203, ssa_199 | |
vec1 32 ssa_194 = ffma ssa_198, ssa_202, ssa_198 | |
vec1 32 ssa_193 = ffma ssa_197, ssa_201, ssa_197 | |
vec1 32 ssa_192 = ffma ssa_196, ssa_200, ssa_196 | |
vec1 32 ssa_191 = ffma ssa_195, ssa_199, ssa_195 | |
vec1 32 ssa_190 = ffma ssa_194, ssa_198, ssa_194 | |
vec1 32 ssa_189 = ffma ssa_193, ssa_197, ssa_193 | |
vec1 32 ssa_188 = ffma ssa_192, ssa_196, ssa_192 | |
vec1 32 ssa_187 = ffma ssa_191, ssa_195, ssa_191 | |
vec1 32 ssa_186 = ffma ssa_190, ssa_194, ssa_190 | |
vec1 32 ssa_185 = ffma ssa_189, ssa_193, ssa_189 | |
vec1 32 ssa_184 = ffma ssa_188, ssa_192, ssa_188 | |
vec1 32 ssa_183 = ffma ssa_187, ssa_191, ssa_187 | |
vec1 32 ssa_182 = ffma ssa_186, ssa_190, ssa_186 | |
vec1 32 ssa_181 = ffma ssa_185, ssa_189, ssa_185 | |
vec1 32 ssa_180 = ffma ssa_184, ssa_188, ssa_184 | |
vec1 32 ssa_179 = ffma ssa_183, ssa_187, ssa_183 | |
vec1 32 ssa_178 = ffma ssa_182, ssa_186, ssa_182 | |
vec1 32 ssa_177 = ffma ssa_181, ssa_185, ssa_181 | |
vec1 32 ssa_176 = ffma ssa_180, ssa_184, ssa_180 | |
vec1 32 ssa_175 = ffma ssa_179, ssa_183, ssa_179 | |
vec1 32 ssa_174 = ffma ssa_178, ssa_182, ssa_178 | |
vec1 32 ssa_173 = ffma ssa_177, ssa_181, ssa_177 | |
vec1 32 ssa_172 = ffma ssa_176, ssa_180, ssa_176 | |
vec1 32 ssa_158 = iadd ssa_28, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_159 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_160 = ishl ssa_13, ssa_159 | |
vec1 32 ssa_161 = fadd ssa_20, ssa_21 | |
vec1 32 ssa_162 = fadd ssa_161, ssa_22 | |
vec1 32 ssa_163 = fadd ssa_162, ssa_23 | |
vec1 64 ssa_164 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_165 = unpack_64_2x32_split_x ssa_164 | |
vec1 32 ssa_166 = unpack_64_2x32_split_y ssa_164 | |
vec1 32 ssa_167 = iadd ssa_165, ssa_160 | |
vec1 32 ssa_168 = ult32 ssa_167, ssa_165 | |
vec1 32 ssa_169 = b2i32 ssa_168 | |
vec1 32 ssa_170 = iadd ssa_169, ssa_166 | |
vec1 64 ssa_171 = pack_64_2x32_split ssa_167, ssa_170 | |
intrinsic store_global (ssa_163, ssa_171) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
block0 { | |
240 = MOV.i32 r62 | |
239 = MOV.i32 r61 | |
238 = MOV.i32 r60 | |
9 = LSHIFT_OR.i32 239, #0x0, #0x8.b0 | |
10 = IADD.s32 238, 9 | |
237 = MKVEC.v2i16 #0x0.h00, 240.h00 | |
13 = IADD.s32 10, 237 | |
15 = FADD.f32 u1, #0x3f800000 | |
16 = FADD.f32 u1, #0x40000000 | |
17 = FADD.f32 u1, #0x40400000 | |
18 = U32_TO_F32 13 | |
19 = FMA.f32 18, #0x2edbe6ff, #0x0.neg | |
} -> block1 | |
block1 { | |
20 = PHI 19, 175 | |
21 = PHI 19, 174 | |
22 = PHI 19, 173 | |
23 = PHI 19, 172 | |
24 = PHI u1, 179 | |
25 = PHI 15, 178 | |
26 = PHI 16, 177 | |
27 = PHI 17, 176 | |
28 = PHI #0x0, 158 | |
29 = ICMP.s32.m1.ge 28, #0x20 | |
BRANCHZ.i16.eq 29.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
235 = FMA.f32 20, 24, 20 | |
234 = FMA.f32 21, 25, 21 | |
233 = FMA.f32 22, 26, 22 | |
232 = FMA.f32 23, 27, 23 | |
231 = FMA.f32 235, 20, 235 | |
230 = FMA.f32 234, 21, 234 | |
229 = FMA.f32 233, 22, 233 | |
228 = FMA.f32 232, 23, 232 | |
227 = FMA.f32 231, 235, 231 | |
226 = FMA.f32 230, 234, 230 | |
225 = FMA.f32 229, 233, 229 | |
224 = FMA.f32 228, 232, 228 | |
223 = FMA.f32 227, 231, 227 | |
222 = FMA.f32 226, 230, 226 | |
221 = FMA.f32 225, 229, 225 | |
220 = FMA.f32 224, 228, 224 | |
219 = FMA.f32 223, 227, 223 | |
218 = FMA.f32 222, 226, 222 | |
217 = FMA.f32 221, 225, 221 | |
216 = FMA.f32 220, 224, 220 | |
215 = FMA.f32 219, 223, 219 | |
214 = FMA.f32 218, 222, 218 | |
213 = FMA.f32 217, 221, 217 | |
212 = FMA.f32 216, 220, 216 | |
211 = FMA.f32 215, 219, 215 | |
210 = FMA.f32 214, 218, 214 | |
209 = FMA.f32 213, 217, 213 | |
208 = FMA.f32 212, 216, 212 | |
207 = FMA.f32 211, 215, 211 | |
206 = FMA.f32 210, 214, 210 | |
205 = FMA.f32 209, 213, 209 | |
204 = FMA.f32 208, 212, 208 | |
203 = FMA.f32 207, 211, 207 | |
202 = FMA.f32 206, 210, 206 | |
201 = FMA.f32 205, 209, 205 | |
200 = FMA.f32 204, 208, 204 | |
199 = FMA.f32 203, 207, 203 | |
198 = FMA.f32 202, 206, 202 | |
197 = FMA.f32 201, 205, 201 | |
196 = FMA.f32 200, 204, 200 | |
195 = FMA.f32 199, 203, 199 | |
194 = FMA.f32 198, 202, 198 | |
193 = FMA.f32 197, 201, 197 | |
192 = FMA.f32 196, 200, 196 | |
191 = FMA.f32 195, 199, 195 | |
190 = FMA.f32 194, 198, 194 | |
189 = FMA.f32 193, 197, 193 | |
188 = FMA.f32 192, 196, 192 | |
187 = FMA.f32 191, 195, 191 | |
186 = FMA.f32 190, 194, 190 | |
185 = FMA.f32 189, 193, 189 | |
184 = FMA.f32 188, 192, 188 | |
183 = FMA.f32 187, 191, 187 | |
182 = FMA.f32 186, 190, 186 | |
181 = FMA.f32 185, 189, 185 | |
180 = FMA.f32 184, 188, 184 | |
179 = FMA.f32 183, 187, 183 | |
178 = FMA.f32 182, 186, 182 | |
177 = FMA.f32 181, 185, 181 | |
176 = FMA.f32 180, 184, 180 | |
175 = FMA.f32 179, 183, 179 | |
174 = FMA.f32 178, 182, 178 | |
173 = FMA.f32 177, 181, 177 | |
172 = FMA.f32 176, 180, 176 | |
158 = IADD.s32 28, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
160 = LSHIFT_OR.i32 13, #0x0, #0x2.b0 | |
161 = FADD.f32 20, 21 | |
162 = FADD.f32 161, 22 | |
163 = FADD.f32 162, 23 | |
167 = IADD.s32 u0, 160 | |
169 = ICMP.u32.i1.lt 167, u0 | |
170 = IADD.s32 169, u0[1] | |
STORE.i32 163, 167, 170, byte_offset:0 | |
} from block2 | |
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = MOV.i32 #0x3f800000 | |
r1 = FADD.f32 u1, r1 | |
r2 = MOV.i32 #0x40000000 | |
r2 = FADD.f32 u1, r2 | |
r3 = MOV.i32 #0x40400000 | |
r3 = FADD.f32 u1, r3 | |
r4 = U32_TO_F32 r0 | |
r4 = FMA.f32 r4, #0x2edbe6ff, #0x0.neg | |
r5 = MOV.i32 r4 | |
r6 = MOV.i32 r4 | |
r7 = MOV.i32 r4 | |
r8 = MOV.i32 u1 | |
r9 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r10 = ICMP.s32.m1.ge r9, #0x20 | |
BRANCHZ.i16.eq r10.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r8 = FMA.f32 r4, r8, r4 | |
r1 = FMA.f32 r5, r1, r5 | |
r2 = FMA.f32 r6, r2, r6 | |
r3 = FMA.f32 r7, r3, r7 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r1, r5, r1 | |
r6 = FMA.f32 r2, r6, r2 | |
r7 = FMA.f32 r3, r7, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r1 = FMA.f32 r5, r1, r5 | |
r2 = FMA.f32 r6, r2, r6 | |
r3 = FMA.f32 r7, r3, r7 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r1, r5, r1 | |
r6 = FMA.f32 r2, r6, r2 | |
r7 = FMA.f32 r3, r7, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r1 = FMA.f32 r5, r1, r5 | |
r2 = FMA.f32 r6, r2, r6 | |
r3 = FMA.f32 r7, r3, r7 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r1, r5, r1 | |
r6 = FMA.f32 r2, r6, r2 | |
r7 = FMA.f32 r3, r7, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r1 = FMA.f32 r5, r1, r5 | |
r2 = FMA.f32 r6, r2, r6 | |
r3 = FMA.f32 r7, r3, r7 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r1, r5, r1 | |
r6 = FMA.f32 r2, r6, r2 | |
r7 = FMA.f32 r3, r7, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r1 = FMA.f32 r5, r1, r5 | |
r2 = FMA.f32 r6, r2, r6 | |
r3 = FMA.f32 r7, r3, r7 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r1, r5, r1 | |
r6 = FMA.f32 r2, r6, r2 | |
r7 = FMA.f32 r3, r7, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r1 = FMA.f32 r5, r1, r5 | |
r2 = FMA.f32 r6, r2, r6 | |
r3 = FMA.f32 r7, r3, r7 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r1, r5, r1 | |
r6 = FMA.f32 r2, r6, r2 | |
r7 = FMA.f32 r3, r7, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r1 = FMA.f32 r5, r1, r5 | |
r2 = FMA.f32 r6, r2, r6 | |
r3 = FMA.f32 r7, r3, r7 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r1, r5, r1 | |
r6 = FMA.f32 r2, r6, r2 | |
r7 = FMA.f32 r3, r7, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r1 = FMA.f32 r5, r1, r5 | |
r2 = FMA.f32 r6, r2, r6 | |
r3 = FMA.f32 r7, r3, r7 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r1, r5, r1 | |
r6 = FMA.f32 r2, r6, r2 | |
r7 = FMA.f32 r3, r7, r3 | |
r9 = IADD.s32 r9, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r1 = FADD.f32 r4, r5 | |
r1 = FADD.f32 r1, r6 | |
r1 = FADD.f32 r1, r7 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
block0 { | |
id(0) nbb | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.y.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
800000000 | |
id(0) nbb r_uncond | |
* NOP | |
+ _.h00 = MOV.i32 fau.x | |
* NOP | |
+ r1 = FADD.f32 fau.x, t1 | |
* r2 = MOV.i32 fau.y | |
+ _.h00 = U32_TO_F32 r0 | |
* r4 = FMA.f32 t1, fau.x, t.neg | |
+ _.h00 = MOV.i32 fau.y | |
* r2 = FADD.f32 fau.x, r2 | |
+ r3 = FADD.f32 fau.x, t1 | |
* r5 = MOV.i32 r4 | |
+ r6 = MOV.i32 r4 | |
* r7 = MOV.i32 r4 | |
+ r8 = MOV.i32 fau.x | |
* NOP | |
+ r9 = MOV.i32 fau.x | |
404000002edbe6ff 400000003f800000 | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r9, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000020 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r1 = FMA.f32 r5, r1, r5 | |
+ NOP | |
* r2 = FMA.f32 r6, r2, r6 | |
+ NOP | |
* r3 = FMA.f32 r7, r3, r7 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r1, r5, r1 | |
+ NOP | |
* r6 = FMA.f32 r2, r6, r2 | |
+ NOP | |
* r7 = FMA.f32 r3, r7, r3 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r1 = FMA.f32 r5, r1, r5 | |
+ NOP | |
* r2 = FMA.f32 r6, r2, r6 | |
+ NOP | |
* r3 = FMA.f32 r7, r3, r7 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r1, r5, r1 | |
+ NOP | |
* r6 = FMA.f32 r2, r6, r2 | |
+ NOP | |
* r7 = FMA.f32 r3, r7, r3 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r1 = FMA.f32 r5, r1, r5 | |
+ NOP | |
* r2 = FMA.f32 r6, r2, r6 | |
+ NOP | |
* r3 = FMA.f32 r7, r3, r7 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r1, r5, r1 | |
+ NOP | |
* r6 = FMA.f32 r2, r6, r2 | |
+ NOP | |
* r7 = FMA.f32 r3, r7, r3 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r1 = FMA.f32 r5, r1, r5 | |
+ NOP | |
* r2 = FMA.f32 r6, r2, r6 | |
+ NOP | |
* r3 = FMA.f32 r7, r3, r7 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r1, r5, r1 | |
+ NOP | |
* r6 = FMA.f32 r2, r6, r2 | |
+ NOP | |
* r7 = FMA.f32 r3, r7, r3 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r1 = FMA.f32 r5, r1, r5 | |
+ NOP | |
* r2 = FMA.f32 r6, r2, r6 | |
+ NOP | |
* r3 = FMA.f32 r7, r3, r7 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r1, r5, r1 | |
+ NOP | |
* r6 = FMA.f32 r2, r6, r2 | |
+ NOP | |
* r7 = FMA.f32 r3, r7, r3 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r1 = FMA.f32 r5, r1, r5 | |
+ NOP | |
* r2 = FMA.f32 r6, r2, r6 | |
+ NOP | |
* r3 = FMA.f32 r7, r3, r7 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r1, r5, r1 | |
+ NOP | |
* r6 = FMA.f32 r2, r6, r2 | |
+ NOP | |
* r7 = FMA.f32 r3, r7, r3 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r1 = FMA.f32 r5, r1, r5 | |
+ NOP | |
* r2 = FMA.f32 r6, r2, r6 | |
+ NOP | |
* r3 = FMA.f32 r7, r3, r7 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r1, r5, r1 | |
+ NOP | |
* r6 = FMA.f32 r2, r6, r2 | |
+ NOP | |
* r7 = FMA.f32 r3, r7, r3 | |
+ NOP | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r1 = FMA.f32 r5, r1, r5 | |
+ NOP | |
* r2 = FMA.f32 r6, r2, r6 | |
+ NOP | |
* r3 = FMA.f32 r7, r3, r7 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r1, r5, r1 | |
+ NOP | |
* r6 = FMA.f32 r2, r6, r2 | |
+ r9 = IADD.s32 r9, fau.x | |
* r7 = FMA.f32 r3, r7, r3 | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) wait(0 ) nbb r_uncond | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ NOP | |
* _.h00 = FADD.f32 r4, r5 | |
+ r0 = IADD.s32 fau.x, t0 | |
* _.h00 = FADD.f32 t0, r6 | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* r1 = FADD.f32 t0, r7 | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
slot 0 reads: r1 | |
clause_0: | |
ds(0) nbb ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
} | |
clause_3: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+MOV.i32 t1, 0x3f800000 /* 1.000000 */ | |
*NOP t0 | |
+FADD.f32 r1:t1, u1.w0, t1 | |
*MOV.i32 r2:t0, 0x40000000 /* 2.000000 */ | |
+U32_TO_F32 t1, r0 | |
*FMA.f32 r4:t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+MOV.i32 t1, 0x40400000 /* 3.000000 */ | |
*FADD.f32 r2:t0, u1.w0, r2 | |
+FADD.f32 r3:t1, u1.w0, t1 | |
*MOV.i32 r5:t0, r4 | |
+MOV.i32 r6:t1, r4 | |
*MOV.i32 r7:t0, r4 | |
+MOV.i32 r8:t1, u1.w0 | |
*NOP t0 | |
+MOV.i32 r9:t1, #0.x | |
} | |
clause_10: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r9, 0x00000020 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_15 | |
} | |
clause_13: | |
ds(0) nbb next_store dwb(0) | |
{ | |
*NOP t0 | |
+JUMP t1, clause_64 | |
} | |
clause_15: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r1:t0, r5, r1, r5 | |
+NOP t1 | |
*FMA.f32 r2:t0, r6, r2, r6 | |
+NOP t1 | |
*FMA.f32 r3:t0, r7, r3, r7 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r1, r5, r1 | |
+NOP t1 | |
*FMA.f32 r6:t0, r2, r6, r2 | |
+NOP t1 | |
*FMA.f32 r7:t0, r3, r7, r3 | |
+NOP t1 | |
} | |
clause_21: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r1:t0, r5, r1, r5 | |
+NOP t1 | |
*FMA.f32 r2:t0, r6, r2, r6 | |
+NOP t1 | |
*FMA.f32 r3:t0, r7, r3, r7 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r1, r5, r1 | |
+NOP t1 | |
*FMA.f32 r6:t0, r2, r6, r2 | |
+NOP t1 | |
*FMA.f32 r7:t0, r3, r7, r3 | |
+NOP t1 | |
} | |
clause_27: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r1:t0, r5, r1, r5 | |
+NOP t1 | |
*FMA.f32 r2:t0, r6, r2, r6 | |
+NOP t1 | |
*FMA.f32 r3:t0, r7, r3, r7 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r1, r5, r1 | |
+NOP t1 | |
*FMA.f32 r6:t0, r2, r6, r2 | |
+NOP t1 | |
*FMA.f32 r7:t0, r3, r7, r3 | |
+NOP t1 | |
} | |
clause_33: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r1:t0, r5, r1, r5 | |
+NOP t1 | |
*FMA.f32 r2:t0, r6, r2, r6 | |
+NOP t1 | |
*FMA.f32 r3:t0, r7, r3, r7 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r1, r5, r1 | |
+NOP t1 | |
*FMA.f32 r6:t0, r2, r6, r2 | |
+NOP t1 | |
*FMA.f32 r7:t0, r3, r7, r3 | |
+NOP t1 | |
} | |
clause_39: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r1:t0, r5, r1, r5 | |
+NOP t1 | |
*FMA.f32 r2:t0, r6, r2, r6 | |
+NOP t1 | |
*FMA.f32 r3:t0, r7, r3, r7 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r1, r5, r1 | |
+NOP t1 | |
*FMA.f32 r6:t0, r2, r6, r2 | |
+NOP t1 | |
*FMA.f32 r7:t0, r3, r7, r3 | |
+NOP t1 | |
} | |
clause_45: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r1:t0, r5, r1, r5 | |
+NOP t1 | |
*FMA.f32 r2:t0, r6, r2, r6 | |
+NOP t1 | |
*FMA.f32 r3:t0, r7, r3, r7 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r1, r5, r1 | |
+NOP t1 | |
*FMA.f32 r6:t0, r2, r6, r2 | |
+NOP t1 | |
*FMA.f32 r7:t0, r3, r7, r3 | |
+NOP t1 | |
} | |
clause_51: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r1:t0, r5, r1, r5 | |
+NOP t1 | |
*FMA.f32 r2:t0, r6, r2, r6 | |
+NOP t1 | |
*FMA.f32 r3:t0, r7, r3, r7 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r1, r5, r1 | |
+NOP t1 | |
*FMA.f32 r6:t0, r2, r6, r2 | |
+NOP t1 | |
*FMA.f32 r7:t0, r3, r7, r3 | |
+NOP t1 | |
} | |
clause_57: | |
ds(0) nbb r_uncond | |
{ | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r1:t0, r5, r1, r5 | |
+NOP t1 | |
*FMA.f32 r2:t0, r6, r2, r6 | |
+NOP t1 | |
*FMA.f32 r3:t0, r7, r3, r7 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r1, r5, r1 | |
+NOP t1 | |
*FMA.f32 r6:t0, r2, r6, r2 | |
+IADD.s32 r9:t1, r9, 0x00000001 /* 0.000000 */ | |
*FMA.f32 r7:t0, r3, r7, r3 | |
+JUMP t1, clause_10 | |
} | |
clause_64: | |
ds(0) eos store | |
{ | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+NOP t1 | |
*FADD.f32 t0, r4, r5 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*FADD.f32 t0, t0, r6 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*FADD.f32 r1:t0, t0, r7 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
d1ede77f compute_sp_v4_float 24.112 GFLOPs 11.133ms | |
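A quick sanity check on that figure, assuming the dispatch covers 65536 invocations (256 work groups of 256 threads; the count is not printed here, so it is inferred from the numbers): the v4 loop runs 32 iterations (the 0x00000020 in the ICMP above), and each MAD_16 is 16 mads over 4 lanes at 2 FLOP apiece, so one invocation does 32 * 16 * 4 * 2 = 4096 FLOP and the grid does 65536 * 4096 = 268,435,456 FLOP. 268,435,456 FLOP / 11.133 ms = 24.11 GFLOP/s, matching the reported 24.112 GFLOPs.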
compute shader ---------- | |
#define KERNEL compute_sp_v8 | |
#define LOCAL_SIZE_X 256 | |
#define DATATYPE float | |
#line 64 | |
// Avoid auto-vectorization by using vector-width-locked dependent code | |
layout(local_size_x = LOCAL_SIZE_X) in; | |
#undef MAD_4 | |
#undef MAD_16 | |
#undef MAD_64 | |
#define mad(a,b,c) (a*b+c) | |
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x); | |
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); | |
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); | |
struct vec8 { | |
vec4 d0, d1; | |
}; | |
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7)) | |
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x)) | |
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1)) | |
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1)) | |
struct vec16 { | |
vec8 d0,d1; | |
}; | |
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15)) | |
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x)) | |
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1))) | |
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1))) | |
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c)) | |
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c)) | |
layout(location = 1) uniform DATATYPE _A; | |
#define SCALE 1e-10 | |
layout(std430, binding = 0) restrict writeonly buffer outbuffer { | |
DATATYPE ptr[]; | |
}; | |
#line 162 | |
void compute_sp_v8() | |
{ | |
uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u; | |
vec8 x = VEC8(_A, (_A+DATATYPE(1)), (_A+DATATYPE(2)), (_A+DATATYPE(3)), (_A+DATATYPE(4)), (_A+DATATYPE(5)), (_A+DATATYPE(6)), (_A+DATATYPE(7))); | |
vec8 y = VEC8_S(DATATYPE(float(id) * SCALE)); | |
#undef mad | |
#define mad mad8 | |
for(int i=0; i<16; i++) | |
{ | |
MAD_16(x, y); | |
} | |
vec4 s = y.d0 + y.d1; | |
vec2 t = s.xy + s.zw; | |
ptr[id] = t.x + t.y; | |
} | |
void main() {compute_sp_v8();} | |
---------- | |
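To make the macro soup above concrete, here is a hand expansion of the first two statements of one MAD_4(x, y) step after "#define mad mad8" (schematic, not the literal preprocessor output):

    x = VEC8_ADD(VEC8_MUL(y, x), y);   // x = y*x + y, componentwise over 8 lanes
    y = VEC8_ADD(VEC8_MUL(x, y), x);   // y = x*y + x, consumes the x just written

Each statement depends on the previous one, so the compiler cannot widen or reorder the chain, but the 8 components are mutually independent, which is what yields 8 parallel FFMA chains in the backend dumps below. The final reduction into ptr[id] is what keeps the whole loop live through dead-code elimination.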
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0xe904ad44, 0x6f83e415, 0x6ab7fd0e, 0x33347baa, 0x854f8dc7} | |
name: GLSL8 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp float[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp float _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_11 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_12 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_13 = ishl ssa_11.y, ssa_12 | |
vec1 32 ssa_14 = iadd ssa_11.x, ssa_13 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_448 = insert_u16 ssa_11.z, ssa_1 | |
vec1 32 ssa_16 = iadd ssa_14, ssa_448 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_17 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
vec1 32 ssa_2 = load_const (0x3f800000 = 1.000000) | |
vec1 32 ssa_18 = fadd ssa_17, ssa_2 | |
vec1 32 ssa_3 = load_const (0x40000000 = 2.000000) | |
vec1 32 ssa_19 = fadd ssa_17, ssa_3 | |
vec1 32 ssa_4 = load_const (0x40400000 = 3.000000) | |
vec1 32 ssa_20 = fadd ssa_17, ssa_4 | |
vec1 32 ssa_5 = load_const (0x40800000 = 4.000000) | |
vec1 32 ssa_21 = fadd ssa_17, ssa_5 | |
vec1 32 ssa_6 = load_const (0x40a00000 = 5.000000) | |
vec1 32 ssa_22 = fadd ssa_17, ssa_6 | |
vec1 32 ssa_7 = load_const (0x40c00000 = 6.000000) | |
vec1 32 ssa_23 = fadd ssa_17, ssa_7 | |
vec1 32 ssa_8 = load_const (0x40e00000 = 7.000000) | |
vec1 32 ssa_24 = fadd ssa_17, ssa_8 | |
vec1 32 ssa_25 = u2f32 ssa_16 | |
vec1 32 ssa_9 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_26 = fmul ssa_25, ssa_9 | |
vec1 32 ssa_10 = load_const (0x00000010 = 0.000000) | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_27 = phi block_0: ssa_26, block_4: ssa_322 | |
vec1 32 ssa_28 = phi block_0: ssa_26, block_4: ssa_321 | |
vec1 32 ssa_29 = phi block_0: ssa_26, block_4: ssa_320 | |
vec1 32 ssa_30 = phi block_0: ssa_26, block_4: ssa_319 | |
vec1 32 ssa_31 = phi block_0: ssa_26, block_4: ssa_326 | |
vec1 32 ssa_32 = phi block_0: ssa_26, block_4: ssa_325 | |
vec1 32 ssa_33 = phi block_0: ssa_26, block_4: ssa_324 | |
vec1 32 ssa_34 = phi block_0: ssa_26, block_4: ssa_323 | |
vec1 32 ssa_35 = phi block_0: ssa_21, block_4: ssa_330 | |
vec1 32 ssa_36 = phi block_0: ssa_22, block_4: ssa_329 | |
vec1 32 ssa_37 = phi block_0: ssa_23, block_4: ssa_328 | |
vec1 32 ssa_38 = phi block_0: ssa_24, block_4: ssa_327 | |
vec1 32 ssa_39 = phi block_0: ssa_17, block_4: ssa_334 | |
vec1 32 ssa_40 = phi block_0: ssa_18, block_4: ssa_333 | |
vec1 32 ssa_41 = phi block_0: ssa_19, block_4: ssa_332 | |
vec1 32 ssa_42 = phi block_0: ssa_20, block_4: ssa_331 | |
vec1 32 ssa_43 = phi block_0: ssa_0, block_4: ssa_301 | |
vec1 32 ssa_44 = ige32 ssa_43, ssa_10 | |
/* succs: block_2 block_3 */ | |
if ssa_44 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_446 = ffma ssa_31, ssa_39, ssa_31 | |
vec1 32 ssa_445 = ffma ssa_32, ssa_40, ssa_32 | |
vec1 32 ssa_444 = ffma ssa_33, ssa_41, ssa_33 | |
vec1 32 ssa_443 = ffma ssa_34, ssa_42, ssa_34 | |
vec1 32 ssa_442 = ffma ssa_27, ssa_35, ssa_27 | |
vec1 32 ssa_441 = ffma ssa_28, ssa_36, ssa_28 | |
vec1 32 ssa_440 = ffma ssa_29, ssa_37, ssa_29 | |
vec1 32 ssa_439 = ffma ssa_30, ssa_38, ssa_30 | |
vec1 32 ssa_438 = ffma ssa_446, ssa_31, ssa_446 | |
vec1 32 ssa_437 = ffma ssa_445, ssa_32, ssa_445 | |
vec1 32 ssa_436 = ffma ssa_444, ssa_33, ssa_444 | |
vec1 32 ssa_435 = ffma ssa_443, ssa_34, ssa_443 | |
vec1 32 ssa_434 = ffma ssa_442, ssa_27, ssa_442 | |
vec1 32 ssa_433 = ffma ssa_441, ssa_28, ssa_441 | |
vec1 32 ssa_432 = ffma ssa_440, ssa_29, ssa_440 | |
vec1 32 ssa_431 = ffma ssa_439, ssa_30, ssa_439 | |
vec1 32 ssa_430 = ffma ssa_438, ssa_446, ssa_438 | |
vec1 32 ssa_429 = ffma ssa_437, ssa_445, ssa_437 | |
vec1 32 ssa_428 = ffma ssa_436, ssa_444, ssa_436 | |
vec1 32 ssa_427 = ffma ssa_435, ssa_443, ssa_435 | |
vec1 32 ssa_426 = ffma ssa_434, ssa_442, ssa_434 | |
vec1 32 ssa_425 = ffma ssa_433, ssa_441, ssa_433 | |
vec1 32 ssa_424 = ffma ssa_432, ssa_440, ssa_432 | |
vec1 32 ssa_423 = ffma ssa_431, ssa_439, ssa_431 | |
vec1 32 ssa_422 = ffma ssa_430, ssa_438, ssa_430 | |
vec1 32 ssa_421 = ffma ssa_429, ssa_437, ssa_429 | |
vec1 32 ssa_420 = ffma ssa_428, ssa_436, ssa_428 | |
vec1 32 ssa_419 = ffma ssa_427, ssa_435, ssa_427 | |
vec1 32 ssa_418 = ffma ssa_426, ssa_434, ssa_426 | |
vec1 32 ssa_417 = ffma ssa_425, ssa_433, ssa_425 | |
vec1 32 ssa_416 = ffma ssa_424, ssa_432, ssa_424 | |
vec1 32 ssa_415 = ffma ssa_423, ssa_431, ssa_423 | |
vec1 32 ssa_414 = ffma ssa_422, ssa_430, ssa_422 | |
vec1 32 ssa_413 = ffma ssa_421, ssa_429, ssa_421 | |
vec1 32 ssa_412 = ffma ssa_420, ssa_428, ssa_420 | |
vec1 32 ssa_411 = ffma ssa_419, ssa_427, ssa_419 | |
vec1 32 ssa_410 = ffma ssa_418, ssa_426, ssa_418 | |
vec1 32 ssa_409 = ffma ssa_417, ssa_425, ssa_417 | |
vec1 32 ssa_408 = ffma ssa_416, ssa_424, ssa_416 | |
vec1 32 ssa_407 = ffma ssa_415, ssa_423, ssa_415 | |
vec1 32 ssa_406 = ffma ssa_414, ssa_422, ssa_414 | |
vec1 32 ssa_405 = ffma ssa_413, ssa_421, ssa_413 | |
vec1 32 ssa_404 = ffma ssa_412, ssa_420, ssa_412 | |
vec1 32 ssa_403 = ffma ssa_411, ssa_419, ssa_411 | |
vec1 32 ssa_402 = ffma ssa_410, ssa_418, ssa_410 | |
vec1 32 ssa_401 = ffma ssa_409, ssa_417, ssa_409 | |
vec1 32 ssa_400 = ffma ssa_408, ssa_416, ssa_408 | |
vec1 32 ssa_399 = ffma ssa_407, ssa_415, ssa_407 | |
vec1 32 ssa_398 = ffma ssa_406, ssa_414, ssa_406 | |
vec1 32 ssa_397 = ffma ssa_405, ssa_413, ssa_405 | |
vec1 32 ssa_396 = ffma ssa_404, ssa_412, ssa_404 | |
vec1 32 ssa_395 = ffma ssa_403, ssa_411, ssa_403 | |
vec1 32 ssa_394 = ffma ssa_402, ssa_410, ssa_402 | |
vec1 32 ssa_393 = ffma ssa_401, ssa_409, ssa_401 | |
vec1 32 ssa_392 = ffma ssa_400, ssa_408, ssa_400 | |
vec1 32 ssa_391 = ffma ssa_399, ssa_407, ssa_399 | |
vec1 32 ssa_390 = ffma ssa_398, ssa_406, ssa_398 | |
vec1 32 ssa_389 = ffma ssa_397, ssa_405, ssa_397 | |
vec1 32 ssa_388 = ffma ssa_396, ssa_404, ssa_396 | |
vec1 32 ssa_387 = ffma ssa_395, ssa_403, ssa_395 | |
vec1 32 ssa_386 = ffma ssa_394, ssa_402, ssa_394 | |
vec1 32 ssa_385 = ffma ssa_393, ssa_401, ssa_393 | |
vec1 32 ssa_384 = ffma ssa_392, ssa_400, ssa_392 | |
vec1 32 ssa_383 = ffma ssa_391, ssa_399, ssa_391 | |
vec1 32 ssa_382 = ffma ssa_390, ssa_398, ssa_390 | |
vec1 32 ssa_381 = ffma ssa_389, ssa_397, ssa_389 | |
vec1 32 ssa_380 = ffma ssa_388, ssa_396, ssa_388 | |
vec1 32 ssa_379 = ffma ssa_387, ssa_395, ssa_387 | |
vec1 32 ssa_378 = ffma ssa_386, ssa_394, ssa_386 | |
vec1 32 ssa_377 = ffma ssa_385, ssa_393, ssa_385 | |
vec1 32 ssa_376 = ffma ssa_384, ssa_392, ssa_384 | |
vec1 32 ssa_375 = ffma ssa_383, ssa_391, ssa_383 | |
vec1 32 ssa_374 = ffma ssa_382, ssa_390, ssa_382 | |
vec1 32 ssa_373 = ffma ssa_381, ssa_389, ssa_381 | |
vec1 32 ssa_372 = ffma ssa_380, ssa_388, ssa_380 | |
vec1 32 ssa_371 = ffma ssa_379, ssa_387, ssa_379 | |
vec1 32 ssa_370 = ffma ssa_378, ssa_386, ssa_378 | |
vec1 32 ssa_369 = ffma ssa_377, ssa_385, ssa_377 | |
vec1 32 ssa_368 = ffma ssa_376, ssa_384, ssa_376 | |
vec1 32 ssa_367 = ffma ssa_375, ssa_383, ssa_375 | |
vec1 32 ssa_366 = ffma ssa_374, ssa_382, ssa_374 | |
vec1 32 ssa_365 = ffma ssa_373, ssa_381, ssa_373 | |
vec1 32 ssa_364 = ffma ssa_372, ssa_380, ssa_372 | |
vec1 32 ssa_363 = ffma ssa_371, ssa_379, ssa_371 | |
vec1 32 ssa_362 = ffma ssa_370, ssa_378, ssa_370 | |
vec1 32 ssa_361 = ffma ssa_369, ssa_377, ssa_369 | |
vec1 32 ssa_360 = ffma ssa_368, ssa_376, ssa_368 | |
vec1 32 ssa_359 = ffma ssa_367, ssa_375, ssa_367 | |
vec1 32 ssa_358 = ffma ssa_366, ssa_374, ssa_366 | |
vec1 32 ssa_357 = ffma ssa_365, ssa_373, ssa_365 | |
vec1 32 ssa_356 = ffma ssa_364, ssa_372, ssa_364 | |
vec1 32 ssa_355 = ffma ssa_363, ssa_371, ssa_363 | |
vec1 32 ssa_354 = ffma ssa_362, ssa_370, ssa_362 | |
vec1 32 ssa_353 = ffma ssa_361, ssa_369, ssa_361 | |
vec1 32 ssa_352 = ffma ssa_360, ssa_368, ssa_360 | |
vec1 32 ssa_351 = ffma ssa_359, ssa_367, ssa_359 | |
vec1 32 ssa_350 = ffma ssa_358, ssa_366, ssa_358 | |
vec1 32 ssa_349 = ffma ssa_357, ssa_365, ssa_357 | |
vec1 32 ssa_348 = ffma ssa_356, ssa_364, ssa_356 | |
vec1 32 ssa_347 = ffma ssa_355, ssa_363, ssa_355 | |
vec1 32 ssa_346 = ffma ssa_354, ssa_362, ssa_354 | |
vec1 32 ssa_345 = ffma ssa_353, ssa_361, ssa_353 | |
vec1 32 ssa_344 = ffma ssa_352, ssa_360, ssa_352 | |
vec1 32 ssa_343 = ffma ssa_351, ssa_359, ssa_351 | |
vec1 32 ssa_342 = ffma ssa_350, ssa_358, ssa_350 | |
vec1 32 ssa_341 = ffma ssa_349, ssa_357, ssa_349 | |
vec1 32 ssa_340 = ffma ssa_348, ssa_356, ssa_348 | |
vec1 32 ssa_339 = ffma ssa_347, ssa_355, ssa_347 | |
vec1 32 ssa_338 = ffma ssa_346, ssa_354, ssa_346 | |
vec1 32 ssa_337 = ffma ssa_345, ssa_353, ssa_345 | |
vec1 32 ssa_336 = ffma ssa_344, ssa_352, ssa_344 | |
vec1 32 ssa_335 = ffma ssa_343, ssa_351, ssa_343 | |
vec1 32 ssa_334 = ffma ssa_342, ssa_350, ssa_342 | |
vec1 32 ssa_333 = ffma ssa_341, ssa_349, ssa_341 | |
vec1 32 ssa_332 = ffma ssa_340, ssa_348, ssa_340 | |
vec1 32 ssa_331 = ffma ssa_339, ssa_347, ssa_339 | |
vec1 32 ssa_330 = ffma ssa_338, ssa_346, ssa_338 | |
vec1 32 ssa_329 = ffma ssa_337, ssa_345, ssa_337 | |
vec1 32 ssa_328 = ffma ssa_336, ssa_344, ssa_336 | |
vec1 32 ssa_327 = ffma ssa_335, ssa_343, ssa_335 | |
vec1 32 ssa_326 = ffma ssa_334, ssa_342, ssa_334 | |
vec1 32 ssa_325 = ffma ssa_333, ssa_341, ssa_333 | |
vec1 32 ssa_324 = ffma ssa_332, ssa_340, ssa_332 | |
vec1 32 ssa_323 = ffma ssa_331, ssa_339, ssa_331 | |
vec1 32 ssa_322 = ffma ssa_330, ssa_338, ssa_330 | |
vec1 32 ssa_321 = ffma ssa_329, ssa_337, ssa_329 | |
vec1 32 ssa_320 = ffma ssa_328, ssa_336, ssa_328 | |
vec1 32 ssa_319 = ffma ssa_327, ssa_335, ssa_327 | |
vec1 32 ssa_301 = iadd ssa_43, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_302 = fadd ssa_31, ssa_27 | |
vec1 32 ssa_303 = fadd ssa_32, ssa_28 | |
vec1 32 ssa_304 = fadd ssa_33, ssa_29 | |
vec1 32 ssa_305 = fadd ssa_34, ssa_30 | |
vec1 32 ssa_306 = fadd ssa_302, ssa_304 | |
vec1 32 ssa_307 = fadd ssa_303, ssa_305 | |
vec1 32 ssa_308 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_309 = ishl ssa_16, ssa_308 | |
vec1 32 ssa_310 = fadd ssa_306, ssa_307 | |
vec1 64 ssa_311 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_312 = unpack_64_2x32_split_x ssa_311 | |
vec1 32 ssa_313 = unpack_64_2x32_split_y ssa_311 | |
vec1 32 ssa_314 = iadd ssa_312, ssa_309 | |
vec1 32 ssa_315 = ult32 ssa_314, ssa_312 | |
vec1 32 ssa_316 = b2i32 ssa_315 | |
vec1 32 ssa_317 = iadd ssa_316, ssa_313 | |
vec1 64 ssa_318 = pack_64_2x32_split ssa_314, ssa_317 | |
intrinsic store_global (ssa_310, ssa_318) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
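The rest of this listing shows the same v8 kernel at successive backend stages, as far as I can tell from the dump: first the Bifrost IR in SSA form (bare value numbers), then the same IR after register allocation (r0..r49), then the scheduled tuples with their packed constant words, and finally the clause-level disassembly (clause_0, clause_7, ...) that is what the Mali-G52 actually executes.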
block0 { | |
451 = MOV.i32 r62 | |
450 = MOV.i32 r61 | |
449 = MOV.i32 r60 | |
13 = LSHIFT_OR.i32 450, #0x0, #0x8.b0 | |
14 = IADD.s32 449, 13 | |
448 = MKVEC.v2i16 #0x0.h00, 451.h00 | |
16 = IADD.s32 14, 448 | |
18 = FADD.f32 u1, #0x3f800000 | |
19 = FADD.f32 u1, #0x40000000 | |
20 = FADD.f32 u1, #0x40400000 | |
21 = FADD.f32 u1, #0x40800000 | |
22 = FADD.f32 u1, #0x40a00000 | |
23 = FADD.f32 u1, #0x40c00000 | |
24 = FADD.f32 u1, #0x40e00000 | |
25 = U32_TO_F32 16 | |
26 = FMA.f32 25, #0x2edbe6ff, #0x0.neg | |
} -> block1 | |
block1 { | |
27 = PHI 26, 322 | |
28 = PHI 26, 321 | |
29 = PHI 26, 320 | |
30 = PHI 26, 319 | |
31 = PHI 26, 326 | |
32 = PHI 26, 325 | |
33 = PHI 26, 324 | |
34 = PHI 26, 323 | |
35 = PHI 21, 330 | |
36 = PHI 22, 329 | |
37 = PHI 23, 328 | |
38 = PHI 24, 327 | |
39 = PHI u1, 334 | |
40 = PHI 18, 333 | |
41 = PHI 19, 332 | |
42 = PHI 20, 331 | |
43 = PHI #0x0, 301 | |
44 = ICMP.s32.m1.ge 43, #0x10 | |
BRANCHZ.i16.eq 44.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
446 = FMA.f32 31, 39, 31 | |
445 = FMA.f32 32, 40, 32 | |
444 = FMA.f32 33, 41, 33 | |
443 = FMA.f32 34, 42, 34 | |
442 = FMA.f32 27, 35, 27 | |
441 = FMA.f32 28, 36, 28 | |
440 = FMA.f32 29, 37, 29 | |
439 = FMA.f32 30, 38, 30 | |
438 = FMA.f32 446, 31, 446 | |
437 = FMA.f32 445, 32, 445 | |
436 = FMA.f32 444, 33, 444 | |
435 = FMA.f32 443, 34, 443 | |
434 = FMA.f32 442, 27, 442 | |
433 = FMA.f32 441, 28, 441 | |
432 = FMA.f32 440, 29, 440 | |
431 = FMA.f32 439, 30, 439 | |
430 = FMA.f32 438, 446, 438 | |
429 = FMA.f32 437, 445, 437 | |
428 = FMA.f32 436, 444, 436 | |
427 = FMA.f32 435, 443, 435 | |
426 = FMA.f32 434, 442, 434 | |
425 = FMA.f32 433, 441, 433 | |
424 = FMA.f32 432, 440, 432 | |
423 = FMA.f32 431, 439, 431 | |
422 = FMA.f32 430, 438, 430 | |
421 = FMA.f32 429, 437, 429 | |
420 = FMA.f32 428, 436, 428 | |
419 = FMA.f32 427, 435, 427 | |
418 = FMA.f32 426, 434, 426 | |
417 = FMA.f32 425, 433, 425 | |
416 = FMA.f32 424, 432, 424 | |
415 = FMA.f32 423, 431, 423 | |
414 = FMA.f32 422, 430, 422 | |
413 = FMA.f32 421, 429, 421 | |
412 = FMA.f32 420, 428, 420 | |
411 = FMA.f32 419, 427, 419 | |
410 = FMA.f32 418, 426, 418 | |
409 = FMA.f32 417, 425, 417 | |
408 = FMA.f32 416, 424, 416 | |
407 = FMA.f32 415, 423, 415 | |
406 = FMA.f32 414, 422, 414 | |
405 = FMA.f32 413, 421, 413 | |
404 = FMA.f32 412, 420, 412 | |
403 = FMA.f32 411, 419, 411 | |
402 = FMA.f32 410, 418, 410 | |
401 = FMA.f32 409, 417, 409 | |
400 = FMA.f32 408, 416, 408 | |
399 = FMA.f32 407, 415, 407 | |
398 = FMA.f32 406, 414, 406 | |
397 = FMA.f32 405, 413, 405 | |
396 = FMA.f32 404, 412, 404 | |
395 = FMA.f32 403, 411, 403 | |
394 = FMA.f32 402, 410, 402 | |
393 = FMA.f32 401, 409, 401 | |
392 = FMA.f32 400, 408, 400 | |
391 = FMA.f32 399, 407, 399 | |
390 = FMA.f32 398, 406, 398 | |
389 = FMA.f32 397, 405, 397 | |
388 = FMA.f32 396, 404, 396 | |
387 = FMA.f32 395, 403, 395 | |
386 = FMA.f32 394, 402, 394 | |
385 = FMA.f32 393, 401, 393 | |
384 = FMA.f32 392, 400, 392 | |
383 = FMA.f32 391, 399, 391 | |
382 = FMA.f32 390, 398, 390 | |
381 = FMA.f32 389, 397, 389 | |
380 = FMA.f32 388, 396, 388 | |
379 = FMA.f32 387, 395, 387 | |
378 = FMA.f32 386, 394, 386 | |
377 = FMA.f32 385, 393, 385 | |
376 = FMA.f32 384, 392, 384 | |
375 = FMA.f32 383, 391, 383 | |
374 = FMA.f32 382, 390, 382 | |
373 = FMA.f32 381, 389, 381 | |
372 = FMA.f32 380, 388, 380 | |
371 = FMA.f32 379, 387, 379 | |
370 = FMA.f32 378, 386, 378 | |
369 = FMA.f32 377, 385, 377 | |
368 = FMA.f32 376, 384, 376 | |
367 = FMA.f32 375, 383, 375 | |
366 = FMA.f32 374, 382, 374 | |
365 = FMA.f32 373, 381, 373 | |
364 = FMA.f32 372, 380, 372 | |
363 = FMA.f32 371, 379, 371 | |
362 = FMA.f32 370, 378, 370 | |
361 = FMA.f32 369, 377, 369 | |
360 = FMA.f32 368, 376, 368 | |
359 = FMA.f32 367, 375, 367 | |
358 = FMA.f32 366, 374, 366 | |
357 = FMA.f32 365, 373, 365 | |
356 = FMA.f32 364, 372, 364 | |
355 = FMA.f32 363, 371, 363 | |
354 = FMA.f32 362, 370, 362 | |
353 = FMA.f32 361, 369, 361 | |
352 = FMA.f32 360, 368, 360 | |
351 = FMA.f32 359, 367, 359 | |
350 = FMA.f32 358, 366, 358 | |
349 = FMA.f32 357, 365, 357 | |
348 = FMA.f32 356, 364, 356 | |
347 = FMA.f32 355, 363, 355 | |
346 = FMA.f32 354, 362, 354 | |
345 = FMA.f32 353, 361, 353 | |
344 = FMA.f32 352, 360, 352 | |
343 = FMA.f32 351, 359, 351 | |
342 = FMA.f32 350, 358, 350 | |
341 = FMA.f32 349, 357, 349 | |
340 = FMA.f32 348, 356, 348 | |
339 = FMA.f32 347, 355, 347 | |
338 = FMA.f32 346, 354, 346 | |
337 = FMA.f32 345, 353, 345 | |
336 = FMA.f32 344, 352, 344 | |
335 = FMA.f32 343, 351, 343 | |
334 = FMA.f32 342, 350, 342 | |
333 = FMA.f32 341, 349, 341 | |
332 = FMA.f32 340, 348, 340 | |
331 = FMA.f32 339, 347, 339 | |
330 = FMA.f32 338, 346, 338 | |
329 = FMA.f32 337, 345, 337 | |
328 = FMA.f32 336, 344, 336 | |
327 = FMA.f32 335, 343, 335 | |
326 = FMA.f32 334, 342, 334 | |
325 = FMA.f32 333, 341, 333 | |
324 = FMA.f32 332, 340, 332 | |
323 = FMA.f32 331, 339, 331 | |
322 = FMA.f32 330, 338, 330 | |
321 = FMA.f32 329, 337, 329 | |
320 = FMA.f32 328, 336, 328 | |
319 = FMA.f32 327, 335, 327 | |
301 = IADD.s32 43, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
302 = FADD.f32 31, 27 | |
303 = FADD.f32 32, 28 | |
304 = FADD.f32 33, 29 | |
305 = FADD.f32 34, 30 | |
306 = FADD.f32 302, 304 | |
307 = FADD.f32 303, 305 | |
309 = LSHIFT_OR.i32 16, #0x0, #0x2.b0 | |
310 = FADD.f32 306, 307 | |
314 = IADD.s32 u0, 309 | |
316 = ICMP.u32.i1.lt 314, u0 | |
317 = IADD.s32 316, u0[1] | |
STORE.i32 310, 314, 317, byte_offset:0 | |
} from block2 | |
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = MOV.i32 #0x3f800000 | |
r1 = FADD.f32 u1, r1 | |
r2 = MOV.i32 #0x40000000 | |
r2 = FADD.f32 u1, r2 | |
r3 = MOV.i32 #0x40400000 | |
r3 = FADD.f32 u1, r3 | |
r4 = MOV.i32 #0x40800000 | |
r4 = FADD.f32 u1, r4 | |
r5 = MOV.i32 #0x40a00000 | |
r5 = FADD.f32 u1, r5 | |
r6 = MOV.i32 #0x40c00000 | |
r6 = FADD.f32 u1, r6 | |
r7 = MOV.i32 #0x40e00000 | |
r7 = FADD.f32 u1, r7 | |
r8 = U32_TO_F32 r0 | |
r8 = FMA.f32 r8, #0x2edbe6ff, #0x0.neg | |
r9 = MOV.i32 r8 | |
r10 = MOV.i32 r8 | |
r11 = MOV.i32 r8 | |
r12 = MOV.i32 r8 | |
r13 = MOV.i32 r8 | |
r14 = MOV.i32 r8 | |
r15 = MOV.i32 r8 | |
r48 = MOV.i32 u1 | |
r49 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r50 = ICMP.s32.m1.ge r49, #0x10 | |
BRANCHZ.i16.eq r50.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r48 = FMA.f32 r12, r48, r12 | |
r1 = FMA.f32 r13, r1, r13 | |
r2 = FMA.f32 r14, r2, r14 | |
r3 = FMA.f32 r15, r3, r15 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r9, r5, r9 | |
r6 = FMA.f32 r10, r6, r10 | |
r7 = FMA.f32 r11, r7, r11 | |
r12 = FMA.f32 r48, r12, r48 | |
r13 = FMA.f32 r1, r13, r1 | |
r14 = FMA.f32 r2, r14, r2 | |
r15 = FMA.f32 r3, r15, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r9 = FMA.f32 r5, r9, r5 | |
r10 = FMA.f32 r6, r10, r6 | |
r11 = FMA.f32 r7, r11, r7 | |
r48 = FMA.f32 r12, r48, r12 | |
r1 = FMA.f32 r13, r1, r13 | |
r2 = FMA.f32 r14, r2, r14 | |
r3 = FMA.f32 r15, r3, r15 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r9, r5, r9 | |
r6 = FMA.f32 r10, r6, r10 | |
r7 = FMA.f32 r11, r7, r11 | |
r12 = FMA.f32 r48, r12, r48 | |
r13 = FMA.f32 r1, r13, r1 | |
r14 = FMA.f32 r2, r14, r2 | |
r15 = FMA.f32 r3, r15, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r9 = FMA.f32 r5, r9, r5 | |
r10 = FMA.f32 r6, r10, r6 | |
r11 = FMA.f32 r7, r11, r7 | |
r48 = FMA.f32 r12, r48, r12 | |
r1 = FMA.f32 r13, r1, r13 | |
r2 = FMA.f32 r14, r2, r14 | |
r3 = FMA.f32 r15, r3, r15 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r9, r5, r9 | |
r6 = FMA.f32 r10, r6, r10 | |
r7 = FMA.f32 r11, r7, r11 | |
r12 = FMA.f32 r48, r12, r48 | |
r13 = FMA.f32 r1, r13, r1 | |
r14 = FMA.f32 r2, r14, r2 | |
r15 = FMA.f32 r3, r15, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r9 = FMA.f32 r5, r9, r5 | |
r10 = FMA.f32 r6, r10, r6 | |
r11 = FMA.f32 r7, r11, r7 | |
r48 = FMA.f32 r12, r48, r12 | |
r1 = FMA.f32 r13, r1, r13 | |
r2 = FMA.f32 r14, r2, r14 | |
r3 = FMA.f32 r15, r3, r15 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r9, r5, r9 | |
r6 = FMA.f32 r10, r6, r10 | |
r7 = FMA.f32 r11, r7, r11 | |
r12 = FMA.f32 r48, r12, r48 | |
r13 = FMA.f32 r1, r13, r1 | |
r14 = FMA.f32 r2, r14, r2 | |
r15 = FMA.f32 r3, r15, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r9 = FMA.f32 r5, r9, r5 | |
r10 = FMA.f32 r6, r10, r6 | |
r11 = FMA.f32 r7, r11, r7 | |
r48 = FMA.f32 r12, r48, r12 | |
r1 = FMA.f32 r13, r1, r13 | |
r2 = FMA.f32 r14, r2, r14 | |
r3 = FMA.f32 r15, r3, r15 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r9, r5, r9 | |
r6 = FMA.f32 r10, r6, r10 | |
r7 = FMA.f32 r11, r7, r11 | |
r12 = FMA.f32 r48, r12, r48 | |
r13 = FMA.f32 r1, r13, r1 | |
r14 = FMA.f32 r2, r14, r2 | |
r15 = FMA.f32 r3, r15, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r9 = FMA.f32 r5, r9, r5 | |
r10 = FMA.f32 r6, r10, r6 | |
r11 = FMA.f32 r7, r11, r7 | |
r48 = FMA.f32 r12, r48, r12 | |
r1 = FMA.f32 r13, r1, r13 | |
r2 = FMA.f32 r14, r2, r14 | |
r3 = FMA.f32 r15, r3, r15 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r9, r5, r9 | |
r6 = FMA.f32 r10, r6, r10 | |
r7 = FMA.f32 r11, r7, r11 | |
r12 = FMA.f32 r48, r12, r48 | |
r13 = FMA.f32 r1, r13, r1 | |
r14 = FMA.f32 r2, r14, r2 | |
r15 = FMA.f32 r3, r15, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r9 = FMA.f32 r5, r9, r5 | |
r10 = FMA.f32 r6, r10, r6 | |
r11 = FMA.f32 r7, r11, r7 | |
r48 = FMA.f32 r12, r48, r12 | |
r1 = FMA.f32 r13, r1, r13 | |
r2 = FMA.f32 r14, r2, r14 | |
r3 = FMA.f32 r15, r3, r15 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r9, r5, r9 | |
r6 = FMA.f32 r10, r6, r10 | |
r7 = FMA.f32 r11, r7, r11 | |
r12 = FMA.f32 r48, r12, r48 | |
r13 = FMA.f32 r1, r13, r1 | |
r14 = FMA.f32 r2, r14, r2 | |
r15 = FMA.f32 r3, r15, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r9 = FMA.f32 r5, r9, r5 | |
r10 = FMA.f32 r6, r10, r6 | |
r11 = FMA.f32 r7, r11, r7 | |
r48 = FMA.f32 r12, r48, r12 | |
r1 = FMA.f32 r13, r1, r13 | |
r2 = FMA.f32 r14, r2, r14 | |
r3 = FMA.f32 r15, r3, r15 | |
r4 = FMA.f32 r8, r4, r8 | |
r5 = FMA.f32 r9, r5, r9 | |
r6 = FMA.f32 r10, r6, r10 | |
r7 = FMA.f32 r11, r7, r11 | |
r12 = FMA.f32 r48, r12, r48 | |
r13 = FMA.f32 r1, r13, r1 | |
r14 = FMA.f32 r2, r14, r2 | |
r15 = FMA.f32 r3, r15, r3 | |
r8 = FMA.f32 r4, r8, r4 | |
r9 = FMA.f32 r5, r9, r5 | |
r10 = FMA.f32 r6, r10, r6 | |
r11 = FMA.f32 r7, r11, r7 | |
r49 = IADD.s32 r49, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r1 = FADD.f32 r12, r8 | |
r2 = FADD.f32 r13, r9 | |
r3 = FADD.f32 r14, r10 | |
r4 = FADD.f32 r15, r11 | |
r1 = FADD.f32 r1, r3 | |
r2 = FADD.f32 r2, r4 | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r1 = FADD.f32 r1, r2 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
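Worth noting in the post-RA dump above: the v8 variant keeps 18 registers live in the loop (r0..r15 plus the r48/r49 pair for one accumulator and the loop counter), against roughly ten for v4, yet nothing spills and the FMA chains still schedule back to back in the clauses below.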
block0 { | |
id(0) nbb | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.x.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
* _.h00 = MOV.i32 fau.x | |
+ _.h00 = MOV.i32 fau.y | |
* r1 = FADD.f32 fau.x, t0 | |
+ r2 = FADD.f32 fau.x, t1 | |
* _.h00 = MOV.i32 fau.x | |
+ _.h00 = MOV.i32 fau.y | |
* r3 = FADD.f32 fau.x, t0 | |
+ r4 = FADD.f32 fau.x, t1 | |
* NOP | |
+ _.h00 = MOV.i32 fau.y | |
* NOP | |
+ r5 = FADD.f32 fau.x, t1 | |
400000003f800000 4080000040400000 40a0000000000008 | |
id(0) nbb r_uncond | |
* r6 = MOV.i32 fau.y | |
+ _.h00 = U32_TO_F32 r0 | |
* r8 = FMA.f32 t1, fau.x, t.neg | |
+ _.h00 = MOV.i32 fau.y | |
* r6 = FADD.f32 fau.x, r6 | |
+ r7 = FADD.f32 fau.x, t1 | |
* r9 = MOV.i32 r8 | |
+ r10 = MOV.i32 r8 | |
* r11 = MOV.i32 r8 | |
+ r12 = MOV.i32 r8 | |
* r13 = MOV.i32 r8 | |
+ r14 = MOV.i32 r8 | |
* r15 = MOV.i32 r8 | |
+ r48 = MOV.i32 fau.x | |
* NOP | |
+ r49 = MOV.i32 fau.x | |
40e000002edbe6ff 40c0000000000000 | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r49, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000010 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* r48 = FMA.f32 r12, r48, r12 | |
+ NOP | |
* r1 = FMA.f32 r13, r1, r13 | |
+ NOP | |
* r2 = FMA.f32 r14, r2, r14 | |
+ NOP | |
* r3 = FMA.f32 r15, r3, r15 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r9, r5, r9 | |
+ NOP | |
* r6 = FMA.f32 r10, r6, r10 | |
+ NOP | |
* r7 = FMA.f32 r11, r7, r11 | |
+ NOP | |
id(0) nbb | |
* r12 = FMA.f32 r48, r12, r48 | |
+ NOP | |
* r13 = FMA.f32 r1, r13, r1 | |
+ NOP | |
* r14 = FMA.f32 r2, r14, r2 | |
+ NOP | |
* r15 = FMA.f32 r3, r15, r3 | |
+ NOP | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r9 = FMA.f32 r5, r9, r5 | |
+ NOP | |
* r10 = FMA.f32 r6, r10, r6 | |
+ NOP | |
* r11 = FMA.f32 r7, r11, r7 | |
+ NOP | |
id(0) nbb | |
* r48 = FMA.f32 r12, r48, r12 | |
+ NOP | |
* r1 = FMA.f32 r13, r1, r13 | |
+ NOP | |
* r2 = FMA.f32 r14, r2, r14 | |
+ NOP | |
* r3 = FMA.f32 r15, r3, r15 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r9, r5, r9 | |
+ NOP | |
* r6 = FMA.f32 r10, r6, r10 | |
+ NOP | |
* r7 = FMA.f32 r11, r7, r11 | |
+ NOP | |
id(0) nbb | |
* r12 = FMA.f32 r48, r12, r48 | |
+ NOP | |
* r13 = FMA.f32 r1, r13, r1 | |
+ NOP | |
* r14 = FMA.f32 r2, r14, r2 | |
+ NOP | |
* r15 = FMA.f32 r3, r15, r3 | |
+ NOP | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r9 = FMA.f32 r5, r9, r5 | |
+ NOP | |
* r10 = FMA.f32 r6, r10, r6 | |
+ NOP | |
* r11 = FMA.f32 r7, r11, r7 | |
+ NOP | |
id(0) nbb | |
* r48 = FMA.f32 r12, r48, r12 | |
+ NOP | |
* r1 = FMA.f32 r13, r1, r13 | |
+ NOP | |
* r2 = FMA.f32 r14, r2, r14 | |
+ NOP | |
* r3 = FMA.f32 r15, r3, r15 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r9, r5, r9 | |
+ NOP | |
* r6 = FMA.f32 r10, r6, r10 | |
+ NOP | |
* r7 = FMA.f32 r11, r7, r11 | |
+ NOP | |
id(0) nbb | |
* r12 = FMA.f32 r48, r12, r48 | |
+ NOP | |
* r13 = FMA.f32 r1, r13, r1 | |
+ NOP | |
* r14 = FMA.f32 r2, r14, r2 | |
+ NOP | |
* r15 = FMA.f32 r3, r15, r3 | |
+ NOP | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r9 = FMA.f32 r5, r9, r5 | |
+ NOP | |
* r10 = FMA.f32 r6, r10, r6 | |
+ NOP | |
* r11 = FMA.f32 r7, r11, r7 | |
+ NOP | |
id(0) nbb | |
* r48 = FMA.f32 r12, r48, r12 | |
+ NOP | |
* r1 = FMA.f32 r13, r1, r13 | |
+ NOP | |
* r2 = FMA.f32 r14, r2, r14 | |
+ NOP | |
* r3 = FMA.f32 r15, r3, r15 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r9, r5, r9 | |
+ NOP | |
* r6 = FMA.f32 r10, r6, r10 | |
+ NOP | |
* r7 = FMA.f32 r11, r7, r11 | |
+ NOP | |
id(0) nbb | |
* r12 = FMA.f32 r48, r12, r48 | |
+ NOP | |
* r13 = FMA.f32 r1, r13, r1 | |
+ NOP | |
* r14 = FMA.f32 r2, r14, r2 | |
+ NOP | |
* r15 = FMA.f32 r3, r15, r3 | |
+ NOP | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r9 = FMA.f32 r5, r9, r5 | |
+ NOP | |
* r10 = FMA.f32 r6, r10, r6 | |
+ NOP | |
* r11 = FMA.f32 r7, r11, r7 | |
+ NOP | |
id(0) nbb | |
* r48 = FMA.f32 r12, r48, r12 | |
+ NOP | |
* r1 = FMA.f32 r13, r1, r13 | |
+ NOP | |
* r2 = FMA.f32 r14, r2, r14 | |
+ NOP | |
* r3 = FMA.f32 r15, r3, r15 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r9, r5, r9 | |
+ NOP | |
* r6 = FMA.f32 r10, r6, r10 | |
+ NOP | |
* r7 = FMA.f32 r11, r7, r11 | |
+ NOP | |
id(0) nbb | |
* r12 = FMA.f32 r48, r12, r48 | |
+ NOP | |
* r13 = FMA.f32 r1, r13, r1 | |
+ NOP | |
* r14 = FMA.f32 r2, r14, r2 | |
+ NOP | |
* r15 = FMA.f32 r3, r15, r3 | |
+ NOP | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r9 = FMA.f32 r5, r9, r5 | |
+ NOP | |
* r10 = FMA.f32 r6, r10, r6 | |
+ NOP | |
* r11 = FMA.f32 r7, r11, r7 | |
+ NOP | |
id(0) nbb | |
* r48 = FMA.f32 r12, r48, r12 | |
+ NOP | |
* r1 = FMA.f32 r13, r1, r13 | |
+ NOP | |
* r2 = FMA.f32 r14, r2, r14 | |
+ NOP | |
* r3 = FMA.f32 r15, r3, r15 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r9, r5, r9 | |
+ NOP | |
* r6 = FMA.f32 r10, r6, r10 | |
+ NOP | |
* r7 = FMA.f32 r11, r7, r11 | |
+ NOP | |
id(0) nbb | |
* r12 = FMA.f32 r48, r12, r48 | |
+ NOP | |
* r13 = FMA.f32 r1, r13, r1 | |
+ NOP | |
* r14 = FMA.f32 r2, r14, r2 | |
+ NOP | |
* r15 = FMA.f32 r3, r15, r3 | |
+ NOP | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r9 = FMA.f32 r5, r9, r5 | |
+ NOP | |
* r10 = FMA.f32 r6, r10, r6 | |
+ NOP | |
* r11 = FMA.f32 r7, r11, r7 | |
+ NOP | |
id(0) nbb | |
* r48 = FMA.f32 r12, r48, r12 | |
+ NOP | |
* r1 = FMA.f32 r13, r1, r13 | |
+ NOP | |
* r2 = FMA.f32 r14, r2, r14 | |
+ NOP | |
* r3 = FMA.f32 r15, r3, r15 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r9, r5, r9 | |
+ NOP | |
* r6 = FMA.f32 r10, r6, r10 | |
+ NOP | |
* r7 = FMA.f32 r11, r7, r11 | |
+ NOP | |
id(0) nbb | |
* r12 = FMA.f32 r48, r12, r48 | |
+ NOP | |
* r13 = FMA.f32 r1, r13, r1 | |
+ NOP | |
* r14 = FMA.f32 r2, r14, r2 | |
+ NOP | |
* r15 = FMA.f32 r3, r15, r3 | |
+ NOP | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r9 = FMA.f32 r5, r9, r5 | |
+ NOP | |
* r10 = FMA.f32 r6, r10, r6 | |
+ NOP | |
* r11 = FMA.f32 r7, r11, r7 | |
+ NOP | |
id(0) nbb | |
* r48 = FMA.f32 r12, r48, r12 | |
+ NOP | |
* r1 = FMA.f32 r13, r1, r13 | |
+ NOP | |
* r2 = FMA.f32 r14, r2, r14 | |
+ NOP | |
* r3 = FMA.f32 r15, r3, r15 | |
+ NOP | |
* r4 = FMA.f32 r8, r4, r8 | |
+ NOP | |
* r5 = FMA.f32 r9, r5, r9 | |
+ NOP | |
* r6 = FMA.f32 r10, r6, r10 | |
+ NOP | |
* r7 = FMA.f32 r11, r7, r11 | |
+ NOP | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* r12 = FMA.f32 r48, r12, r48 | |
+ NOP | |
* r13 = FMA.f32 r1, r13, r1 | |
+ NOP | |
* r14 = FMA.f32 r2, r14, r2 | |
+ NOP | |
* r15 = FMA.f32 r3, r15, r3 | |
+ NOP | |
* r8 = FMA.f32 r4, r8, r4 | |
+ NOP | |
* r9 = FMA.f32 r5, r9, r5 | |
+ NOP | |
* r10 = FMA.f32 r6, r10, r6 | |
+ r49 = IADD.s32 r49, fau.x | |
* r11 = FMA.f32 r7, r11, r7 | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) nbb | |
* NOP | |
+ r1 = FADD.f32 r12, r8 | |
id(0) wait(0 ) nbb r_uncond | |
* NOP | |
+ r2 = FADD.f32 r13, r9 | |
* NOP | |
+ r3 = FADD.f32 r14, r10 | |
* NOP | |
+ r4 = FADD.f32 r15, r11 | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ r1 = FADD.f32 r1, r3 | |
* _.h00 = FADD.f32 r2, r4 | |
+ r0 = IADD.s32 fau.x, t0 | |
* r1 = FADD.f32 r1, t0 | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* NOP | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
slot 0 reads: r1 | |
clause_0: | |
ds(0) nbb ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
*MOV.i32 t0, 0x3f800000 /* 1.000000 */ | |
+MOV.i32 t1, 0x40000000 /* 2.000000 */ | |
*FADD.f32 r1:t0, u1.w0, t0 | |
+FADD.f32 r2:t1, u1.w0, t1 | |
*MOV.i32 t0, 0x40400000 /* 3.000000 */ | |
+MOV.i32 t1, 0x40800000 /* 4.000000 */ | |
*FADD.f32 r3:t0, u1.w0, t0 | |
+FADD.f32 r4:t1, u1.w0, t1 | |
*NOP t0 | |
+MOV.i32 t1, 0x40a00000 /* 5.000000 */ | |
*NOP t0 | |
+FADD.f32 r5:t1, u1.w0, t1 | |
} | |
clause_7: | |
ds(0) nbb r_uncond ncph | |
{ | |
*MOV.i32 r6:t0, 0x40c00000 /* 6.000000 */ | |
+U32_TO_F32 t1, r0 | |
*FMA.f32 r8:t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+MOV.i32 t1, 0x40e00000 /* 7.000000 */ | |
*FADD.f32 r6:t0, u1.w0, r6 | |
+FADD.f32 r7:t1, u1.w0, t1 | |
*MOV.i32 r9:t0, r8 | |
+MOV.i32 r10:t1, r8 | |
*MOV.i32 r11:t0, r8 | |
+MOV.i32 r12:t1, r8 | |
*MOV.i32 r13:t0, r8 | |
+MOV.i32 r14:t1, r8 | |
*MOV.i32 r15:t0, r8 | |
+MOV.i32 r48:t1, u1.w0 | |
*NOP t0 | |
+MOV.i32 r49:t1, #0.x | |
} | |
clause_14: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r49, 0x00000010 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_19 | |
} | |
clause_17: | |
ds(0) nbb | |
{ | |
*NOP t0 | |
+JUMP t1, clause_116 | |
} | |
clause_19: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r48:t0, r12, r48, r12 | |
+NOP t1 | |
*FMA.f32 r1:t0, r13, r1, r13 | |
+NOP t1 | |
*FMA.f32 r2:t0, r14, r2, r14 | |
+NOP t1 | |
*FMA.f32 r3:t0, r15, r3, r15 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r9, r5, r9 | |
+NOP t1 | |
*FMA.f32 r6:t0, r10, r6, r10 | |
+NOP t1 | |
*FMA.f32 r7:t0, r11, r7, r11 | |
+NOP t1 | |
} | |
clause_25: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r12:t0, r48, r12, r48 | |
+NOP t1 | |
*FMA.f32 r13:t0, r1, r13, r1 | |
+NOP t1 | |
*FMA.f32 r14:t0, r2, r14, r2 | |
+NOP t1 | |
*FMA.f32 r15:t0, r3, r15, r3 | |
+NOP t1 | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r9:t0, r5, r9, r5 | |
+NOP t1 | |
*FMA.f32 r10:t0, r6, r10, r6 | |
+NOP t1 | |
*FMA.f32 r11:t0, r7, r11, r7 | |
+NOP t1 | |
} | |
clause_31: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r48:t0, r12, r48, r12 | |
+NOP t1 | |
*FMA.f32 r1:t0, r13, r1, r13 | |
+NOP t1 | |
*FMA.f32 r2:t0, r14, r2, r14 | |
+NOP t1 | |
*FMA.f32 r3:t0, r15, r3, r15 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r9, r5, r9 | |
+NOP t1 | |
*FMA.f32 r6:t0, r10, r6, r10 | |
+NOP t1 | |
*FMA.f32 r7:t0, r11, r7, r11 | |
+NOP t1 | |
} | |
clause_37: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r12:t0, r48, r12, r48 | |
+NOP t1 | |
*FMA.f32 r13:t0, r1, r13, r1 | |
+NOP t1 | |
*FMA.f32 r14:t0, r2, r14, r2 | |
+NOP t1 | |
*FMA.f32 r15:t0, r3, r15, r3 | |
+NOP t1 | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r9:t0, r5, r9, r5 | |
+NOP t1 | |
*FMA.f32 r10:t0, r6, r10, r6 | |
+NOP t1 | |
*FMA.f32 r11:t0, r7, r11, r7 | |
+NOP t1 | |
} | |
clause_43: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r48:t0, r12, r48, r12 | |
+NOP t1 | |
*FMA.f32 r1:t0, r13, r1, r13 | |
+NOP t1 | |
*FMA.f32 r2:t0, r14, r2, r14 | |
+NOP t1 | |
*FMA.f32 r3:t0, r15, r3, r15 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r9, r5, r9 | |
+NOP t1 | |
*FMA.f32 r6:t0, r10, r6, r10 | |
+NOP t1 | |
*FMA.f32 r7:t0, r11, r7, r11 | |
+NOP t1 | |
} | |
clause_49: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r12:t0, r48, r12, r48 | |
+NOP t1 | |
*FMA.f32 r13:t0, r1, r13, r1 | |
+NOP t1 | |
*FMA.f32 r14:t0, r2, r14, r2 | |
+NOP t1 | |
*FMA.f32 r15:t0, r3, r15, r3 | |
+NOP t1 | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r9:t0, r5, r9, r5 | |
+NOP t1 | |
*FMA.f32 r10:t0, r6, r10, r6 | |
+NOP t1 | |
*FMA.f32 r11:t0, r7, r11, r7 | |
+NOP t1 | |
} | |
clause_55: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r48:t0, r12, r48, r12 | |
+NOP t1 | |
*FMA.f32 r1:t0, r13, r1, r13 | |
+NOP t1 | |
*FMA.f32 r2:t0, r14, r2, r14 | |
+NOP t1 | |
*FMA.f32 r3:t0, r15, r3, r15 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r9, r5, r9 | |
+NOP t1 | |
*FMA.f32 r6:t0, r10, r6, r10 | |
+NOP t1 | |
*FMA.f32 r7:t0, r11, r7, r11 | |
+NOP t1 | |
} | |
clause_61: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r12:t0, r48, r12, r48 | |
+NOP t1 | |
*FMA.f32 r13:t0, r1, r13, r1 | |
+NOP t1 | |
*FMA.f32 r14:t0, r2, r14, r2 | |
+NOP t1 | |
*FMA.f32 r15:t0, r3, r15, r3 | |
+NOP t1 | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r9:t0, r5, r9, r5 | |
+NOP t1 | |
*FMA.f32 r10:t0, r6, r10, r6 | |
+NOP t1 | |
*FMA.f32 r11:t0, r7, r11, r7 | |
+NOP t1 | |
} | |
clause_67: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r48:t0, r12, r48, r12 | |
+NOP t1 | |
*FMA.f32 r1:t0, r13, r1, r13 | |
+NOP t1 | |
*FMA.f32 r2:t0, r14, r2, r14 | |
+NOP t1 | |
*FMA.f32 r3:t0, r15, r3, r15 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r9, r5, r9 | |
+NOP t1 | |
*FMA.f32 r6:t0, r10, r6, r10 | |
+NOP t1 | |
*FMA.f32 r7:t0, r11, r7, r11 | |
+NOP t1 | |
} | |
clause_73: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r12:t0, r48, r12, r48 | |
+NOP t1 | |
*FMA.f32 r13:t0, r1, r13, r1 | |
+NOP t1 | |
*FMA.f32 r14:t0, r2, r14, r2 | |
+NOP t1 | |
*FMA.f32 r15:t0, r3, r15, r3 | |
+NOP t1 | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r9:t0, r5, r9, r5 | |
+NOP t1 | |
*FMA.f32 r10:t0, r6, r10, r6 | |
+NOP t1 | |
*FMA.f32 r11:t0, r7, r11, r7 | |
+NOP t1 | |
} | |
clause_79: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r48:t0, r12, r48, r12 | |
+NOP t1 | |
*FMA.f32 r1:t0, r13, r1, r13 | |
+NOP t1 | |
*FMA.f32 r2:t0, r14, r2, r14 | |
+NOP t1 | |
*FMA.f32 r3:t0, r15, r3, r15 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r9, r5, r9 | |
+NOP t1 | |
*FMA.f32 r6:t0, r10, r6, r10 | |
+NOP t1 | |
*FMA.f32 r7:t0, r11, r7, r11 | |
+NOP t1 | |
} | |
clause_85: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r12:t0, r48, r12, r48 | |
+NOP t1 | |
*FMA.f32 r13:t0, r1, r13, r1 | |
+NOP t1 | |
*FMA.f32 r14:t0, r2, r14, r2 | |
+NOP t1 | |
*FMA.f32 r15:t0, r3, r15, r3 | |
+NOP t1 | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r9:t0, r5, r9, r5 | |
+NOP t1 | |
*FMA.f32 r10:t0, r6, r10, r6 | |
+NOP t1 | |
*FMA.f32 r11:t0, r7, r11, r7 | |
+NOP t1 | |
} | |
clause_91: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r48:t0, r12, r48, r12 | |
+NOP t1 | |
*FMA.f32 r1:t0, r13, r1, r13 | |
+NOP t1 | |
*FMA.f32 r2:t0, r14, r2, r14 | |
+NOP t1 | |
*FMA.f32 r3:t0, r15, r3, r15 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r9, r5, r9 | |
+NOP t1 | |
*FMA.f32 r6:t0, r10, r6, r10 | |
+NOP t1 | |
*FMA.f32 r7:t0, r11, r7, r11 | |
+NOP t1 | |
} | |
clause_97: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r12:t0, r48, r12, r48 | |
+NOP t1 | |
*FMA.f32 r13:t0, r1, r13, r1 | |
+NOP t1 | |
*FMA.f32 r14:t0, r2, r14, r2 | |
+NOP t1 | |
*FMA.f32 r15:t0, r3, r15, r3 | |
+NOP t1 | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r9:t0, r5, r9, r5 | |
+NOP t1 | |
*FMA.f32 r10:t0, r6, r10, r6 | |
+NOP t1 | |
*FMA.f32 r11:t0, r7, r11, r7 | |
+NOP t1 | |
} | |
clause_103: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r48:t0, r12, r48, r12 | |
+NOP t1 | |
*FMA.f32 r1:t0, r13, r1, r13 | |
+NOP t1 | |
*FMA.f32 r2:t0, r14, r2, r14 | |
+NOP t1 | |
*FMA.f32 r3:t0, r15, r3, r15 | |
+NOP t1 | |
*FMA.f32 r4:t0, r8, r4, r8 | |
+NOP t1 | |
*FMA.f32 r5:t0, r9, r5, r9 | |
+NOP t1 | |
*FMA.f32 r6:t0, r10, r6, r10 | |
+NOP t1 | |
*FMA.f32 r7:t0, r11, r7, r11 | |
+NOP t1 | |
} | |
clause_109: | |
ds(0) nbb r_uncond | |
{ | |
*FMA.f32 r12:t0, r48, r12, r48 | |
+NOP t1 | |
*FMA.f32 r13:t0, r1, r13, r1 | |
+NOP t1 | |
*FMA.f32 r14:t0, r2, r14, r2 | |
+NOP t1 | |
*FMA.f32 r15:t0, r3, r15, r3 | |
+NOP t1 | |
*FMA.f32 r8:t0, r4, r8, r4 | |
+NOP t1 | |
*FMA.f32 r9:t0, r5, r9, r5 | |
+NOP t1 | |
*FMA.f32 r10:t0, r6, r10, r6 | |
+IADD.s32 r49:t1, r49, 0x00000001 /* 0.000000 */ | |
*FMA.f32 r11:t0, r7, r11, r7 | |
+JUMP t1, clause_14 | |
} | |
clause_116: | |
ds(0) nbb ncph next_store dwb(0) | |
{ | |
*NOP t0 | |
+FADD.f32 r1:t1, r12, r8 | |
} | |
clause_117: | |
ds(0) eos store | |
{ | |
*NOP t0 | |
+FADD.f32 r2:t1, r13, r9 | |
*NOP t0 | |
+FADD.f32 r3:t1, r14, r10 | |
*NOP t0 | |
+FADD.f32 r4:t1, r15, r11 | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+FADD.f32 r1:t1, r1, r3 | |
*FADD.f32 t0, r2, r4 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*FADD.f32 r1:t0, r1, t0 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*NOP t0 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
8a550d7d compute_sp_v8_float 24.268 GFLOPs 11.061ms | |
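Same arithmetic as before: v8 does 16 iterations * 16 mads * 8 lanes * 2 FLOP = 4096 FLOP per invocation, identical to v4's 32 * 16 * 4 * 2, so with the assumed 65536 invocations the total is again 2^28 FLOP and 268,435,456 / 11.061 ms = 24.27 GFLOP/s, matching the 24.268 GFLOPs printed. The variants appear to be normalized to the same total work, which is why their GFLOPs figures are directly comparable.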
compute shader ---------- | |
#define KERNEL compute_sp_v16 | |
#define LOCAL_SIZE_X 256 | |
#define DATATYPE float | |
#line 64 | |
// Avoid auto-vectorization by using vector-width-locked dependent code | |
layout(local_size_x = LOCAL_SIZE_X) in; | |
#undef MAD_4 | |
#undef MAD_16 | |
#undef MAD_64 | |
#define mad(a,b,c) (a*b+c) | |
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x); | |
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); | |
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); | |
struct vec8 { | |
vec4 d0, d1; | |
}; | |
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7)) | |
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x)) | |
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1)) | |
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1)) | |
struct vec16 { | |
vec8 d0,d1; | |
}; | |
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15)) | |
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x)) | |
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1))) | |
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1))) | |
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c)) | |
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c)) | |
layout(location = 1) uniform DATATYPE _A; | |
#define SCALE 1e-10 | |
layout(std430, binding = 0) restrict writeonly buffer outbuffer { | |
DATATYPE ptr[]; | |
}; | |
#line 184 | |
void compute_sp_v16() | |
{ | |
uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u; | |
vec16 x = VEC16(_A, (_A+DATATYPE(1)), (_A+DATATYPE(2)), (_A+DATATYPE(3)), (_A+DATATYPE(4)), (_A+DATATYPE(5)), (_A+DATATYPE(6)), (_A+DATATYPE(7)), | |
(_A+DATATYPE(8)), (_A+DATATYPE(9)), (_A+DATATYPE(10)), (_A+DATATYPE(11)), (_A+DATATYPE(12)), (_A+DATATYPE(13)), (_A+DATATYPE(14)), (_A+DATATYPE(15))); | |
vec16 y = VEC16_S(DATATYPE((float(id) * SCALE))); | |
#undef mad | |
#define mad mad16 | |
for(int i=0; i<8; i++) | |
{ | |
MAD_16(x, y); | |
} | |
vec8 u = VEC8_ADD(y.d0, y.d1); | |
vec4 s = u.d0 + u.d1; | |
vec2 t = s.xy + s.zw; | |
ptr[id] = t.x + t.y; | |
} | |
void main() {compute_sp_v16();} | |
---------- | |
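The v16 variant keeps that normalization: 8 iterations * 16 mads * 16 lanes * 2 FLOP = 4096 FLOP per invocation, with the shorter trip count visible below as the 0x00000008 in the ige32 compare (ssa_76). The price is twice the live state of v8: the loop header now carries 33 phis (16 x-components, 16 y-components, plus the counter), and each iteration unrolls to 256 scalar ffma ops against v8's 128.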
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0x31891d4a, 0x859f0d4a, 0x14e16879, 0xa0b69f0b, 0x0d3a19ec} | |
name: GLSL10 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp float[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp float _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_19 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_18 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_20 = ishl ssa_19.y, ssa_18 | |
vec1 32 ssa_21 = iadd ssa_19.x, ssa_20 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_872 = insert_u16 ssa_19.z, ssa_1 | |
vec1 32 ssa_24 = iadd ssa_21, ssa_872 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_25 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
vec1 32 ssa_2 = load_const (0x3f800000 = 1.000000) | |
vec1 32 ssa_26 = fadd ssa_25, ssa_2 | |
vec1 32 ssa_3 = load_const (0x40000000 = 2.000000) | |
vec1 32 ssa_27 = fadd ssa_25, ssa_3 | |
vec1 32 ssa_4 = load_const (0x40400000 = 3.000000) | |
vec1 32 ssa_28 = fadd ssa_25, ssa_4 | |
vec1 32 ssa_5 = load_const (0x40800000 = 4.000000) | |
vec1 32 ssa_29 = fadd ssa_25, ssa_5 | |
vec1 32 ssa_6 = load_const (0x40a00000 = 5.000000) | |
vec1 32 ssa_30 = fadd ssa_25, ssa_6 | |
vec1 32 ssa_7 = load_const (0x40c00000 = 6.000000) | |
vec1 32 ssa_31 = fadd ssa_25, ssa_7 | |
vec1 32 ssa_8 = load_const (0x40e00000 = 7.000000) | |
vec1 32 ssa_32 = fadd ssa_25, ssa_8 | |
vec1 32 ssa_9 = load_const (0x41000000 = 8.000000) | |
vec1 32 ssa_33 = fadd ssa_25, ssa_9 | |
vec1 32 ssa_10 = load_const (0x41100000 = 9.000000) | |
vec1 32 ssa_34 = fadd ssa_25, ssa_10 | |
vec1 32 ssa_11 = load_const (0x41200000 = 10.000000) | |
vec1 32 ssa_35 = fadd ssa_25, ssa_11 | |
vec1 32 ssa_12 = load_const (0x41300000 = 11.000000) | |
vec1 32 ssa_36 = fadd ssa_25, ssa_12 | |
vec1 32 ssa_13 = load_const (0x41400000 = 12.000000) | |
vec1 32 ssa_37 = fadd ssa_25, ssa_13 | |
vec1 32 ssa_14 = load_const (0x41500000 = 13.000000) | |
vec1 32 ssa_38 = fadd ssa_25, ssa_14 | |
vec1 32 ssa_15 = load_const (0x41600000 = 14.000000) | |
vec1 32 ssa_39 = fadd ssa_25, ssa_15 | |
vec1 32 ssa_16 = load_const (0x41700000 = 15.000000) | |
vec1 32 ssa_40 = fadd ssa_25, ssa_16 | |
vec1 32 ssa_41 = u2f32 ssa_24 | |
vec1 32 ssa_17 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_42 = fmul ssa_41, ssa_17 | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_43 = phi block_0: ssa_42, block_4: ssa_618 | |
vec1 32 ssa_44 = phi block_0: ssa_42, block_4: ssa_617 | |
vec1 32 ssa_45 = phi block_0: ssa_42, block_4: ssa_616 | |
vec1 32 ssa_46 = phi block_0: ssa_42, block_4: ssa_615 | |
vec1 32 ssa_47 = phi block_0: ssa_42, block_4: ssa_622 | |
vec1 32 ssa_48 = phi block_0: ssa_42, block_4: ssa_621 | |
vec1 32 ssa_49 = phi block_0: ssa_42, block_4: ssa_620 | |
vec1 32 ssa_50 = phi block_0: ssa_42, block_4: ssa_619 | |
vec1 32 ssa_51 = phi block_0: ssa_42, block_4: ssa_626 | |
vec1 32 ssa_52 = phi block_0: ssa_42, block_4: ssa_625 | |
vec1 32 ssa_53 = phi block_0: ssa_42, block_4: ssa_624 | |
vec1 32 ssa_54 = phi block_0: ssa_42, block_4: ssa_623 | |
vec1 32 ssa_55 = phi block_0: ssa_42, block_4: ssa_630 | |
vec1 32 ssa_56 = phi block_0: ssa_42, block_4: ssa_629 | |
vec1 32 ssa_57 = phi block_0: ssa_42, block_4: ssa_628 | |
vec1 32 ssa_58 = phi block_0: ssa_42, block_4: ssa_627 | |
vec1 32 ssa_59 = phi block_0: ssa_37, block_4: ssa_634 | |
vec1 32 ssa_60 = phi block_0: ssa_38, block_4: ssa_633 | |
vec1 32 ssa_61 = phi block_0: ssa_39, block_4: ssa_632 | |
vec1 32 ssa_62 = phi block_0: ssa_40, block_4: ssa_631 | |
vec1 32 ssa_63 = phi block_0: ssa_33, block_4: ssa_638 | |
vec1 32 ssa_64 = phi block_0: ssa_34, block_4: ssa_637 | |
vec1 32 ssa_65 = phi block_0: ssa_35, block_4: ssa_636 | |
vec1 32 ssa_66 = phi block_0: ssa_36, block_4: ssa_635 | |
vec1 32 ssa_67 = phi block_0: ssa_29, block_4: ssa_642 | |
vec1 32 ssa_68 = phi block_0: ssa_30, block_4: ssa_641 | |
vec1 32 ssa_69 = phi block_0: ssa_31, block_4: ssa_640 | |
vec1 32 ssa_70 = phi block_0: ssa_32, block_4: ssa_639 | |
vec1 32 ssa_71 = phi block_0: ssa_25, block_4: ssa_646 | |
vec1 32 ssa_72 = phi block_0: ssa_26, block_4: ssa_645 | |
vec1 32 ssa_73 = phi block_0: ssa_27, block_4: ssa_644 | |
vec1 32 ssa_74 = phi block_0: ssa_28, block_4: ssa_643 | |
vec1 32 ssa_75 = phi block_0: ssa_0, block_4: ssa_589 | |
vec1 32 ssa_76 = ige32 ssa_75, ssa_18 | |
/* succs: block_2 block_3 */ | |
if ssa_76 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_870 = ffma ssa_55, ssa_71, ssa_55 | |
vec1 32 ssa_869 = ffma ssa_56, ssa_72, ssa_56 | |
vec1 32 ssa_868 = ffma ssa_57, ssa_73, ssa_57 | |
vec1 32 ssa_867 = ffma ssa_58, ssa_74, ssa_58 | |
vec1 32 ssa_866 = ffma ssa_51, ssa_67, ssa_51 | |
vec1 32 ssa_865 = ffma ssa_52, ssa_68, ssa_52 | |
vec1 32 ssa_864 = ffma ssa_53, ssa_69, ssa_53 | |
vec1 32 ssa_863 = ffma ssa_54, ssa_70, ssa_54 | |
vec1 32 ssa_862 = ffma ssa_47, ssa_63, ssa_47 | |
vec1 32 ssa_861 = ffma ssa_48, ssa_64, ssa_48 | |
vec1 32 ssa_860 = ffma ssa_49, ssa_65, ssa_49 | |
vec1 32 ssa_859 = ffma ssa_50, ssa_66, ssa_50 | |
vec1 32 ssa_858 = ffma ssa_43, ssa_59, ssa_43 | |
vec1 32 ssa_857 = ffma ssa_44, ssa_60, ssa_44 | |
vec1 32 ssa_856 = ffma ssa_45, ssa_61, ssa_45 | |
vec1 32 ssa_855 = ffma ssa_46, ssa_62, ssa_46 | |
vec1 32 ssa_854 = ffma ssa_870, ssa_55, ssa_870 | |
vec1 32 ssa_853 = ffma ssa_869, ssa_56, ssa_869 | |
vec1 32 ssa_852 = ffma ssa_868, ssa_57, ssa_868 | |
vec1 32 ssa_851 = ffma ssa_867, ssa_58, ssa_867 | |
vec1 32 ssa_850 = ffma ssa_866, ssa_51, ssa_866 | |
vec1 32 ssa_849 = ffma ssa_865, ssa_52, ssa_865 | |
vec1 32 ssa_848 = ffma ssa_864, ssa_53, ssa_864 | |
vec1 32 ssa_847 = ffma ssa_863, ssa_54, ssa_863 | |
vec1 32 ssa_846 = ffma ssa_862, ssa_47, ssa_862 | |
vec1 32 ssa_845 = ffma ssa_861, ssa_48, ssa_861 | |
vec1 32 ssa_844 = ffma ssa_860, ssa_49, ssa_860 | |
vec1 32 ssa_843 = ffma ssa_859, ssa_50, ssa_859 | |
vec1 32 ssa_842 = ffma ssa_858, ssa_43, ssa_858 | |
vec1 32 ssa_841 = ffma ssa_857, ssa_44, ssa_857 | |
vec1 32 ssa_840 = ffma ssa_856, ssa_45, ssa_856 | |
vec1 32 ssa_839 = ffma ssa_855, ssa_46, ssa_855 | |
vec1 32 ssa_838 = ffma ssa_854, ssa_870, ssa_854 | |
vec1 32 ssa_837 = ffma ssa_853, ssa_869, ssa_853 | |
vec1 32 ssa_836 = ffma ssa_852, ssa_868, ssa_852 | |
vec1 32 ssa_835 = ffma ssa_851, ssa_867, ssa_851 | |
vec1 32 ssa_834 = ffma ssa_850, ssa_866, ssa_850 | |
vec1 32 ssa_833 = ffma ssa_849, ssa_865, ssa_849 | |
vec1 32 ssa_832 = ffma ssa_848, ssa_864, ssa_848 | |
vec1 32 ssa_831 = ffma ssa_847, ssa_863, ssa_847 | |
vec1 32 ssa_830 = ffma ssa_846, ssa_862, ssa_846 | |
vec1 32 ssa_829 = ffma ssa_845, ssa_861, ssa_845 | |
vec1 32 ssa_828 = ffma ssa_844, ssa_860, ssa_844 | |
vec1 32 ssa_827 = ffma ssa_843, ssa_859, ssa_843 | |
vec1 32 ssa_826 = ffma ssa_842, ssa_858, ssa_842 | |
vec1 32 ssa_825 = ffma ssa_841, ssa_857, ssa_841 | |
vec1 32 ssa_824 = ffma ssa_840, ssa_856, ssa_840 | |
vec1 32 ssa_823 = ffma ssa_839, ssa_855, ssa_839 | |
vec1 32 ssa_822 = ffma ssa_838, ssa_854, ssa_838 | |
vec1 32 ssa_821 = ffma ssa_837, ssa_853, ssa_837 | |
vec1 32 ssa_820 = ffma ssa_836, ssa_852, ssa_836 | |
vec1 32 ssa_819 = ffma ssa_835, ssa_851, ssa_835 | |
vec1 32 ssa_818 = ffma ssa_834, ssa_850, ssa_834 | |
vec1 32 ssa_817 = ffma ssa_833, ssa_849, ssa_833 | |
vec1 32 ssa_816 = ffma ssa_832, ssa_848, ssa_832 | |
vec1 32 ssa_815 = ffma ssa_831, ssa_847, ssa_831 | |
vec1 32 ssa_814 = ffma ssa_830, ssa_846, ssa_830 | |
vec1 32 ssa_813 = ffma ssa_829, ssa_845, ssa_829 | |
vec1 32 ssa_812 = ffma ssa_828, ssa_844, ssa_828 | |
vec1 32 ssa_811 = ffma ssa_827, ssa_843, ssa_827 | |
vec1 32 ssa_810 = ffma ssa_826, ssa_842, ssa_826 | |
vec1 32 ssa_809 = ffma ssa_825, ssa_841, ssa_825 | |
vec1 32 ssa_808 = ffma ssa_824, ssa_840, ssa_824 | |
vec1 32 ssa_807 = ffma ssa_823, ssa_839, ssa_823 | |
vec1 32 ssa_806 = ffma ssa_822, ssa_838, ssa_822 | |
vec1 32 ssa_805 = ffma ssa_821, ssa_837, ssa_821 | |
vec1 32 ssa_804 = ffma ssa_820, ssa_836, ssa_820 | |
vec1 32 ssa_803 = ffma ssa_819, ssa_835, ssa_819 | |
vec1 32 ssa_802 = ffma ssa_818, ssa_834, ssa_818 | |
vec1 32 ssa_801 = ffma ssa_817, ssa_833, ssa_817 | |
vec1 32 ssa_800 = ffma ssa_816, ssa_832, ssa_816 | |
vec1 32 ssa_799 = ffma ssa_815, ssa_831, ssa_815 | |
vec1 32 ssa_798 = ffma ssa_814, ssa_830, ssa_814 | |
vec1 32 ssa_797 = ffma ssa_813, ssa_829, ssa_813 | |
vec1 32 ssa_796 = ffma ssa_812, ssa_828, ssa_812 | |
vec1 32 ssa_795 = ffma ssa_811, ssa_827, ssa_811 | |
vec1 32 ssa_794 = ffma ssa_810, ssa_826, ssa_810 | |
vec1 32 ssa_793 = ffma ssa_809, ssa_825, ssa_809 | |
vec1 32 ssa_792 = ffma ssa_808, ssa_824, ssa_808 | |
vec1 32 ssa_791 = ffma ssa_807, ssa_823, ssa_807 | |
vec1 32 ssa_790 = ffma ssa_806, ssa_822, ssa_806 | |
vec1 32 ssa_789 = ffma ssa_805, ssa_821, ssa_805 | |
vec1 32 ssa_788 = ffma ssa_804, ssa_820, ssa_804 | |
vec1 32 ssa_787 = ffma ssa_803, ssa_819, ssa_803 | |
vec1 32 ssa_786 = ffma ssa_802, ssa_818, ssa_802 | |
vec1 32 ssa_785 = ffma ssa_801, ssa_817, ssa_801 | |
vec1 32 ssa_784 = ffma ssa_800, ssa_816, ssa_800 | |
vec1 32 ssa_783 = ffma ssa_799, ssa_815, ssa_799 | |
vec1 32 ssa_782 = ffma ssa_798, ssa_814, ssa_798 | |
vec1 32 ssa_781 = ffma ssa_797, ssa_813, ssa_797 | |
vec1 32 ssa_780 = ffma ssa_796, ssa_812, ssa_796 | |
vec1 32 ssa_779 = ffma ssa_795, ssa_811, ssa_795 | |
vec1 32 ssa_778 = ffma ssa_794, ssa_810, ssa_794 | |
vec1 32 ssa_777 = ffma ssa_793, ssa_809, ssa_793 | |
vec1 32 ssa_776 = ffma ssa_792, ssa_808, ssa_792 | |
vec1 32 ssa_775 = ffma ssa_791, ssa_807, ssa_791 | |
vec1 32 ssa_774 = ffma ssa_790, ssa_806, ssa_790 | |
vec1 32 ssa_773 = ffma ssa_789, ssa_805, ssa_789 | |
vec1 32 ssa_772 = ffma ssa_788, ssa_804, ssa_788 | |
vec1 32 ssa_771 = ffma ssa_787, ssa_803, ssa_787 | |
vec1 32 ssa_770 = ffma ssa_786, ssa_802, ssa_786 | |
vec1 32 ssa_769 = ffma ssa_785, ssa_801, ssa_785 | |
vec1 32 ssa_768 = ffma ssa_784, ssa_800, ssa_784 | |
vec1 32 ssa_767 = ffma ssa_783, ssa_799, ssa_783 | |
vec1 32 ssa_766 = ffma ssa_782, ssa_798, ssa_782 | |
vec1 32 ssa_765 = ffma ssa_781, ssa_797, ssa_781 | |
vec1 32 ssa_764 = ffma ssa_780, ssa_796, ssa_780 | |
vec1 32 ssa_763 = ffma ssa_779, ssa_795, ssa_779 | |
vec1 32 ssa_762 = ffma ssa_778, ssa_794, ssa_778 | |
vec1 32 ssa_761 = ffma ssa_777, ssa_793, ssa_777 | |
vec1 32 ssa_760 = ffma ssa_776, ssa_792, ssa_776 | |
vec1 32 ssa_759 = ffma ssa_775, ssa_791, ssa_775 | |
vec1 32 ssa_758 = ffma ssa_774, ssa_790, ssa_774 | |
vec1 32 ssa_757 = ffma ssa_773, ssa_789, ssa_773 | |
vec1 32 ssa_756 = ffma ssa_772, ssa_788, ssa_772 | |
vec1 32 ssa_755 = ffma ssa_771, ssa_787, ssa_771 | |
vec1 32 ssa_754 = ffma ssa_770, ssa_786, ssa_770 | |
vec1 32 ssa_753 = ffma ssa_769, ssa_785, ssa_769 | |
vec1 32 ssa_752 = ffma ssa_768, ssa_784, ssa_768 | |
vec1 32 ssa_751 = ffma ssa_767, ssa_783, ssa_767 | |
vec1 32 ssa_750 = ffma ssa_766, ssa_782, ssa_766 | |
vec1 32 ssa_749 = ffma ssa_765, ssa_781, ssa_765 | |
vec1 32 ssa_748 = ffma ssa_764, ssa_780, ssa_764 | |
vec1 32 ssa_747 = ffma ssa_763, ssa_779, ssa_763 | |
vec1 32 ssa_746 = ffma ssa_762, ssa_778, ssa_762 | |
vec1 32 ssa_745 = ffma ssa_761, ssa_777, ssa_761 | |
vec1 32 ssa_744 = ffma ssa_760, ssa_776, ssa_760 | |
vec1 32 ssa_743 = ffma ssa_759, ssa_775, ssa_759 | |
vec1 32 ssa_742 = ffma ssa_758, ssa_774, ssa_758 | |
vec1 32 ssa_741 = ffma ssa_757, ssa_773, ssa_757 | |
vec1 32 ssa_740 = ffma ssa_756, ssa_772, ssa_756 | |
vec1 32 ssa_739 = ffma ssa_755, ssa_771, ssa_755 | |
vec1 32 ssa_738 = ffma ssa_754, ssa_770, ssa_754 | |
vec1 32 ssa_737 = ffma ssa_753, ssa_769, ssa_753 | |
vec1 32 ssa_736 = ffma ssa_752, ssa_768, ssa_752 | |
vec1 32 ssa_735 = ffma ssa_751, ssa_767, ssa_751 | |
vec1 32 ssa_734 = ffma ssa_750, ssa_766, ssa_750 | |
vec1 32 ssa_733 = ffma ssa_749, ssa_765, ssa_749 | |
vec1 32 ssa_732 = ffma ssa_748, ssa_764, ssa_748 | |
vec1 32 ssa_731 = ffma ssa_747, ssa_763, ssa_747 | |
vec1 32 ssa_730 = ffma ssa_746, ssa_762, ssa_746 | |
vec1 32 ssa_729 = ffma ssa_745, ssa_761, ssa_745 | |
vec1 32 ssa_728 = ffma ssa_744, ssa_760, ssa_744 | |
vec1 32 ssa_727 = ffma ssa_743, ssa_759, ssa_743 | |
vec1 32 ssa_726 = ffma ssa_742, ssa_758, ssa_742 | |
vec1 32 ssa_725 = ffma ssa_741, ssa_757, ssa_741 | |
vec1 32 ssa_724 = ffma ssa_740, ssa_756, ssa_740 | |
vec1 32 ssa_723 = ffma ssa_739, ssa_755, ssa_739 | |
vec1 32 ssa_722 = ffma ssa_738, ssa_754, ssa_738 | |
vec1 32 ssa_721 = ffma ssa_737, ssa_753, ssa_737 | |
vec1 32 ssa_720 = ffma ssa_736, ssa_752, ssa_736 | |
vec1 32 ssa_719 = ffma ssa_735, ssa_751, ssa_735 | |
vec1 32 ssa_718 = ffma ssa_734, ssa_750, ssa_734 | |
vec1 32 ssa_717 = ffma ssa_733, ssa_749, ssa_733 | |
vec1 32 ssa_716 = ffma ssa_732, ssa_748, ssa_732 | |
vec1 32 ssa_715 = ffma ssa_731, ssa_747, ssa_731 | |
vec1 32 ssa_714 = ffma ssa_730, ssa_746, ssa_730 | |
vec1 32 ssa_713 = ffma ssa_729, ssa_745, ssa_729 | |
vec1 32 ssa_712 = ffma ssa_728, ssa_744, ssa_728 | |
vec1 32 ssa_711 = ffma ssa_727, ssa_743, ssa_727 | |
vec1 32 ssa_710 = ffma ssa_726, ssa_742, ssa_726 | |
vec1 32 ssa_709 = ffma ssa_725, ssa_741, ssa_725 | |
vec1 32 ssa_708 = ffma ssa_724, ssa_740, ssa_724 | |
vec1 32 ssa_707 = ffma ssa_723, ssa_739, ssa_723 | |
vec1 32 ssa_706 = ffma ssa_722, ssa_738, ssa_722 | |
vec1 32 ssa_705 = ffma ssa_721, ssa_737, ssa_721 | |
vec1 32 ssa_704 = ffma ssa_720, ssa_736, ssa_720 | |
vec1 32 ssa_703 = ffma ssa_719, ssa_735, ssa_719 | |
vec1 32 ssa_702 = ffma ssa_718, ssa_734, ssa_718 | |
vec1 32 ssa_701 = ffma ssa_717, ssa_733, ssa_717 | |
vec1 32 ssa_700 = ffma ssa_716, ssa_732, ssa_716 | |
vec1 32 ssa_699 = ffma ssa_715, ssa_731, ssa_715 | |
vec1 32 ssa_698 = ffma ssa_714, ssa_730, ssa_714 | |
vec1 32 ssa_697 = ffma ssa_713, ssa_729, ssa_713 | |
vec1 32 ssa_696 = ffma ssa_712, ssa_728, ssa_712 | |
vec1 32 ssa_695 = ffma ssa_711, ssa_727, ssa_711 | |
vec1 32 ssa_694 = ffma ssa_710, ssa_726, ssa_710 | |
vec1 32 ssa_693 = ffma ssa_709, ssa_725, ssa_709 | |
vec1 32 ssa_692 = ffma ssa_708, ssa_724, ssa_708 | |
vec1 32 ssa_691 = ffma ssa_707, ssa_723, ssa_707 | |
vec1 32 ssa_690 = ffma ssa_706, ssa_722, ssa_706 | |
vec1 32 ssa_689 = ffma ssa_705, ssa_721, ssa_705 | |
vec1 32 ssa_688 = ffma ssa_704, ssa_720, ssa_704 | |
vec1 32 ssa_687 = ffma ssa_703, ssa_719, ssa_703 | |
vec1 32 ssa_686 = ffma ssa_702, ssa_718, ssa_702 | |
vec1 32 ssa_685 = ffma ssa_701, ssa_717, ssa_701 | |
vec1 32 ssa_684 = ffma ssa_700, ssa_716, ssa_700 | |
vec1 32 ssa_683 = ffma ssa_699, ssa_715, ssa_699 | |
vec1 32 ssa_682 = ffma ssa_698, ssa_714, ssa_698 | |
vec1 32 ssa_681 = ffma ssa_697, ssa_713, ssa_697 | |
vec1 32 ssa_680 = ffma ssa_696, ssa_712, ssa_696 | |
vec1 32 ssa_679 = ffma ssa_695, ssa_711, ssa_695 | |
vec1 32 ssa_678 = ffma ssa_694, ssa_710, ssa_694 | |
vec1 32 ssa_677 = ffma ssa_693, ssa_709, ssa_693 | |
vec1 32 ssa_676 = ffma ssa_692, ssa_708, ssa_692 | |
vec1 32 ssa_675 = ffma ssa_691, ssa_707, ssa_691 | |
vec1 32 ssa_674 = ffma ssa_690, ssa_706, ssa_690 | |
vec1 32 ssa_673 = ffma ssa_689, ssa_705, ssa_689 | |
vec1 32 ssa_672 = ffma ssa_688, ssa_704, ssa_688 | |
vec1 32 ssa_671 = ffma ssa_687, ssa_703, ssa_687 | |
vec1 32 ssa_670 = ffma ssa_686, ssa_702, ssa_686 | |
vec1 32 ssa_669 = ffma ssa_685, ssa_701, ssa_685 | |
vec1 32 ssa_668 = ffma ssa_684, ssa_700, ssa_684 | |
vec1 32 ssa_667 = ffma ssa_683, ssa_699, ssa_683 | |
vec1 32 ssa_666 = ffma ssa_682, ssa_698, ssa_682 | |
vec1 32 ssa_665 = ffma ssa_681, ssa_697, ssa_681 | |
vec1 32 ssa_664 = ffma ssa_680, ssa_696, ssa_680 | |
vec1 32 ssa_663 = ffma ssa_679, ssa_695, ssa_679 | |
vec1 32 ssa_662 = ffma ssa_678, ssa_694, ssa_678 | |
vec1 32 ssa_661 = ffma ssa_677, ssa_693, ssa_677 | |
vec1 32 ssa_660 = ffma ssa_676, ssa_692, ssa_676 | |
vec1 32 ssa_659 = ffma ssa_675, ssa_691, ssa_675 | |
vec1 32 ssa_658 = ffma ssa_674, ssa_690, ssa_674 | |
vec1 32 ssa_657 = ffma ssa_673, ssa_689, ssa_673 | |
vec1 32 ssa_656 = ffma ssa_672, ssa_688, ssa_672 | |
vec1 32 ssa_655 = ffma ssa_671, ssa_687, ssa_671 | |
vec1 32 ssa_654 = ffma ssa_670, ssa_686, ssa_670 | |
vec1 32 ssa_653 = ffma ssa_669, ssa_685, ssa_669 | |
vec1 32 ssa_652 = ffma ssa_668, ssa_684, ssa_668 | |
vec1 32 ssa_651 = ffma ssa_667, ssa_683, ssa_667 | |
vec1 32 ssa_650 = ffma ssa_666, ssa_682, ssa_666 | |
vec1 32 ssa_649 = ffma ssa_665, ssa_681, ssa_665 | |
vec1 32 ssa_648 = ffma ssa_664, ssa_680, ssa_664 | |
vec1 32 ssa_647 = ffma ssa_663, ssa_679, ssa_663 | |
vec1 32 ssa_646 = ffma ssa_662, ssa_678, ssa_662 | |
vec1 32 ssa_645 = ffma ssa_661, ssa_677, ssa_661 | |
vec1 32 ssa_644 = ffma ssa_660, ssa_676, ssa_660 | |
vec1 32 ssa_643 = ffma ssa_659, ssa_675, ssa_659 | |
vec1 32 ssa_642 = ffma ssa_658, ssa_674, ssa_658 | |
vec1 32 ssa_641 = ffma ssa_657, ssa_673, ssa_657 | |
vec1 32 ssa_640 = ffma ssa_656, ssa_672, ssa_656 | |
vec1 32 ssa_639 = ffma ssa_655, ssa_671, ssa_655 | |
vec1 32 ssa_638 = ffma ssa_654, ssa_670, ssa_654 | |
vec1 32 ssa_637 = ffma ssa_653, ssa_669, ssa_653 | |
vec1 32 ssa_636 = ffma ssa_652, ssa_668, ssa_652 | |
vec1 32 ssa_635 = ffma ssa_651, ssa_667, ssa_651 | |
vec1 32 ssa_634 = ffma ssa_650, ssa_666, ssa_650 | |
vec1 32 ssa_633 = ffma ssa_649, ssa_665, ssa_649 | |
vec1 32 ssa_632 = ffma ssa_648, ssa_664, ssa_648 | |
vec1 32 ssa_631 = ffma ssa_647, ssa_663, ssa_647 | |
vec1 32 ssa_630 = ffma ssa_646, ssa_662, ssa_646 | |
vec1 32 ssa_629 = ffma ssa_645, ssa_661, ssa_645 | |
vec1 32 ssa_628 = ffma ssa_644, ssa_660, ssa_644 | |
vec1 32 ssa_627 = ffma ssa_643, ssa_659, ssa_643 | |
vec1 32 ssa_626 = ffma ssa_642, ssa_658, ssa_642 | |
vec1 32 ssa_625 = ffma ssa_641, ssa_657, ssa_641 | |
vec1 32 ssa_624 = ffma ssa_640, ssa_656, ssa_640 | |
vec1 32 ssa_623 = ffma ssa_639, ssa_655, ssa_639 | |
vec1 32 ssa_622 = ffma ssa_638, ssa_654, ssa_638 | |
vec1 32 ssa_621 = ffma ssa_637, ssa_653, ssa_637 | |
vec1 32 ssa_620 = ffma ssa_636, ssa_652, ssa_636 | |
vec1 32 ssa_619 = ffma ssa_635, ssa_651, ssa_635 | |
vec1 32 ssa_618 = ffma ssa_634, ssa_650, ssa_634 | |
vec1 32 ssa_617 = ffma ssa_633, ssa_649, ssa_633 | |
vec1 32 ssa_616 = ffma ssa_632, ssa_648, ssa_632 | |
vec1 32 ssa_615 = ffma ssa_631, ssa_647, ssa_631 | |
vec1 32 ssa_589 = iadd ssa_75, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_590 = fadd ssa_55, ssa_47 | |
vec1 32 ssa_591 = fadd ssa_56, ssa_48 | |
vec1 32 ssa_592 = fadd ssa_57, ssa_49 | |
vec1 32 ssa_593 = fadd ssa_58, ssa_50 | |
vec1 32 ssa_594 = fadd ssa_51, ssa_43 | |
vec1 32 ssa_595 = fadd ssa_52, ssa_44 | |
vec1 32 ssa_596 = fadd ssa_53, ssa_45 | |
vec1 32 ssa_597 = fadd ssa_54, ssa_46 | |
vec1 32 ssa_598 = fadd ssa_590, ssa_594 | |
vec1 32 ssa_599 = fadd ssa_591, ssa_595 | |
vec1 32 ssa_600 = fadd ssa_592, ssa_596 | |
vec1 32 ssa_601 = fadd ssa_593, ssa_597 | |
vec1 32 ssa_602 = fadd ssa_598, ssa_600 | |
vec1 32 ssa_603 = fadd ssa_599, ssa_601 | |
vec1 32 ssa_604 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_605 = ishl ssa_24, ssa_604 | |
vec1 32 ssa_606 = fadd ssa_602, ssa_603 | |
vec1 64 ssa_607 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_608 = unpack_64_2x32_split_x ssa_607 | |
vec1 32 ssa_609 = unpack_64_2x32_split_y ssa_607 | |
vec1 32 ssa_610 = iadd ssa_608, ssa_605 | |
vec1 32 ssa_611 = ult32 ssa_610, ssa_608 | |
vec1 32 ssa_612 = b2i32 ssa_611 | |
vec1 32 ssa_613 = iadd ssa_612, ssa_609 | |
vec1 64 ssa_614 = pack_64_2x32_split ssa_610, ssa_613 | |
intrinsic store_global (ssa_606, ssa_614) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
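---------- NIR notes ----------
In the optimized NIR above, the loop counter is compared against 0x8, so the loop has been restructured into 8 trips over a 256-ffma body (ssa_870 down to ssa_615) spread across 16 independent accumulator chains, followed by the fadd reduction tree in block_5 and a 64-bit SSBO address computation for the final store. That is 8 * 256 = 2048 FMAs, i.e. 4096 FLOPs, per invocation. The sketch below is a hypothetical GLSL reconstruction of that shape, written only to make the dump easier to follow; the names x, y, s and the inner-loop split are assumptions, not the compiled source.

#version 310 es
layout(local_size_x = 256) in;
layout(location = 1) uniform float _A;
layout(std430, binding = 0) writeonly buffer outbuf { float ptr[]; };

void main() {
    uint id = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * 256u
            + gl_GlobalInvocationID.z * 256u * 256u;
    float x[16], y[16];
    for (int j = 0; j < 16; j++) {
        x[j] = float(id) * 1e-10;   // the U32_TO_F32 + FMA by #0x2edbe6ff seed
        y[j] = _A + float(j);       // the FADD.f32 u1, #0x3f800000... ladder
    }
    for (int i = 0; i < 8; i++)     // the ICMP.s32.ge ..., #0x8 loop bound
        for (int k = 0; k < 8; k++) // fully unrolled into the 256-ffma body
            for (int j = 0; j < 16; j++) {
                // alternating dependent pair, visible above as
                // ffma a, b, a followed by ffma a', a, a'
                x[j] = y[j] * x[j] + y[j];
                y[j] = x[j] * y[j] + x[j];
            }
    float s = 0.0;
    for (int j = 0; j < 16; j++) s += x[j];  // block_5 reduction, as a plain sum
    ptr[id] = s;
}
----------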
block0 { | |
875 = MOV.i32 r62 | |
874 = MOV.i32 r61 | |
873 = MOV.i32 r60 | |
20 = LSHIFT_OR.i32 874, #0x0, #0x8.b0 | |
21 = IADD.s32 873, 20 | |
872 = MKVEC.v2i16 #0x0.h00, 875.h00 | |
24 = IADD.s32 21, 872 | |
26 = FADD.f32 u1, #0x3f800000 | |
27 = FADD.f32 u1, #0x40000000 | |
28 = FADD.f32 u1, #0x40400000 | |
29 = FADD.f32 u1, #0x40800000 | |
30 = FADD.f32 u1, #0x40a00000 | |
31 = FADD.f32 u1, #0x40c00000 | |
32 = FADD.f32 u1, #0x40e00000 | |
33 = FADD.f32 u1, #0x41000000 | |
34 = FADD.f32 u1, #0x41100000 | |
35 = FADD.f32 u1, #0x41200000 | |
36 = FADD.f32 u1, #0x41300000 | |
37 = FADD.f32 u1, #0x41400000 | |
38 = FADD.f32 u1, #0x41500000 | |
39 = FADD.f32 u1, #0x41600000 | |
40 = FADD.f32 u1, #0x41700000 | |
41 = U32_TO_F32 24 | |
42 = FMA.f32 41, #0x2edbe6ff, #0x0.neg | |
} -> block1 | |
block1 { | |
43 = PHI 42, 618 | |
44 = PHI 42, 617 | |
45 = PHI 42, 616 | |
46 = PHI 42, 615 | |
47 = PHI 42, 622 | |
48 = PHI 42, 621 | |
49 = PHI 42, 620 | |
50 = PHI 42, 619 | |
51 = PHI 42, 626 | |
52 = PHI 42, 625 | |
53 = PHI 42, 624 | |
54 = PHI 42, 623 | |
55 = PHI 42, 630 | |
56 = PHI 42, 629 | |
57 = PHI 42, 628 | |
58 = PHI 42, 627 | |
59 = PHI 37, 634 | |
60 = PHI 38, 633 | |
61 = PHI 39, 632 | |
62 = PHI 40, 631 | |
63 = PHI 33, 638 | |
64 = PHI 34, 637 | |
65 = PHI 35, 636 | |
66 = PHI 36, 635 | |
67 = PHI 29, 642 | |
68 = PHI 30, 641 | |
69 = PHI 31, 640 | |
70 = PHI 32, 639 | |
71 = PHI u1, 646 | |
72 = PHI 26, 645 | |
73 = PHI 27, 644 | |
74 = PHI 28, 643 | |
75 = PHI #0x0, 589 | |
76 = ICMP.s32.m1.ge 75, #0x8 | |
BRANCHZ.i16.eq 76.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
870 = FMA.f32 55, 71, 55 | |
869 = FMA.f32 56, 72, 56 | |
868 = FMA.f32 57, 73, 57 | |
867 = FMA.f32 58, 74, 58 | |
866 = FMA.f32 51, 67, 51 | |
865 = FMA.f32 52, 68, 52 | |
864 = FMA.f32 53, 69, 53 | |
863 = FMA.f32 54, 70, 54 | |
862 = FMA.f32 47, 63, 47 | |
861 = FMA.f32 48, 64, 48 | |
860 = FMA.f32 49, 65, 49 | |
859 = FMA.f32 50, 66, 50 | |
858 = FMA.f32 43, 59, 43 | |
857 = FMA.f32 44, 60, 44 | |
856 = FMA.f32 45, 61, 45 | |
855 = FMA.f32 46, 62, 46 | |
854 = FMA.f32 870, 55, 870 | |
853 = FMA.f32 869, 56, 869 | |
852 = FMA.f32 868, 57, 868 | |
851 = FMA.f32 867, 58, 867 | |
850 = FMA.f32 866, 51, 866 | |
849 = FMA.f32 865, 52, 865 | |
848 = FMA.f32 864, 53, 864 | |
847 = FMA.f32 863, 54, 863 | |
846 = FMA.f32 862, 47, 862 | |
845 = FMA.f32 861, 48, 861 | |
844 = FMA.f32 860, 49, 860 | |
843 = FMA.f32 859, 50, 859 | |
842 = FMA.f32 858, 43, 858 | |
841 = FMA.f32 857, 44, 857 | |
840 = FMA.f32 856, 45, 856 | |
839 = FMA.f32 855, 46, 855 | |
838 = FMA.f32 854, 870, 854 | |
837 = FMA.f32 853, 869, 853 | |
836 = FMA.f32 852, 868, 852 | |
835 = FMA.f32 851, 867, 851 | |
834 = FMA.f32 850, 866, 850 | |
833 = FMA.f32 849, 865, 849 | |
832 = FMA.f32 848, 864, 848 | |
831 = FMA.f32 847, 863, 847 | |
830 = FMA.f32 846, 862, 846 | |
829 = FMA.f32 845, 861, 845 | |
828 = FMA.f32 844, 860, 844 | |
827 = FMA.f32 843, 859, 843 | |
826 = FMA.f32 842, 858, 842 | |
825 = FMA.f32 841, 857, 841 | |
824 = FMA.f32 840, 856, 840 | |
823 = FMA.f32 839, 855, 839 | |
822 = FMA.f32 838, 854, 838 | |
821 = FMA.f32 837, 853, 837 | |
820 = FMA.f32 836, 852, 836 | |
819 = FMA.f32 835, 851, 835 | |
818 = FMA.f32 834, 850, 834 | |
817 = FMA.f32 833, 849, 833 | |
816 = FMA.f32 832, 848, 832 | |
815 = FMA.f32 831, 847, 831 | |
814 = FMA.f32 830, 846, 830 | |
813 = FMA.f32 829, 845, 829 | |
812 = FMA.f32 828, 844, 828 | |
811 = FMA.f32 827, 843, 827 | |
810 = FMA.f32 826, 842, 826 | |
809 = FMA.f32 825, 841, 825 | |
808 = FMA.f32 824, 840, 824 | |
807 = FMA.f32 823, 839, 823 | |
806 = FMA.f32 822, 838, 822 | |
805 = FMA.f32 821, 837, 821 | |
804 = FMA.f32 820, 836, 820 | |
803 = FMA.f32 819, 835, 819 | |
802 = FMA.f32 818, 834, 818 | |
801 = FMA.f32 817, 833, 817 | |
800 = FMA.f32 816, 832, 816 | |
799 = FMA.f32 815, 831, 815 | |
798 = FMA.f32 814, 830, 814 | |
797 = FMA.f32 813, 829, 813 | |
796 = FMA.f32 812, 828, 812 | |
795 = FMA.f32 811, 827, 811 | |
794 = FMA.f32 810, 826, 810 | |
793 = FMA.f32 809, 825, 809 | |
792 = FMA.f32 808, 824, 808 | |
791 = FMA.f32 807, 823, 807 | |
790 = FMA.f32 806, 822, 806 | |
789 = FMA.f32 805, 821, 805 | |
788 = FMA.f32 804, 820, 804 | |
787 = FMA.f32 803, 819, 803 | |
786 = FMA.f32 802, 818, 802 | |
785 = FMA.f32 801, 817, 801 | |
784 = FMA.f32 800, 816, 800 | |
783 = FMA.f32 799, 815, 799 | |
782 = FMA.f32 798, 814, 798 | |
781 = FMA.f32 797, 813, 797 | |
780 = FMA.f32 796, 812, 796 | |
779 = FMA.f32 795, 811, 795 | |
778 = FMA.f32 794, 810, 794 | |
777 = FMA.f32 793, 809, 793 | |
776 = FMA.f32 792, 808, 792 | |
775 = FMA.f32 791, 807, 791 | |
774 = FMA.f32 790, 806, 790 | |
773 = FMA.f32 789, 805, 789 | |
772 = FMA.f32 788, 804, 788 | |
771 = FMA.f32 787, 803, 787 | |
770 = FMA.f32 786, 802, 786 | |
769 = FMA.f32 785, 801, 785 | |
768 = FMA.f32 784, 800, 784 | |
767 = FMA.f32 783, 799, 783 | |
766 = FMA.f32 782, 798, 782 | |
765 = FMA.f32 781, 797, 781 | |
764 = FMA.f32 780, 796, 780 | |
763 = FMA.f32 779, 795, 779 | |
762 = FMA.f32 778, 794, 778 | |
761 = FMA.f32 777, 793, 777 | |
760 = FMA.f32 776, 792, 776 | |
759 = FMA.f32 775, 791, 775 | |
758 = FMA.f32 774, 790, 774 | |
757 = FMA.f32 773, 789, 773 | |
756 = FMA.f32 772, 788, 772 | |
755 = FMA.f32 771, 787, 771 | |
754 = FMA.f32 770, 786, 770 | |
753 = FMA.f32 769, 785, 769 | |
752 = FMA.f32 768, 784, 768 | |
751 = FMA.f32 767, 783, 767 | |
750 = FMA.f32 766, 782, 766 | |
749 = FMA.f32 765, 781, 765 | |
748 = FMA.f32 764, 780, 764 | |
747 = FMA.f32 763, 779, 763 | |
746 = FMA.f32 762, 778, 762 | |
745 = FMA.f32 761, 777, 761 | |
744 = FMA.f32 760, 776, 760 | |
743 = FMA.f32 759, 775, 759 | |
742 = FMA.f32 758, 774, 758 | |
741 = FMA.f32 757, 773, 757 | |
740 = FMA.f32 756, 772, 756 | |
739 = FMA.f32 755, 771, 755 | |
738 = FMA.f32 754, 770, 754 | |
737 = FMA.f32 753, 769, 753 | |
736 = FMA.f32 752, 768, 752 | |
735 = FMA.f32 751, 767, 751 | |
734 = FMA.f32 750, 766, 750 | |
733 = FMA.f32 749, 765, 749 | |
732 = FMA.f32 748, 764, 748 | |
731 = FMA.f32 747, 763, 747 | |
730 = FMA.f32 746, 762, 746 | |
729 = FMA.f32 745, 761, 745 | |
728 = FMA.f32 744, 760, 744 | |
727 = FMA.f32 743, 759, 743 | |
726 = FMA.f32 742, 758, 742 | |
725 = FMA.f32 741, 757, 741 | |
724 = FMA.f32 740, 756, 740 | |
723 = FMA.f32 739, 755, 739 | |
722 = FMA.f32 738, 754, 738 | |
721 = FMA.f32 737, 753, 737 | |
720 = FMA.f32 736, 752, 736 | |
719 = FMA.f32 735, 751, 735 | |
718 = FMA.f32 734, 750, 734 | |
717 = FMA.f32 733, 749, 733 | |
716 = FMA.f32 732, 748, 732 | |
715 = FMA.f32 731, 747, 731 | |
714 = FMA.f32 730, 746, 730 | |
713 = FMA.f32 729, 745, 729 | |
712 = FMA.f32 728, 744, 728 | |
711 = FMA.f32 727, 743, 727 | |
710 = FMA.f32 726, 742, 726 | |
709 = FMA.f32 725, 741, 725 | |
708 = FMA.f32 724, 740, 724 | |
707 = FMA.f32 723, 739, 723 | |
706 = FMA.f32 722, 738, 722 | |
705 = FMA.f32 721, 737, 721 | |
704 = FMA.f32 720, 736, 720 | |
703 = FMA.f32 719, 735, 719 | |
702 = FMA.f32 718, 734, 718 | |
701 = FMA.f32 717, 733, 717 | |
700 = FMA.f32 716, 732, 716 | |
699 = FMA.f32 715, 731, 715 | |
698 = FMA.f32 714, 730, 714 | |
697 = FMA.f32 713, 729, 713 | |
696 = FMA.f32 712, 728, 712 | |
695 = FMA.f32 711, 727, 711 | |
694 = FMA.f32 710, 726, 710 | |
693 = FMA.f32 709, 725, 709 | |
692 = FMA.f32 708, 724, 708 | |
691 = FMA.f32 707, 723, 707 | |
690 = FMA.f32 706, 722, 706 | |
689 = FMA.f32 705, 721, 705 | |
688 = FMA.f32 704, 720, 704 | |
687 = FMA.f32 703, 719, 703 | |
686 = FMA.f32 702, 718, 702 | |
685 = FMA.f32 701, 717, 701 | |
684 = FMA.f32 700, 716, 700 | |
683 = FMA.f32 699, 715, 699 | |
682 = FMA.f32 698, 714, 698 | |
681 = FMA.f32 697, 713, 697 | |
680 = FMA.f32 696, 712, 696 | |
679 = FMA.f32 695, 711, 695 | |
678 = FMA.f32 694, 710, 694 | |
677 = FMA.f32 693, 709, 693 | |
676 = FMA.f32 692, 708, 692 | |
675 = FMA.f32 691, 707, 691 | |
674 = FMA.f32 690, 706, 690 | |
673 = FMA.f32 689, 705, 689 | |
672 = FMA.f32 688, 704, 688 | |
671 = FMA.f32 687, 703, 687 | |
670 = FMA.f32 686, 702, 686 | |
669 = FMA.f32 685, 701, 685 | |
668 = FMA.f32 684, 700, 684 | |
667 = FMA.f32 683, 699, 683 | |
666 = FMA.f32 682, 698, 682 | |
665 = FMA.f32 681, 697, 681 | |
664 = FMA.f32 680, 696, 680 | |
663 = FMA.f32 679, 695, 679 | |
662 = FMA.f32 678, 694, 678 | |
661 = FMA.f32 677, 693, 677 | |
660 = FMA.f32 676, 692, 676 | |
659 = FMA.f32 675, 691, 675 | |
658 = FMA.f32 674, 690, 674 | |
657 = FMA.f32 673, 689, 673 | |
656 = FMA.f32 672, 688, 672 | |
655 = FMA.f32 671, 687, 671 | |
654 = FMA.f32 670, 686, 670 | |
653 = FMA.f32 669, 685, 669 | |
652 = FMA.f32 668, 684, 668 | |
651 = FMA.f32 667, 683, 667 | |
650 = FMA.f32 666, 682, 666 | |
649 = FMA.f32 665, 681, 665 | |
648 = FMA.f32 664, 680, 664 | |
647 = FMA.f32 663, 679, 663 | |
646 = FMA.f32 662, 678, 662 | |
645 = FMA.f32 661, 677, 661 | |
644 = FMA.f32 660, 676, 660 | |
643 = FMA.f32 659, 675, 659 | |
642 = FMA.f32 658, 674, 658 | |
641 = FMA.f32 657, 673, 657 | |
640 = FMA.f32 656, 672, 656 | |
639 = FMA.f32 655, 671, 655 | |
638 = FMA.f32 654, 670, 654 | |
637 = FMA.f32 653, 669, 653 | |
636 = FMA.f32 652, 668, 652 | |
635 = FMA.f32 651, 667, 651 | |
634 = FMA.f32 650, 666, 650 | |
633 = FMA.f32 649, 665, 649 | |
632 = FMA.f32 648, 664, 648 | |
631 = FMA.f32 647, 663, 647 | |
630 = FMA.f32 646, 662, 646 | |
629 = FMA.f32 645, 661, 645 | |
628 = FMA.f32 644, 660, 644 | |
627 = FMA.f32 643, 659, 643 | |
626 = FMA.f32 642, 658, 642 | |
625 = FMA.f32 641, 657, 641 | |
624 = FMA.f32 640, 656, 640 | |
623 = FMA.f32 639, 655, 639 | |
622 = FMA.f32 638, 654, 638 | |
621 = FMA.f32 637, 653, 637 | |
620 = FMA.f32 636, 652, 636 | |
619 = FMA.f32 635, 651, 635 | |
618 = FMA.f32 634, 650, 634 | |
617 = FMA.f32 633, 649, 633 | |
616 = FMA.f32 632, 648, 632 | |
615 = FMA.f32 631, 647, 631 | |
589 = IADD.s32 75, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
590 = FADD.f32 55, 47 | |
591 = FADD.f32 56, 48 | |
592 = FADD.f32 57, 49 | |
593 = FADD.f32 58, 50 | |
594 = FADD.f32 51, 43 | |
595 = FADD.f32 52, 44 | |
596 = FADD.f32 53, 45 | |
597 = FADD.f32 54, 46 | |
598 = FADD.f32 590, 594 | |
599 = FADD.f32 591, 595 | |
600 = FADD.f32 592, 596 | |
601 = FADD.f32 593, 597 | |
602 = FADD.f32 598, 600 | |
603 = FADD.f32 599, 601 | |
605 = LSHIFT_OR.i32 24, #0x0, #0x2.b0 | |
606 = FADD.f32 602, 603 | |
610 = IADD.s32 u0, 605 | |
612 = ICMP.u32.i1.lt 610, u0 | |
613 = IADD.s32 612, u0[1] | |
STORE.i32 606, 610, 613, byte_offset:0 | |
} from block2 | |
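---------- RA notes ----------
The listing above is the Bifrost IR before register allocation; the listing that follows is the same program after RA. The SSA value numbers are rewritten to the physical registers r0-r33, and the PHI web at the head of block1 collapses into the MOV.i32 chain at the end of block0: r1-r15 hold the FADD ladder, r17-r31 are all copies of the r16 seed, r32 takes u1, and r33 is the loop counter. Sixteen accumulator pairs mean 32 floats live at once, which together with the invocation id in r0 and the counter accounts for the full r0-r33 footprint.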
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = MOV.i32 #0x3f800000 | |
r1 = FADD.f32 u1, r1 | |
r2 = MOV.i32 #0x40000000 | |
r2 = FADD.f32 u1, r2 | |
r3 = MOV.i32 #0x40400000 | |
r3 = FADD.f32 u1, r3 | |
r4 = MOV.i32 #0x40800000 | |
r4 = FADD.f32 u1, r4 | |
r5 = MOV.i32 #0x40a00000 | |
r5 = FADD.f32 u1, r5 | |
r6 = MOV.i32 #0x40c00000 | |
r6 = FADD.f32 u1, r6 | |
r7 = MOV.i32 #0x40e00000 | |
r7 = FADD.f32 u1, r7 | |
r8 = MOV.i32 #0x41000000 | |
r8 = FADD.f32 u1, r8 | |
r9 = MOV.i32 #0x41100000 | |
r9 = FADD.f32 u1, r9 | |
r10 = MOV.i32 #0x41200000 | |
r10 = FADD.f32 u1, r10 | |
r11 = MOV.i32 #0x41300000 | |
r11 = FADD.f32 u1, r11 | |
r12 = MOV.i32 #0x41400000 | |
r12 = FADD.f32 u1, r12 | |
r13 = MOV.i32 #0x41500000 | |
r13 = FADD.f32 u1, r13 | |
r14 = MOV.i32 #0x41600000 | |
r14 = FADD.f32 u1, r14 | |
r15 = MOV.i32 #0x41700000 | |
r15 = FADD.f32 u1, r15 | |
r16 = U32_TO_F32 r0 | |
r16 = FMA.f32 r16, #0x2edbe6ff, #0x0.neg | |
r17 = MOV.i32 r16 | |
r18 = MOV.i32 r16 | |
r19 = MOV.i32 r16 | |
r20 = MOV.i32 r16 | |
r21 = MOV.i32 r16 | |
r22 = MOV.i32 r16 | |
r23 = MOV.i32 r16 | |
r24 = MOV.i32 r16 | |
r25 = MOV.i32 r16 | |
r26 = MOV.i32 r16 | |
r27 = MOV.i32 r16 | |
r28 = MOV.i32 r16 | |
r29 = MOV.i32 r16 | |
r30 = MOV.i32 r16 | |
r31 = MOV.i32 r16 | |
r32 = MOV.i32 u1 | |
r33 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r34 = ICMP.s32.m1.ge r33, #0x8 | |
BRANCHZ.i16.eq r34.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r32 = FMA.f32 r28, r32, r28 | |
r1 = FMA.f32 r29, r1, r29 | |
r2 = FMA.f32 r30, r2, r30 | |
r3 = FMA.f32 r31, r3, r31 | |
r4 = FMA.f32 r24, r4, r24 | |
r5 = FMA.f32 r25, r5, r25 | |
r6 = FMA.f32 r26, r6, r26 | |
r7 = FMA.f32 r27, r7, r27 | |
r8 = FMA.f32 r20, r8, r20 | |
r9 = FMA.f32 r21, r9, r21 | |
r10 = FMA.f32 r22, r10, r22 | |
r11 = FMA.f32 r23, r11, r23 | |
r12 = FMA.f32 r16, r12, r16 | |
r13 = FMA.f32 r17, r13, r17 | |
r14 = FMA.f32 r18, r14, r18 | |
r15 = FMA.f32 r19, r15, r19 | |
r28 = FMA.f32 r32, r28, r32 | |
r29 = FMA.f32 r1, r29, r1 | |
r30 = FMA.f32 r2, r30, r2 | |
r31 = FMA.f32 r3, r31, r3 | |
r24 = FMA.f32 r4, r24, r4 | |
r25 = FMA.f32 r5, r25, r5 | |
r26 = FMA.f32 r6, r26, r6 | |
r27 = FMA.f32 r7, r27, r7 | |
r20 = FMA.f32 r8, r20, r8 | |
r21 = FMA.f32 r9, r21, r9 | |
r22 = FMA.f32 r10, r22, r10 | |
r23 = FMA.f32 r11, r23, r11 | |
r16 = FMA.f32 r12, r16, r12 | |
r17 = FMA.f32 r13, r17, r13 | |
r18 = FMA.f32 r14, r18, r14 | |
r19 = FMA.f32 r15, r19, r15 | |
r32 = FMA.f32 r28, r32, r28 | |
r1 = FMA.f32 r29, r1, r29 | |
r2 = FMA.f32 r30, r2, r30 | |
r3 = FMA.f32 r31, r3, r31 | |
r4 = FMA.f32 r24, r4, r24 | |
r5 = FMA.f32 r25, r5, r25 | |
r6 = FMA.f32 r26, r6, r26 | |
r7 = FMA.f32 r27, r7, r27 | |
r8 = FMA.f32 r20, r8, r20 | |
r9 = FMA.f32 r21, r9, r21 | |
r10 = FMA.f32 r22, r10, r22 | |
r11 = FMA.f32 r23, r11, r23 | |
r12 = FMA.f32 r16, r12, r16 | |
r13 = FMA.f32 r17, r13, r17 | |
r14 = FMA.f32 r18, r14, r18 | |
r15 = FMA.f32 r19, r15, r19 | |
r28 = FMA.f32 r32, r28, r32 | |
r29 = FMA.f32 r1, r29, r1 | |
r30 = FMA.f32 r2, r30, r2 | |
r31 = FMA.f32 r3, r31, r3 | |
r24 = FMA.f32 r4, r24, r4 | |
r25 = FMA.f32 r5, r25, r5 | |
r26 = FMA.f32 r6, r26, r6 | |
r27 = FMA.f32 r7, r27, r7 | |
r20 = FMA.f32 r8, r20, r8 | |
r21 = FMA.f32 r9, r21, r9 | |
r22 = FMA.f32 r10, r22, r10 | |
r23 = FMA.f32 r11, r23, r11 | |
r16 = FMA.f32 r12, r16, r12 | |
r17 = FMA.f32 r13, r17, r13 | |
r18 = FMA.f32 r14, r18, r14 | |
r19 = FMA.f32 r15, r19, r15 | |
r32 = FMA.f32 r28, r32, r28 | |
r1 = FMA.f32 r29, r1, r29 | |
r2 = FMA.f32 r30, r2, r30 | |
r3 = FMA.f32 r31, r3, r31 | |
r4 = FMA.f32 r24, r4, r24 | |
r5 = FMA.f32 r25, r5, r25 | |
r6 = FMA.f32 r26, r6, r26 | |
r7 = FMA.f32 r27, r7, r27 | |
r8 = FMA.f32 r20, r8, r20 | |
r9 = FMA.f32 r21, r9, r21 | |
r10 = FMA.f32 r22, r10, r22 | |
r11 = FMA.f32 r23, r11, r23 | |
r12 = FMA.f32 r16, r12, r16 | |
r13 = FMA.f32 r17, r13, r17 | |
r14 = FMA.f32 r18, r14, r18 | |
r15 = FMA.f32 r19, r15, r19 | |
r28 = FMA.f32 r32, r28, r32 | |
r29 = FMA.f32 r1, r29, r1 | |
r30 = FMA.f32 r2, r30, r2 | |
r31 = FMA.f32 r3, r31, r3 | |
r24 = FMA.f32 r4, r24, r4 | |
r25 = FMA.f32 r5, r25, r5 | |
r26 = FMA.f32 r6, r26, r6 | |
r27 = FMA.f32 r7, r27, r7 | |
r20 = FMA.f32 r8, r20, r8 | |
r21 = FMA.f32 r9, r21, r9 | |
r22 = FMA.f32 r10, r22, r10 | |
r23 = FMA.f32 r11, r23, r11 | |
r16 = FMA.f32 r12, r16, r12 | |
r17 = FMA.f32 r13, r17, r13 | |
r18 = FMA.f32 r14, r18, r14 | |
r19 = FMA.f32 r15, r19, r15 | |
r32 = FMA.f32 r28, r32, r28 | |
r1 = FMA.f32 r29, r1, r29 | |
r2 = FMA.f32 r30, r2, r30 | |
r3 = FMA.f32 r31, r3, r31 | |
r4 = FMA.f32 r24, r4, r24 | |
r5 = FMA.f32 r25, r5, r25 | |
r6 = FMA.f32 r26, r6, r26 | |
r7 = FMA.f32 r27, r7, r27 | |
r8 = FMA.f32 r20, r8, r20 | |
r9 = FMA.f32 r21, r9, r21 | |
r10 = FMA.f32 r22, r10, r22 | |
r11 = FMA.f32 r23, r11, r23 | |
r12 = FMA.f32 r16, r12, r16 | |
r13 = FMA.f32 r17, r13, r17 | |
r14 = FMA.f32 r18, r14, r18 | |
r15 = FMA.f32 r19, r15, r19 | |
r28 = FMA.f32 r32, r28, r32 | |
r29 = FMA.f32 r1, r29, r1 | |
r30 = FMA.f32 r2, r30, r2 | |
r31 = FMA.f32 r3, r31, r3 | |
r24 = FMA.f32 r4, r24, r4 | |
r25 = FMA.f32 r5, r25, r5 | |
r26 = FMA.f32 r6, r26, r6 | |
r27 = FMA.f32 r7, r27, r7 | |
r20 = FMA.f32 r8, r20, r8 | |
r21 = FMA.f32 r9, r21, r9 | |
r22 = FMA.f32 r10, r22, r10 | |
r23 = FMA.f32 r11, r23, r11 | |
r16 = FMA.f32 r12, r16, r12 | |
r17 = FMA.f32 r13, r17, r13 | |
r18 = FMA.f32 r14, r18, r14 | |
r19 = FMA.f32 r15, r19, r15 | |
r32 = FMA.f32 r28, r32, r28 | |
r1 = FMA.f32 r29, r1, r29 | |
r2 = FMA.f32 r30, r2, r30 | |
r3 = FMA.f32 r31, r3, r31 | |
r4 = FMA.f32 r24, r4, r24 | |
r5 = FMA.f32 r25, r5, r25 | |
r6 = FMA.f32 r26, r6, r26 | |
r7 = FMA.f32 r27, r7, r27 | |
r8 = FMA.f32 r20, r8, r20 | |
r9 = FMA.f32 r21, r9, r21 | |
r10 = FMA.f32 r22, r10, r22 | |
r11 = FMA.f32 r23, r11, r23 | |
r12 = FMA.f32 r16, r12, r16 | |
r13 = FMA.f32 r17, r13, r17 | |
r14 = FMA.f32 r18, r14, r18 | |
r15 = FMA.f32 r19, r15, r19 | |
r28 = FMA.f32 r32, r28, r32 | |
r29 = FMA.f32 r1, r29, r1 | |
r30 = FMA.f32 r2, r30, r2 | |
r31 = FMA.f32 r3, r31, r3 | |
r24 = FMA.f32 r4, r24, r4 | |
r25 = FMA.f32 r5, r25, r5 | |
r26 = FMA.f32 r6, r26, r6 | |
r27 = FMA.f32 r7, r27, r7 | |
r20 = FMA.f32 r8, r20, r8 | |
r21 = FMA.f32 r9, r21, r9 | |
r22 = FMA.f32 r10, r22, r10 | |
r23 = FMA.f32 r11, r23, r11 | |
r16 = FMA.f32 r12, r16, r12 | |
r17 = FMA.f32 r13, r17, r13 | |
r18 = FMA.f32 r14, r18, r14 | |
r19 = FMA.f32 r15, r19, r15 | |
r32 = FMA.f32 r28, r32, r28 | |
r1 = FMA.f32 r29, r1, r29 | |
r2 = FMA.f32 r30, r2, r30 | |
r3 = FMA.f32 r31, r3, r31 | |
r4 = FMA.f32 r24, r4, r24 | |
r5 = FMA.f32 r25, r5, r25 | |
r6 = FMA.f32 r26, r6, r26 | |
r7 = FMA.f32 r27, r7, r27 | |
r8 = FMA.f32 r20, r8, r20 | |
r9 = FMA.f32 r21, r9, r21 | |
r10 = FMA.f32 r22, r10, r22 | |
r11 = FMA.f32 r23, r11, r23 | |
r12 = FMA.f32 r16, r12, r16 | |
r13 = FMA.f32 r17, r13, r17 | |
r14 = FMA.f32 r18, r14, r18 | |
r15 = FMA.f32 r19, r15, r19 | |
r28 = FMA.f32 r32, r28, r32 | |
r29 = FMA.f32 r1, r29, r1 | |
r30 = FMA.f32 r2, r30, r2 | |
r31 = FMA.f32 r3, r31, r3 | |
r24 = FMA.f32 r4, r24, r4 | |
r25 = FMA.f32 r5, r25, r5 | |
r26 = FMA.f32 r6, r26, r6 | |
r27 = FMA.f32 r7, r27, r7 | |
r20 = FMA.f32 r8, r20, r8 | |
r21 = FMA.f32 r9, r21, r9 | |
r22 = FMA.f32 r10, r22, r10 | |
r23 = FMA.f32 r11, r23, r11 | |
r16 = FMA.f32 r12, r16, r12 | |
r17 = FMA.f32 r13, r17, r13 | |
r18 = FMA.f32 r14, r18, r14 | |
r19 = FMA.f32 r15, r19, r15 | |
r32 = FMA.f32 r28, r32, r28 | |
r1 = FMA.f32 r29, r1, r29 | |
r2 = FMA.f32 r30, r2, r30 | |
r3 = FMA.f32 r31, r3, r31 | |
r4 = FMA.f32 r24, r4, r24 | |
r5 = FMA.f32 r25, r5, r25 | |
r6 = FMA.f32 r26, r6, r26 | |
r7 = FMA.f32 r27, r7, r27 | |
r8 = FMA.f32 r20, r8, r20 | |
r9 = FMA.f32 r21, r9, r21 | |
r10 = FMA.f32 r22, r10, r22 | |
r11 = FMA.f32 r23, r11, r23 | |
r12 = FMA.f32 r16, r12, r16 | |
r13 = FMA.f32 r17, r13, r17 | |
r14 = FMA.f32 r18, r14, r18 | |
r15 = FMA.f32 r19, r15, r19 | |
r28 = FMA.f32 r32, r28, r32 | |
r29 = FMA.f32 r1, r29, r1 | |
r30 = FMA.f32 r2, r30, r2 | |
r31 = FMA.f32 r3, r31, r3 | |
r24 = FMA.f32 r4, r24, r4 | |
r25 = FMA.f32 r5, r25, r5 | |
r26 = FMA.f32 r6, r26, r6 | |
r27 = FMA.f32 r7, r27, r7 | |
r20 = FMA.f32 r8, r20, r8 | |
r21 = FMA.f32 r9, r21, r9 | |
r22 = FMA.f32 r10, r22, r10 | |
r23 = FMA.f32 r11, r23, r11 | |
r16 = FMA.f32 r12, r16, r12 | |
r17 = FMA.f32 r13, r17, r13 | |
r18 = FMA.f32 r14, r18, r14 | |
r19 = FMA.f32 r15, r19, r15 | |
r32 = FMA.f32 r28, r32, r28 | |
r1 = FMA.f32 r29, r1, r29 | |
r2 = FMA.f32 r30, r2, r30 | |
r3 = FMA.f32 r31, r3, r31 | |
r4 = FMA.f32 r24, r4, r24 | |
r5 = FMA.f32 r25, r5, r25 | |
r6 = FMA.f32 r26, r6, r26 | |
r7 = FMA.f32 r27, r7, r27 | |
r8 = FMA.f32 r20, r8, r20 | |
r9 = FMA.f32 r21, r9, r21 | |
r10 = FMA.f32 r22, r10, r22 | |
r11 = FMA.f32 r23, r11, r23 | |
r12 = FMA.f32 r16, r12, r16 | |
r13 = FMA.f32 r17, r13, r17 | |
r14 = FMA.f32 r18, r14, r18 | |
r15 = FMA.f32 r19, r15, r19 | |
r28 = FMA.f32 r32, r28, r32 | |
r29 = FMA.f32 r1, r29, r1 | |
r30 = FMA.f32 r2, r30, r2 | |
r31 = FMA.f32 r3, r31, r3 | |
r24 = FMA.f32 r4, r24, r4 | |
r25 = FMA.f32 r5, r25, r5 | |
r26 = FMA.f32 r6, r26, r6 | |
r27 = FMA.f32 r7, r27, r7 | |
r20 = FMA.f32 r8, r20, r8 | |
r21 = FMA.f32 r9, r21, r9 | |
r22 = FMA.f32 r10, r22, r10 | |
r23 = FMA.f32 r11, r23, r11 | |
r16 = FMA.f32 r12, r16, r12 | |
r17 = FMA.f32 r13, r17, r13 | |
r18 = FMA.f32 r14, r18, r14 | |
r19 = FMA.f32 r15, r19, r15 | |
r33 = IADD.s32 r33, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r1 = FADD.f32 r28, r20 | |
r2 = FADD.f32 r29, r21 | |
r3 = FADD.f32 r30, r22 | |
r4 = FADD.f32 r31, r23 | |
r5 = FADD.f32 r24, r16 | |
r6 = FADD.f32 r25, r17 | |
r7 = FADD.f32 r26, r18 | |
r8 = FADD.f32 r27, r19 | |
r1 = FADD.f32 r1, r5 | |
r2 = FADD.f32 r2, r6 | |
r3 = FADD.f32 r3, r7 | |
r4 = FADD.f32 r4, r8 | |
r1 = FADD.f32 r1, r3 | |
r2 = FADD.f32 r2, r4 | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r1 = FADD.f32 r1, r2 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
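---------- scheduling notes ----------
The next listing pairs the allocated instructions into Bifrost tuples: each `*` line is issued on the FMA unit and each `+` line on the ADD unit (an interpretation of Mesa's dump format, not stated in the output itself), grouped into bundles tagged `id(0) nbb`, with the packed FAU constants (e.g. 400000003f800000) printed after the bundle that consumes them. Because the hot loop is pure FMA, the ADD slot is a NOP for almost every tuple in block4; the loop increment and the back-edge JUMP are the only work the ADD unit picks up, so this kernel measures peak FMA throughput rather than combined FMA+ADD issue.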
block0 { | |
id(0) nbb | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.x.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
* _.h00 = MOV.i32 fau.x | |
+ _.h00 = MOV.i32 fau.y | |
* r1 = FADD.f32 fau.x, t0 | |
+ r2 = FADD.f32 fau.x, t1 | |
* NOP | |
+ _.h00 = MOV.i32 fau.y | |
* NOP | |
+ r3 = FADD.f32 fau.x, t1 | |
400000003f800000 4040000000000008 | |
id(0) nbb | |
* _.h00 = MOV.i32 fau.x | |
+ _.h00 = MOV.i32 fau.y | |
* r4 = FADD.f32 fau.x, t0 | |
+ r5 = FADD.f32 fau.x, t1 | |
* _.h00 = MOV.i32 fau.x | |
+ _.h00 = MOV.i32 fau.y | |
* r6 = FADD.f32 fau.x, t0 | |
+ r7 = FADD.f32 fau.x, t1 | |
* _.h00 = MOV.i32 fau.x | |
+ _.h00 = MOV.i32 fau.y | |
* r8 = FADD.f32 fau.x, t0 | |
+ r9 = FADD.f32 fau.x, t1 | |
* NOP | |
+ _.h00 = MOV.i32 fau.y | |
* NOP | |
+ r10 = FADD.f32 fau.x, t1 | |
40a0000040800000 40e0000040c00000 4110000041000000 4120000000000000 | |
id(0) nbb | |
* _.h00 = MOV.i32 fau.x | |
+ _.h00 = MOV.i32 fau.y | |
* r11 = FADD.f32 fau.x, t0 | |
+ r12 = FADD.f32 fau.x, t1 | |
* _.h00 = MOV.i32 fau.x | |
+ _.h00 = MOV.i32 fau.y | |
* r13 = FADD.f32 fau.x, t0 | |
+ r14 = FADD.f32 fau.x, t1 | |
* NOP | |
+ _.h00 = MOV.i32 fau.y | |
* r15 = FADD.f32 fau.x, t1 | |
+ _.h00 = U32_TO_F32 r0 | |
* r16 = FMA.f32 t1, fau.x, t.neg | |
+ r17 = MOV.i32 t | |
* NOP | |
+ r18 = MOV.i32 t0 | |
4140000041300000 4160000041500000 417000002edbe6ff | |
id(0) nbb r_uncond | |
* r19 = MOV.i32 r16 | |
+ r20 = MOV.i32 r16 | |
* r21 = MOV.i32 r16 | |
+ r22 = MOV.i32 r16 | |
* r23 = MOV.i32 r16 | |
+ r24 = MOV.i32 r16 | |
* r25 = MOV.i32 r16 | |
+ r26 = MOV.i32 r16 | |
* r27 = MOV.i32 r16 | |
+ r28 = MOV.i32 r16 | |
* r29 = MOV.i32 r16 | |
+ r30 = MOV.i32 r16 | |
* r31 = MOV.i32 r16 | |
+ r32 = MOV.i32 fau.x | |
* NOP | |
+ r33 = MOV.i32 fau.x | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r33, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000008 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* r32 = FMA.f32 r28, r32, r28 | |
+ NOP | |
* r1 = FMA.f32 r29, r1, r29 | |
+ NOP | |
* r2 = FMA.f32 r30, r2, r30 | |
+ NOP | |
* r3 = FMA.f32 r31, r3, r31 | |
+ NOP | |
* r4 = FMA.f32 r24, r4, r24 | |
+ NOP | |
* r5 = FMA.f32 r25, r5, r25 | |
+ NOP | |
* r6 = FMA.f32 r26, r6, r26 | |
+ NOP | |
* r7 = FMA.f32 r27, r7, r27 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r20, r8, r20 | |
+ NOP | |
* r9 = FMA.f32 r21, r9, r21 | |
+ NOP | |
* r10 = FMA.f32 r22, r10, r22 | |
+ NOP | |
* r11 = FMA.f32 r23, r11, r23 | |
+ NOP | |
* r12 = FMA.f32 r16, r12, r16 | |
+ NOP | |
* r13 = FMA.f32 r17, r13, r17 | |
+ NOP | |
* r14 = FMA.f32 r18, r14, r18 | |
+ NOP | |
* r15 = FMA.f32 r19, r15, r19 | |
+ NOP | |
id(0) nbb | |
* r28 = FMA.f32 r32, r28, r32 | |
+ NOP | |
* r29 = FMA.f32 r1, r29, r1 | |
+ NOP | |
* r30 = FMA.f32 r2, r30, r2 | |
+ NOP | |
* r31 = FMA.f32 r3, r31, r3 | |
+ NOP | |
* r24 = FMA.f32 r4, r24, r4 | |
+ NOP | |
* r25 = FMA.f32 r5, r25, r5 | |
+ NOP | |
* r26 = FMA.f32 r6, r26, r6 | |
+ NOP | |
* r27 = FMA.f32 r7, r27, r7 | |
+ NOP | |
id(0) nbb | |
* r20 = FMA.f32 r8, r20, r8 | |
+ NOP | |
* r21 = FMA.f32 r9, r21, r9 | |
+ NOP | |
* r22 = FMA.f32 r10, r22, r10 | |
+ NOP | |
* r23 = FMA.f32 r11, r23, r11 | |
+ NOP | |
* r16 = FMA.f32 r12, r16, r12 | |
+ NOP | |
* r17 = FMA.f32 r13, r17, r13 | |
+ NOP | |
* r18 = FMA.f32 r14, r18, r14 | |
+ NOP | |
* r19 = FMA.f32 r15, r19, r15 | |
+ NOP | |
id(0) nbb | |
* r32 = FMA.f32 r28, r32, r28 | |
+ NOP | |
* r1 = FMA.f32 r29, r1, r29 | |
+ NOP | |
* r2 = FMA.f32 r30, r2, r30 | |
+ NOP | |
* r3 = FMA.f32 r31, r3, r31 | |
+ NOP | |
* r4 = FMA.f32 r24, r4, r24 | |
+ NOP | |
* r5 = FMA.f32 r25, r5, r25 | |
+ NOP | |
* r6 = FMA.f32 r26, r6, r26 | |
+ NOP | |
* r7 = FMA.f32 r27, r7, r27 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r20, r8, r20 | |
+ NOP | |
* r9 = FMA.f32 r21, r9, r21 | |
+ NOP | |
* r10 = FMA.f32 r22, r10, r22 | |
+ NOP | |
* r11 = FMA.f32 r23, r11, r23 | |
+ NOP | |
* r12 = FMA.f32 r16, r12, r16 | |
+ NOP | |
* r13 = FMA.f32 r17, r13, r17 | |
+ NOP | |
* r14 = FMA.f32 r18, r14, r18 | |
+ NOP | |
* r15 = FMA.f32 r19, r15, r19 | |
+ NOP | |
id(0) nbb | |
* r28 = FMA.f32 r32, r28, r32 | |
+ NOP | |
* r29 = FMA.f32 r1, r29, r1 | |
+ NOP | |
* r30 = FMA.f32 r2, r30, r2 | |
+ NOP | |
* r31 = FMA.f32 r3, r31, r3 | |
+ NOP | |
* r24 = FMA.f32 r4, r24, r4 | |
+ NOP | |
* r25 = FMA.f32 r5, r25, r5 | |
+ NOP | |
* r26 = FMA.f32 r6, r26, r6 | |
+ NOP | |
* r27 = FMA.f32 r7, r27, r7 | |
+ NOP | |
id(0) nbb | |
* r20 = FMA.f32 r8, r20, r8 | |
+ NOP | |
* r21 = FMA.f32 r9, r21, r9 | |
+ NOP | |
* r22 = FMA.f32 r10, r22, r10 | |
+ NOP | |
* r23 = FMA.f32 r11, r23, r11 | |
+ NOP | |
* r16 = FMA.f32 r12, r16, r12 | |
+ NOP | |
* r17 = FMA.f32 r13, r17, r13 | |
+ NOP | |
* r18 = FMA.f32 r14, r18, r14 | |
+ NOP | |
* r19 = FMA.f32 r15, r19, r15 | |
+ NOP | |
id(0) nbb | |
* r32 = FMA.f32 r28, r32, r28 | |
+ NOP | |
* r1 = FMA.f32 r29, r1, r29 | |
+ NOP | |
* r2 = FMA.f32 r30, r2, r30 | |
+ NOP | |
* r3 = FMA.f32 r31, r3, r31 | |
+ NOP | |
* r4 = FMA.f32 r24, r4, r24 | |
+ NOP | |
* r5 = FMA.f32 r25, r5, r25 | |
+ NOP | |
* r6 = FMA.f32 r26, r6, r26 | |
+ NOP | |
* r7 = FMA.f32 r27, r7, r27 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r20, r8, r20 | |
+ NOP | |
* r9 = FMA.f32 r21, r9, r21 | |
+ NOP | |
* r10 = FMA.f32 r22, r10, r22 | |
+ NOP | |
* r11 = FMA.f32 r23, r11, r23 | |
+ NOP | |
* r12 = FMA.f32 r16, r12, r16 | |
+ NOP | |
* r13 = FMA.f32 r17, r13, r17 | |
+ NOP | |
* r14 = FMA.f32 r18, r14, r18 | |
+ NOP | |
* r15 = FMA.f32 r19, r15, r19 | |
+ NOP | |
id(0) nbb | |
* r28 = FMA.f32 r32, r28, r32 | |
+ NOP | |
* r29 = FMA.f32 r1, r29, r1 | |
+ NOP | |
* r30 = FMA.f32 r2, r30, r2 | |
+ NOP | |
* r31 = FMA.f32 r3, r31, r3 | |
+ NOP | |
* r24 = FMA.f32 r4, r24, r4 | |
+ NOP | |
* r25 = FMA.f32 r5, r25, r5 | |
+ NOP | |
* r26 = FMA.f32 r6, r26, r6 | |
+ NOP | |
* r27 = FMA.f32 r7, r27, r7 | |
+ NOP | |
id(0) nbb | |
* r20 = FMA.f32 r8, r20, r8 | |
+ NOP | |
* r21 = FMA.f32 r9, r21, r9 | |
+ NOP | |
* r22 = FMA.f32 r10, r22, r10 | |
+ NOP | |
* r23 = FMA.f32 r11, r23, r11 | |
+ NOP | |
* r16 = FMA.f32 r12, r16, r12 | |
+ NOP | |
* r17 = FMA.f32 r13, r17, r13 | |
+ NOP | |
* r18 = FMA.f32 r14, r18, r14 | |
+ NOP | |
* r19 = FMA.f32 r15, r19, r15 | |
+ NOP | |
id(0) nbb | |
* r32 = FMA.f32 r28, r32, r28 | |
+ NOP | |
* r1 = FMA.f32 r29, r1, r29 | |
+ NOP | |
* r2 = FMA.f32 r30, r2, r30 | |
+ NOP | |
* r3 = FMA.f32 r31, r3, r31 | |
+ NOP | |
* r4 = FMA.f32 r24, r4, r24 | |
+ NOP | |
* r5 = FMA.f32 r25, r5, r25 | |
+ NOP | |
* r6 = FMA.f32 r26, r6, r26 | |
+ NOP | |
* r7 = FMA.f32 r27, r7, r27 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r20, r8, r20 | |
+ NOP | |
* r9 = FMA.f32 r21, r9, r21 | |
+ NOP | |
* r10 = FMA.f32 r22, r10, r22 | |
+ NOP | |
* r11 = FMA.f32 r23, r11, r23 | |
+ NOP | |
* r12 = FMA.f32 r16, r12, r16 | |
+ NOP | |
* r13 = FMA.f32 r17, r13, r17 | |
+ NOP | |
* r14 = FMA.f32 r18, r14, r18 | |
+ NOP | |
* r15 = FMA.f32 r19, r15, r19 | |
+ NOP | |
id(0) nbb | |
* r28 = FMA.f32 r32, r28, r32 | |
+ NOP | |
* r29 = FMA.f32 r1, r29, r1 | |
+ NOP | |
* r30 = FMA.f32 r2, r30, r2 | |
+ NOP | |
* r31 = FMA.f32 r3, r31, r3 | |
+ NOP | |
* r24 = FMA.f32 r4, r24, r4 | |
+ NOP | |
* r25 = FMA.f32 r5, r25, r5 | |
+ NOP | |
* r26 = FMA.f32 r6, r26, r6 | |
+ NOP | |
* r27 = FMA.f32 r7, r27, r7 | |
+ NOP | |
id(0) nbb | |
* r20 = FMA.f32 r8, r20, r8 | |
+ NOP | |
* r21 = FMA.f32 r9, r21, r9 | |
+ NOP | |
* r22 = FMA.f32 r10, r22, r10 | |
+ NOP | |
* r23 = FMA.f32 r11, r23, r11 | |
+ NOP | |
* r16 = FMA.f32 r12, r16, r12 | |
+ NOP | |
* r17 = FMA.f32 r13, r17, r13 | |
+ NOP | |
* r18 = FMA.f32 r14, r18, r14 | |
+ NOP | |
* r19 = FMA.f32 r15, r19, r15 | |
+ NOP | |
id(0) nbb | |
* r32 = FMA.f32 r28, r32, r28 | |
+ NOP | |
* r1 = FMA.f32 r29, r1, r29 | |
+ NOP | |
* r2 = FMA.f32 r30, r2, r30 | |
+ NOP | |
* r3 = FMA.f32 r31, r3, r31 | |
+ NOP | |
* r4 = FMA.f32 r24, r4, r24 | |
+ NOP | |
* r5 = FMA.f32 r25, r5, r25 | |
+ NOP | |
* r6 = FMA.f32 r26, r6, r26 | |
+ NOP | |
* r7 = FMA.f32 r27, r7, r27 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r20, r8, r20 | |
+ NOP | |
* r9 = FMA.f32 r21, r9, r21 | |
+ NOP | |
* r10 = FMA.f32 r22, r10, r22 | |
+ NOP | |
* r11 = FMA.f32 r23, r11, r23 | |
+ NOP | |
* r12 = FMA.f32 r16, r12, r16 | |
+ NOP | |
* r13 = FMA.f32 r17, r13, r17 | |
+ NOP | |
* r14 = FMA.f32 r18, r14, r18 | |
+ NOP | |
* r15 = FMA.f32 r19, r15, r19 | |
+ NOP | |
id(0) nbb | |
* r28 = FMA.f32 r32, r28, r32 | |
+ NOP | |
* r29 = FMA.f32 r1, r29, r1 | |
+ NOP | |
* r30 = FMA.f32 r2, r30, r2 | |
+ NOP | |
* r31 = FMA.f32 r3, r31, r3 | |
+ NOP | |
* r24 = FMA.f32 r4, r24, r4 | |
+ NOP | |
* r25 = FMA.f32 r5, r25, r5 | |
+ NOP | |
* r26 = FMA.f32 r6, r26, r6 | |
+ NOP | |
* r27 = FMA.f32 r7, r27, r7 | |
+ NOP | |
id(0) nbb | |
* r20 = FMA.f32 r8, r20, r8 | |
+ NOP | |
* r21 = FMA.f32 r9, r21, r9 | |
+ NOP | |
* r22 = FMA.f32 r10, r22, r10 | |
+ NOP | |
* r23 = FMA.f32 r11, r23, r11 | |
+ NOP | |
* r16 = FMA.f32 r12, r16, r12 | |
+ NOP | |
* r17 = FMA.f32 r13, r17, r13 | |
+ NOP | |
* r18 = FMA.f32 r14, r18, r14 | |
+ NOP | |
* r19 = FMA.f32 r15, r19, r15 | |
+ NOP | |
id(0) nbb | |
* r32 = FMA.f32 r28, r32, r28 | |
+ NOP | |
* r1 = FMA.f32 r29, r1, r29 | |
+ NOP | |
* r2 = FMA.f32 r30, r2, r30 | |
+ NOP | |
* r3 = FMA.f32 r31, r3, r31 | |
+ NOP | |
* r4 = FMA.f32 r24, r4, r24 | |
+ NOP | |
* r5 = FMA.f32 r25, r5, r25 | |
+ NOP | |
* r6 = FMA.f32 r26, r6, r26 | |
+ NOP | |
* r7 = FMA.f32 r27, r7, r27 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r20, r8, r20 | |
+ NOP | |
* r9 = FMA.f32 r21, r9, r21 | |
+ NOP | |
* r10 = FMA.f32 r22, r10, r22 | |
+ NOP | |
* r11 = FMA.f32 r23, r11, r23 | |
+ NOP | |
* r12 = FMA.f32 r16, r12, r16 | |
+ NOP | |
* r13 = FMA.f32 r17, r13, r17 | |
+ NOP | |
* r14 = FMA.f32 r18, r14, r18 | |
+ NOP | |
* r15 = FMA.f32 r19, r15, r19 | |
+ NOP | |
id(0) nbb | |
* r28 = FMA.f32 r32, r28, r32 | |
+ NOP | |
* r29 = FMA.f32 r1, r29, r1 | |
+ NOP | |
* r30 = FMA.f32 r2, r30, r2 | |
+ NOP | |
* r31 = FMA.f32 r3, r31, r3 | |
+ NOP | |
* r24 = FMA.f32 r4, r24, r4 | |
+ NOP | |
* r25 = FMA.f32 r5, r25, r5 | |
+ NOP | |
* r26 = FMA.f32 r6, r26, r6 | |
+ NOP | |
* r27 = FMA.f32 r7, r27, r7 | |
+ NOP | |
id(0) nbb | |
* r20 = FMA.f32 r8, r20, r8 | |
+ NOP | |
* r21 = FMA.f32 r9, r21, r9 | |
+ NOP | |
* r22 = FMA.f32 r10, r22, r10 | |
+ NOP | |
* r23 = FMA.f32 r11, r23, r11 | |
+ NOP | |
* r16 = FMA.f32 r12, r16, r12 | |
+ NOP | |
* r17 = FMA.f32 r13, r17, r13 | |
+ NOP | |
* r18 = FMA.f32 r14, r18, r14 | |
+ NOP | |
* r19 = FMA.f32 r15, r19, r15 | |
+ NOP | |
id(0) nbb | |
* r32 = FMA.f32 r28, r32, r28 | |
+ NOP | |
* r1 = FMA.f32 r29, r1, r29 | |
+ NOP | |
* r2 = FMA.f32 r30, r2, r30 | |
+ NOP | |
* r3 = FMA.f32 r31, r3, r31 | |
+ NOP | |
* r4 = FMA.f32 r24, r4, r24 | |
+ NOP | |
* r5 = FMA.f32 r25, r5, r25 | |
+ NOP | |
* r6 = FMA.f32 r26, r6, r26 | |
+ NOP | |
* r7 = FMA.f32 r27, r7, r27 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r20, r8, r20 | |
+ NOP | |
* r9 = FMA.f32 r21, r9, r21 | |
+ NOP | |
* r10 = FMA.f32 r22, r10, r22 | |
+ NOP | |
* r11 = FMA.f32 r23, r11, r23 | |
+ NOP | |
* r12 = FMA.f32 r16, r12, r16 | |
+ NOP | |
* r13 = FMA.f32 r17, r13, r17 | |
+ NOP | |
* r14 = FMA.f32 r18, r14, r18 | |
+ NOP | |
* r15 = FMA.f32 r19, r15, r19 | |
+ NOP | |
id(0) nbb | |
* r28 = FMA.f32 r32, r28, r32 | |
+ NOP | |
* r29 = FMA.f32 r1, r29, r1 | |
+ NOP | |
* r30 = FMA.f32 r2, r30, r2 | |
+ NOP | |
* r31 = FMA.f32 r3, r31, r3 | |
+ NOP | |
* r24 = FMA.f32 r4, r24, r4 | |
+ NOP | |
* r25 = FMA.f32 r5, r25, r5 | |
+ NOP | |
* r26 = FMA.f32 r6, r26, r6 | |
+ NOP | |
* r27 = FMA.f32 r7, r27, r7 | |
+ NOP | |
id(0) nbb | |
* r20 = FMA.f32 r8, r20, r8 | |
+ NOP | |
* r21 = FMA.f32 r9, r21, r9 | |
+ NOP | |
* r22 = FMA.f32 r10, r22, r10 | |
+ NOP | |
* r23 = FMA.f32 r11, r23, r11 | |
+ NOP | |
* r16 = FMA.f32 r12, r16, r12 | |
+ NOP | |
* r17 = FMA.f32 r13, r17, r13 | |
+ NOP | |
* r18 = FMA.f32 r14, r18, r14 | |
+ NOP | |
* r19 = FMA.f32 r15, r19, r15 | |
+ NOP | |
id(0) nbb | |
* r32 = FMA.f32 r28, r32, r28 | |
+ NOP | |
* r1 = FMA.f32 r29, r1, r29 | |
+ NOP | |
* r2 = FMA.f32 r30, r2, r30 | |
+ NOP | |
* r3 = FMA.f32 r31, r3, r31 | |
+ NOP | |
* r4 = FMA.f32 r24, r4, r24 | |
+ NOP | |
* r5 = FMA.f32 r25, r5, r25 | |
+ NOP | |
* r6 = FMA.f32 r26, r6, r26 | |
+ NOP | |
* r7 = FMA.f32 r27, r7, r27 | |
+ NOP | |
id(0) nbb | |
* r8 = FMA.f32 r20, r8, r20 | |
+ NOP | |
* r9 = FMA.f32 r21, r9, r21 | |
+ NOP | |
* r10 = FMA.f32 r22, r10, r22 | |
+ NOP | |
* r11 = FMA.f32 r23, r11, r23 | |
+ NOP | |
* r12 = FMA.f32 r16, r12, r16 | |
+ NOP | |
* r13 = FMA.f32 r17, r13, r17 | |
+ NOP | |
* r14 = FMA.f32 r18, r14, r18 | |
+ NOP | |
* r15 = FMA.f32 r19, r15, r19 | |
+ NOP | |
id(0) nbb | |
* r28 = FMA.f32 r32, r28, r32 | |
+ NOP | |
* r29 = FMA.f32 r1, r29, r1 | |
+ NOP | |
* r30 = FMA.f32 r2, r30, r2 | |
+ NOP | |
* r31 = FMA.f32 r3, r31, r3 | |
+ NOP | |
* r24 = FMA.f32 r4, r24, r4 | |
+ NOP | |
* r25 = FMA.f32 r5, r25, r5 | |
+ NOP | |
* r26 = FMA.f32 r6, r26, r6 | |
+ NOP | |
* r27 = FMA.f32 r7, r27, r7 | |
+ NOP | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* r20 = FMA.f32 r8, r20, r8 | |
+ NOP | |
* r21 = FMA.f32 r9, r21, r9 | |
+ NOP | |
* r22 = FMA.f32 r10, r22, r10 | |
+ NOP | |
* r23 = FMA.f32 r11, r23, r11 | |
+ NOP | |
* r16 = FMA.f32 r12, r16, r12 | |
+ NOP | |
* r17 = FMA.f32 r13, r17, r13 | |
+ NOP | |
* r18 = FMA.f32 r14, r18, r14 | |
+ r33 = IADD.s32 r33, fau.x | |
* r19 = FMA.f32 r15, r19, r15 | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) nbb | |
* NOP | |
+ r1 = FADD.f32 r28, r20 | |
id(0) nbb | |
* NOP | |
+ r2 = FADD.f32 r29, r21 | |
* NOP | |
+ r3 = FADD.f32 r30, r22 | |
* NOP | |
+ r4 = FADD.f32 r31, r23 | |
* NOP | |
+ r5 = FADD.f32 r24, r16 | |
* NOP | |
+ r6 = FADD.f32 r25, r17 | |
* NOP | |
+ r7 = FADD.f32 r26, r18 | |
* NOP | |
+ r8 = FADD.f32 r27, r19 | |
* NOP | |
+ r1 = FADD.f32 r1, r5 | |
id(0) wait(0 ) nbb r_uncond | |
* NOP | |
+ r2 = FADD.f32 r2, r6 | |
* NOP | |
+ r3 = FADD.f32 r3, r7 | |
* NOP | |
+ r4 = FADD.f32 r4, r8 | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ r1 = FADD.f32 r1, r3 | |
* _.h00 = FADD.f32 r2, r4 | |
+ r0 = IADD.s32 fau.x, t0 | |
* r1 = FADD.f32 r1, t0 | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* NOP | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
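---------- clause notes ----------
The final listing below appears to be the packed clause form closest to what the hardware executes: tuples are regrouped into clauses (clause_0, clause_6, ...), embedded constants are printed with their decoded float values (e.g. 0x3f800000 /* 1.000000 */), and control flow now targets clause offsets. BRANCHZ ... clause_32 keeps the loop running, while the fall-through JUMP in clause_30 to clause_225 takes the exit path to the reduction and store; the unrolled FMA body fills the long run of FMA-only clauses from clause_32 onward.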
slot 0 reads: r1 | |
clause_0: | |
ds(0) nbb ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
*MOV.i32 t0, 0x3f800000 /* 1.000000 */ | |
+MOV.i32 t1, 0x40000000 /* 2.000000 */ | |
*FADD.f32 r1:t0, u1.w0, t0 | |
+FADD.f32 r2:t1, u1.w0, t1 | |
*NOP t0 | |
+MOV.i32 t1, 0x40400000 /* 3.000000 */ | |
*NOP t0 | |
+FADD.f32 r3:t1, u1.w0, t1 | |
} | |
clause_6: | |
ds(0) nbb ncph | |
{ | |
*MOV.i32 t0, 0x40800000 /* 4.000000 */ | |
+MOV.i32 t1, 0x40a00000 /* 5.000000 */ | |
*FADD.f32 r4:t0, u1.w0, t0 | |
+FADD.f32 r5:t1, u1.w0, t1 | |
*MOV.i32 t0, 0x40c00000 /* 6.000000 */ | |
+MOV.i32 t1, 0x40e00000 /* 7.000000 */ | |
*FADD.f32 r6:t0, u1.w0, t0 | |
+FADD.f32 r7:t1, u1.w0, t1 | |
*MOV.i32 t0, 0x41000000 /* 8.000000 */ | |
+MOV.i32 t1, 0x41100000 /* 9.000000 */ | |
*FADD.f32 r8:t0, u1.w0, t0 | |
+FADD.f32 r9:t1, u1.w0, t1 | |
*NOP t0 | |
+MOV.i32 t1, 0x41200000 /* 10.000000 */ | |
*NOP t0 | |
+FADD.f32 r10:t1, u1.w0, t1 | |
} | |
clause_14: | |
ds(0) nbb ncph | |
{ | |
*MOV.i32 t0, 0x41300000 /* 11.000000 */ | |
+MOV.i32 t1, 0x41400000 /* 12.000000 */ | |
*FADD.f32 r11:t0, u1.w0, t0 | |
+FADD.f32 r12:t1, u1.w0, t1 | |
*MOV.i32 t0, 0x41500000 /* 13.000000 */ | |
+MOV.i32 t1, 0x41600000 /* 14.000000 */ | |
*FADD.f32 r13:t0, u1.w0, t0 | |
+FADD.f32 r14:t1, u1.w0, t1 | |
*NOP t0 | |
+MOV.i32 t1, 0x41700000 /* 15.000000 */ | |
*FADD.f32 r15:t0, u1.w0, t1 | |
+U32_TO_F32 t1, r0 | |
*FMA.f32 r16:t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+MOV.i32 r17:t1, t | |
*NOP t0 | |
+MOV.i32 r18:t1, t0 | |
} | |
clause_21: | |
ds(0) nbb r_uncond ncph | |
{ | |
*MOV.i32 r19:t0, r16 | |
+MOV.i32 r20:t1, r16 | |
*MOV.i32 r21:t0, r16 | |
+MOV.i32 r22:t1, r16 | |
*MOV.i32 r23:t0, r16 | |
+MOV.i32 r24:t1, r16 | |
*MOV.i32 r25:t0, r16 | |
+MOV.i32 r26:t1, r16 | |
*MOV.i32 r27:t0, r16 | |
+MOV.i32 r28:t1, r16 | |
*MOV.i32 r29:t0, r16 | |
+MOV.i32 r30:t1, r16 | |
*MOV.i32 r31:t0, r16 | |
+MOV.i32 r32:t1, u1.w0 | |
*NOP t0 | |
+MOV.i32 r33:t1, #0.x | |
} | |
clause_27: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r33, 0x00000008 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_32 | |
} | |
clause_30: | |
ds(0) nbb | |
{ | |
*NOP t0 | |
+JUMP t1, clause_225 | |
} | |
clause_32: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r32:t0, r28, r32, r28 | |
+NOP t1 | |
*FMA.f32 r1:t0, r29, r1, r29 | |
+NOP t1 | |
*FMA.f32 r2:t0, r30, r2, r30 | |
+NOP t1 | |
*FMA.f32 r3:t0, r31, r3, r31 | |
+NOP t1 | |
*FMA.f32 r4:t0, r24, r4, r24 | |
+NOP t1 | |
*FMA.f32 r5:t0, r25, r5, r25 | |
+NOP t1 | |
*FMA.f32 r6:t0, r26, r6, r26 | |
+NOP t1 | |
*FMA.f32 r7:t0, r27, r7, r27 | |
+NOP t1 | |
} | |
clause_38: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r20, r8, r20 | |
+NOP t1 | |
*FMA.f32 r9:t0, r21, r9, r21 | |
+NOP t1 | |
*FMA.f32 r10:t0, r22, r10, r22 | |
+NOP t1 | |
*FMA.f32 r11:t0, r23, r11, r23 | |
+NOP t1 | |
*FMA.f32 r12:t0, r16, r12, r16 | |
+NOP t1 | |
*FMA.f32 r13:t0, r17, r13, r17 | |
+NOP t1 | |
*FMA.f32 r14:t0, r18, r14, r18 | |
+NOP t1 | |
*FMA.f32 r15:t0, r19, r15, r19 | |
+NOP t1 | |
} | |
clause_44: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r28:t0, r32, r28, r32 | |
+NOP t1 | |
*FMA.f32 r29:t0, r1, r29, r1 | |
+NOP t1 | |
*FMA.f32 r30:t0, r2, r30, r2 | |
+NOP t1 | |
*FMA.f32 r31:t0, r3, r31, r3 | |
+NOP t1 | |
*FMA.f32 r24:t0, r4, r24, r4 | |
+NOP t1 | |
*FMA.f32 r25:t0, r5, r25, r5 | |
+NOP t1 | |
*FMA.f32 r26:t0, r6, r26, r6 | |
+NOP t1 | |
*FMA.f32 r27:t0, r7, r27, r7 | |
+NOP t1 | |
} | |
clause_50: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r20:t0, r8, r20, r8 | |
+NOP t1 | |
*FMA.f32 r21:t0, r9, r21, r9 | |
+NOP t1 | |
*FMA.f32 r22:t0, r10, r22, r10 | |
+NOP t1 | |
*FMA.f32 r23:t0, r11, r23, r11 | |
+NOP t1 | |
*FMA.f32 r16:t0, r12, r16, r12 | |
+NOP t1 | |
*FMA.f32 r17:t0, r13, r17, r13 | |
+NOP t1 | |
*FMA.f32 r18:t0, r14, r18, r14 | |
+NOP t1 | |
*FMA.f32 r19:t0, r15, r19, r15 | |
+NOP t1 | |
} | |
clause_56: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r32:t0, r28, r32, r28 | |
+NOP t1 | |
*FMA.f32 r1:t0, r29, r1, r29 | |
+NOP t1 | |
*FMA.f32 r2:t0, r30, r2, r30 | |
+NOP t1 | |
*FMA.f32 r3:t0, r31, r3, r31 | |
+NOP t1 | |
*FMA.f32 r4:t0, r24, r4, r24 | |
+NOP t1 | |
*FMA.f32 r5:t0, r25, r5, r25 | |
+NOP t1 | |
*FMA.f32 r6:t0, r26, r6, r26 | |
+NOP t1 | |
*FMA.f32 r7:t0, r27, r7, r27 | |
+NOP t1 | |
} | |
clause_62: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r20, r8, r20 | |
+NOP t1 | |
*FMA.f32 r9:t0, r21, r9, r21 | |
+NOP t1 | |
*FMA.f32 r10:t0, r22, r10, r22 | |
+NOP t1 | |
*FMA.f32 r11:t0, r23, r11, r23 | |
+NOP t1 | |
*FMA.f32 r12:t0, r16, r12, r16 | |
+NOP t1 | |
*FMA.f32 r13:t0, r17, r13, r17 | |
+NOP t1 | |
*FMA.f32 r14:t0, r18, r14, r18 | |
+NOP t1 | |
*FMA.f32 r15:t0, r19, r15, r19 | |
+NOP t1 | |
} | |
clause_68: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r28:t0, r32, r28, r32 | |
+NOP t1 | |
*FMA.f32 r29:t0, r1, r29, r1 | |
+NOP t1 | |
*FMA.f32 r30:t0, r2, r30, r2 | |
+NOP t1 | |
*FMA.f32 r31:t0, r3, r31, r3 | |
+NOP t1 | |
*FMA.f32 r24:t0, r4, r24, r4 | |
+NOP t1 | |
*FMA.f32 r25:t0, r5, r25, r5 | |
+NOP t1 | |
*FMA.f32 r26:t0, r6, r26, r6 | |
+NOP t1 | |
*FMA.f32 r27:t0, r7, r27, r7 | |
+NOP t1 | |
} | |
clause_74: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r20:t0, r8, r20, r8 | |
+NOP t1 | |
*FMA.f32 r21:t0, r9, r21, r9 | |
+NOP t1 | |
*FMA.f32 r22:t0, r10, r22, r10 | |
+NOP t1 | |
*FMA.f32 r23:t0, r11, r23, r11 | |
+NOP t1 | |
*FMA.f32 r16:t0, r12, r16, r12 | |
+NOP t1 | |
*FMA.f32 r17:t0, r13, r17, r13 | |
+NOP t1 | |
*FMA.f32 r18:t0, r14, r18, r14 | |
+NOP t1 | |
*FMA.f32 r19:t0, r15, r19, r15 | |
+NOP t1 | |
} | |
clause_80: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r32:t0, r28, r32, r28 | |
+NOP t1 | |
*FMA.f32 r1:t0, r29, r1, r29 | |
+NOP t1 | |
*FMA.f32 r2:t0, r30, r2, r30 | |
+NOP t1 | |
*FMA.f32 r3:t0, r31, r3, r31 | |
+NOP t1 | |
*FMA.f32 r4:t0, r24, r4, r24 | |
+NOP t1 | |
*FMA.f32 r5:t0, r25, r5, r25 | |
+NOP t1 | |
*FMA.f32 r6:t0, r26, r6, r26 | |
+NOP t1 | |
*FMA.f32 r7:t0, r27, r7, r27 | |
+NOP t1 | |
} | |
clause_86: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r20, r8, r20 | |
+NOP t1 | |
*FMA.f32 r9:t0, r21, r9, r21 | |
+NOP t1 | |
*FMA.f32 r10:t0, r22, r10, r22 | |
+NOP t1 | |
*FMA.f32 r11:t0, r23, r11, r23 | |
+NOP t1 | |
*FMA.f32 r12:t0, r16, r12, r16 | |
+NOP t1 | |
*FMA.f32 r13:t0, r17, r13, r17 | |
+NOP t1 | |
*FMA.f32 r14:t0, r18, r14, r18 | |
+NOP t1 | |
*FMA.f32 r15:t0, r19, r15, r19 | |
+NOP t1 | |
} | |
clause_92: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r28:t0, r32, r28, r32 | |
+NOP t1 | |
*FMA.f32 r29:t0, r1, r29, r1 | |
+NOP t1 | |
*FMA.f32 r30:t0, r2, r30, r2 | |
+NOP t1 | |
*FMA.f32 r31:t0, r3, r31, r3 | |
+NOP t1 | |
*FMA.f32 r24:t0, r4, r24, r4 | |
+NOP t1 | |
*FMA.f32 r25:t0, r5, r25, r5 | |
+NOP t1 | |
*FMA.f32 r26:t0, r6, r26, r6 | |
+NOP t1 | |
*FMA.f32 r27:t0, r7, r27, r7 | |
+NOP t1 | |
} | |
clause_98: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r20:t0, r8, r20, r8 | |
+NOP t1 | |
*FMA.f32 r21:t0, r9, r21, r9 | |
+NOP t1 | |
*FMA.f32 r22:t0, r10, r22, r10 | |
+NOP t1 | |
*FMA.f32 r23:t0, r11, r23, r11 | |
+NOP t1 | |
*FMA.f32 r16:t0, r12, r16, r12 | |
+NOP t1 | |
*FMA.f32 r17:t0, r13, r17, r13 | |
+NOP t1 | |
*FMA.f32 r18:t0, r14, r18, r14 | |
+NOP t1 | |
*FMA.f32 r19:t0, r15, r19, r15 | |
+NOP t1 | |
} | |
clause_104: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r32:t0, r28, r32, r28 | |
+NOP t1 | |
*FMA.f32 r1:t0, r29, r1, r29 | |
+NOP t1 | |
*FMA.f32 r2:t0, r30, r2, r30 | |
+NOP t1 | |
*FMA.f32 r3:t0, r31, r3, r31 | |
+NOP t1 | |
*FMA.f32 r4:t0, r24, r4, r24 | |
+NOP t1 | |
*FMA.f32 r5:t0, r25, r5, r25 | |
+NOP t1 | |
*FMA.f32 r6:t0, r26, r6, r26 | |
+NOP t1 | |
*FMA.f32 r7:t0, r27, r7, r27 | |
+NOP t1 | |
} | |
clause_110: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r20, r8, r20 | |
+NOP t1 | |
*FMA.f32 r9:t0, r21, r9, r21 | |
+NOP t1 | |
*FMA.f32 r10:t0, r22, r10, r22 | |
+NOP t1 | |
*FMA.f32 r11:t0, r23, r11, r23 | |
+NOP t1 | |
*FMA.f32 r12:t0, r16, r12, r16 | |
+NOP t1 | |
*FMA.f32 r13:t0, r17, r13, r17 | |
+NOP t1 | |
*FMA.f32 r14:t0, r18, r14, r18 | |
+NOP t1 | |
*FMA.f32 r15:t0, r19, r15, r19 | |
+NOP t1 | |
} | |
clause_116: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r28:t0, r32, r28, r32 | |
+NOP t1 | |
*FMA.f32 r29:t0, r1, r29, r1 | |
+NOP t1 | |
*FMA.f32 r30:t0, r2, r30, r2 | |
+NOP t1 | |
*FMA.f32 r31:t0, r3, r31, r3 | |
+NOP t1 | |
*FMA.f32 r24:t0, r4, r24, r4 | |
+NOP t1 | |
*FMA.f32 r25:t0, r5, r25, r5 | |
+NOP t1 | |
*FMA.f32 r26:t0, r6, r26, r6 | |
+NOP t1 | |
*FMA.f32 r27:t0, r7, r27, r7 | |
+NOP t1 | |
} | |
clause_122: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r20:t0, r8, r20, r8 | |
+NOP t1 | |
*FMA.f32 r21:t0, r9, r21, r9 | |
+NOP t1 | |
*FMA.f32 r22:t0, r10, r22, r10 | |
+NOP t1 | |
*FMA.f32 r23:t0, r11, r23, r11 | |
+NOP t1 | |
*FMA.f32 r16:t0, r12, r16, r12 | |
+NOP t1 | |
*FMA.f32 r17:t0, r13, r17, r13 | |
+NOP t1 | |
*FMA.f32 r18:t0, r14, r18, r14 | |
+NOP t1 | |
*FMA.f32 r19:t0, r15, r19, r15 | |
+NOP t1 | |
} | |
clause_128: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r32:t0, r28, r32, r28 | |
+NOP t1 | |
*FMA.f32 r1:t0, r29, r1, r29 | |
+NOP t1 | |
*FMA.f32 r2:t0, r30, r2, r30 | |
+NOP t1 | |
*FMA.f32 r3:t0, r31, r3, r31 | |
+NOP t1 | |
*FMA.f32 r4:t0, r24, r4, r24 | |
+NOP t1 | |
*FMA.f32 r5:t0, r25, r5, r25 | |
+NOP t1 | |
*FMA.f32 r6:t0, r26, r6, r26 | |
+NOP t1 | |
*FMA.f32 r7:t0, r27, r7, r27 | |
+NOP t1 | |
} | |
clause_134: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r20, r8, r20 | |
+NOP t1 | |
*FMA.f32 r9:t0, r21, r9, r21 | |
+NOP t1 | |
*FMA.f32 r10:t0, r22, r10, r22 | |
+NOP t1 | |
*FMA.f32 r11:t0, r23, r11, r23 | |
+NOP t1 | |
*FMA.f32 r12:t0, r16, r12, r16 | |
+NOP t1 | |
*FMA.f32 r13:t0, r17, r13, r17 | |
+NOP t1 | |
*FMA.f32 r14:t0, r18, r14, r18 | |
+NOP t1 | |
*FMA.f32 r15:t0, r19, r15, r19 | |
+NOP t1 | |
} | |
clause_140: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r28:t0, r32, r28, r32 | |
+NOP t1 | |
*FMA.f32 r29:t0, r1, r29, r1 | |
+NOP t1 | |
*FMA.f32 r30:t0, r2, r30, r2 | |
+NOP t1 | |
*FMA.f32 r31:t0, r3, r31, r3 | |
+NOP t1 | |
*FMA.f32 r24:t0, r4, r24, r4 | |
+NOP t1 | |
*FMA.f32 r25:t0, r5, r25, r5 | |
+NOP t1 | |
*FMA.f32 r26:t0, r6, r26, r6 | |
+NOP t1 | |
*FMA.f32 r27:t0, r7, r27, r7 | |
+NOP t1 | |
} | |
clause_146: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r20:t0, r8, r20, r8 | |
+NOP t1 | |
*FMA.f32 r21:t0, r9, r21, r9 | |
+NOP t1 | |
*FMA.f32 r22:t0, r10, r22, r10 | |
+NOP t1 | |
*FMA.f32 r23:t0, r11, r23, r11 | |
+NOP t1 | |
*FMA.f32 r16:t0, r12, r16, r12 | |
+NOP t1 | |
*FMA.f32 r17:t0, r13, r17, r13 | |
+NOP t1 | |
*FMA.f32 r18:t0, r14, r18, r14 | |
+NOP t1 | |
*FMA.f32 r19:t0, r15, r19, r15 | |
+NOP t1 | |
} | |
clause_152: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r32:t0, r28, r32, r28 | |
+NOP t1 | |
*FMA.f32 r1:t0, r29, r1, r29 | |
+NOP t1 | |
*FMA.f32 r2:t0, r30, r2, r30 | |
+NOP t1 | |
*FMA.f32 r3:t0, r31, r3, r31 | |
+NOP t1 | |
*FMA.f32 r4:t0, r24, r4, r24 | |
+NOP t1 | |
*FMA.f32 r5:t0, r25, r5, r25 | |
+NOP t1 | |
*FMA.f32 r6:t0, r26, r6, r26 | |
+NOP t1 | |
*FMA.f32 r7:t0, r27, r7, r27 | |
+NOP t1 | |
} | |
clause_158: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r20, r8, r20 | |
+NOP t1 | |
*FMA.f32 r9:t0, r21, r9, r21 | |
+NOP t1 | |
*FMA.f32 r10:t0, r22, r10, r22 | |
+NOP t1 | |
*FMA.f32 r11:t0, r23, r11, r23 | |
+NOP t1 | |
*FMA.f32 r12:t0, r16, r12, r16 | |
+NOP t1 | |
*FMA.f32 r13:t0, r17, r13, r17 | |
+NOP t1 | |
*FMA.f32 r14:t0, r18, r14, r18 | |
+NOP t1 | |
*FMA.f32 r15:t0, r19, r15, r19 | |
+NOP t1 | |
} | |
clause_164: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r28:t0, r32, r28, r32 | |
+NOP t1 | |
*FMA.f32 r29:t0, r1, r29, r1 | |
+NOP t1 | |
*FMA.f32 r30:t0, r2, r30, r2 | |
+NOP t1 | |
*FMA.f32 r31:t0, r3, r31, r3 | |
+NOP t1 | |
*FMA.f32 r24:t0, r4, r24, r4 | |
+NOP t1 | |
*FMA.f32 r25:t0, r5, r25, r5 | |
+NOP t1 | |
*FMA.f32 r26:t0, r6, r26, r6 | |
+NOP t1 | |
*FMA.f32 r27:t0, r7, r27, r7 | |
+NOP t1 | |
} | |
clause_170: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r20:t0, r8, r20, r8 | |
+NOP t1 | |
*FMA.f32 r21:t0, r9, r21, r9 | |
+NOP t1 | |
*FMA.f32 r22:t0, r10, r22, r10 | |
+NOP t1 | |
*FMA.f32 r23:t0, r11, r23, r11 | |
+NOP t1 | |
*FMA.f32 r16:t0, r12, r16, r12 | |
+NOP t1 | |
*FMA.f32 r17:t0, r13, r17, r13 | |
+NOP t1 | |
*FMA.f32 r18:t0, r14, r18, r14 | |
+NOP t1 | |
*FMA.f32 r19:t0, r15, r19, r15 | |
+NOP t1 | |
} | |
clause_176: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r32:t0, r28, r32, r28 | |
+NOP t1 | |
*FMA.f32 r1:t0, r29, r1, r29 | |
+NOP t1 | |
*FMA.f32 r2:t0, r30, r2, r30 | |
+NOP t1 | |
*FMA.f32 r3:t0, r31, r3, r31 | |
+NOP t1 | |
*FMA.f32 r4:t0, r24, r4, r24 | |
+NOP t1 | |
*FMA.f32 r5:t0, r25, r5, r25 | |
+NOP t1 | |
*FMA.f32 r6:t0, r26, r6, r26 | |
+NOP t1 | |
*FMA.f32 r7:t0, r27, r7, r27 | |
+NOP t1 | |
} | |
clause_182: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r20, r8, r20 | |
+NOP t1 | |
*FMA.f32 r9:t0, r21, r9, r21 | |
+NOP t1 | |
*FMA.f32 r10:t0, r22, r10, r22 | |
+NOP t1 | |
*FMA.f32 r11:t0, r23, r11, r23 | |
+NOP t1 | |
*FMA.f32 r12:t0, r16, r12, r16 | |
+NOP t1 | |
*FMA.f32 r13:t0, r17, r13, r17 | |
+NOP t1 | |
*FMA.f32 r14:t0, r18, r14, r18 | |
+NOP t1 | |
*FMA.f32 r15:t0, r19, r15, r19 | |
+NOP t1 | |
} | |
clause_188: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r28:t0, r32, r28, r32 | |
+NOP t1 | |
*FMA.f32 r29:t0, r1, r29, r1 | |
+NOP t1 | |
*FMA.f32 r30:t0, r2, r30, r2 | |
+NOP t1 | |
*FMA.f32 r31:t0, r3, r31, r3 | |
+NOP t1 | |
*FMA.f32 r24:t0, r4, r24, r4 | |
+NOP t1 | |
*FMA.f32 r25:t0, r5, r25, r5 | |
+NOP t1 | |
*FMA.f32 r26:t0, r6, r26, r6 | |
+NOP t1 | |
*FMA.f32 r27:t0, r7, r27, r7 | |
+NOP t1 | |
} | |
clause_194: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r20:t0, r8, r20, r8 | |
+NOP t1 | |
*FMA.f32 r21:t0, r9, r21, r9 | |
+NOP t1 | |
*FMA.f32 r22:t0, r10, r22, r10 | |
+NOP t1 | |
*FMA.f32 r23:t0, r11, r23, r11 | |
+NOP t1 | |
*FMA.f32 r16:t0, r12, r16, r12 | |
+NOP t1 | |
*FMA.f32 r17:t0, r13, r17, r13 | |
+NOP t1 | |
*FMA.f32 r18:t0, r14, r18, r14 | |
+NOP t1 | |
*FMA.f32 r19:t0, r15, r19, r15 | |
+NOP t1 | |
} | |
clause_200: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r32:t0, r28, r32, r28 | |
+NOP t1 | |
*FMA.f32 r1:t0, r29, r1, r29 | |
+NOP t1 | |
*FMA.f32 r2:t0, r30, r2, r30 | |
+NOP t1 | |
*FMA.f32 r3:t0, r31, r3, r31 | |
+NOP t1 | |
*FMA.f32 r4:t0, r24, r4, r24 | |
+NOP t1 | |
*FMA.f32 r5:t0, r25, r5, r25 | |
+NOP t1 | |
*FMA.f32 r6:t0, r26, r6, r26 | |
+NOP t1 | |
*FMA.f32 r7:t0, r27, r7, r27 | |
+NOP t1 | |
} | |
clause_206: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r8:t0, r20, r8, r20 | |
+NOP t1 | |
*FMA.f32 r9:t0, r21, r9, r21 | |
+NOP t1 | |
*FMA.f32 r10:t0, r22, r10, r22 | |
+NOP t1 | |
*FMA.f32 r11:t0, r23, r11, r23 | |
+NOP t1 | |
*FMA.f32 r12:t0, r16, r12, r16 | |
+NOP t1 | |
*FMA.f32 r13:t0, r17, r13, r17 | |
+NOP t1 | |
*FMA.f32 r14:t0, r18, r14, r18 | |
+NOP t1 | |
*FMA.f32 r15:t0, r19, r15, r19 | |
+NOP t1 | |
} | |
clause_212: | |
ds(0) nbb ncph | |
{ | |
*FMA.f32 r28:t0, r32, r28, r32 | |
+NOP t1 | |
*FMA.f32 r29:t0, r1, r29, r1 | |
+NOP t1 | |
*FMA.f32 r30:t0, r2, r30, r2 | |
+NOP t1 | |
*FMA.f32 r31:t0, r3, r31, r3 | |
+NOP t1 | |
*FMA.f32 r24:t0, r4, r24, r4 | |
+NOP t1 | |
*FMA.f32 r25:t0, r5, r25, r5 | |
+NOP t1 | |
*FMA.f32 r26:t0, r6, r26, r6 | |
+NOP t1 | |
*FMA.f32 r27:t0, r7, r27, r7 | |
+NOP t1 | |
} | |
clause_218: | |
ds(0) nbb r_uncond | |
{ | |
*FMA.f32 r20:t0, r8, r20, r8 | |
+NOP t1 | |
*FMA.f32 r21:t0, r9, r21, r9 | |
+NOP t1 | |
*FMA.f32 r22:t0, r10, r22, r10 | |
+NOP t1 | |
*FMA.f32 r23:t0, r11, r23, r11 | |
+NOP t1 | |
*FMA.f32 r16:t0, r12, r16, r12 | |
+NOP t1 | |
*FMA.f32 r17:t0, r13, r17, r13 | |
+NOP t1 | |
*FMA.f32 r18:t0, r14, r18, r14 | |
+IADD.s32 r33:t1, r33, 0x00000001 /* 0.000000 */ | |
*FMA.f32 r19:t0, r15, r19, r15 | |
+JUMP t1, clause_27 | |
} | |
clause_225: | |
ds(0) nbb ncph | |
{ | |
*NOP t0 | |
+FADD.f32 r1:t1, r28, r20 | |
} | |
clause_226: | |
ds(0) nbb ncph next_store dwb(0) | |
{ | |
*NOP t0 | |
+FADD.f32 r2:t1, r29, r21 | |
*NOP t0 | |
+FADD.f32 r3:t1, r30, r22 | |
*NOP t0 | |
+FADD.f32 r4:t1, r31, r23 | |
*NOP t0 | |
+FADD.f32 r5:t1, r24, r16 | |
*NOP t0 | |
+FADD.f32 r6:t1, r25, r17 | |
*NOP t0 | |
+FADD.f32 r7:t1, r26, r18 | |
*NOP t0 | |
+FADD.f32 r8:t1, r27, r19 | |
*NOP t0 | |
+FADD.f32 r1:t1, r1, r5 | |
} | |
clause_232: | |
ds(0) eos store | |
{ | |
*NOP t0 | |
+FADD.f32 r2:t1, r2, r6 | |
*NOP t0 | |
+FADD.f32 r3:t1, r3, r7 | |
*NOP t0 | |
+FADD.f32 r4:t1, r4, r8 | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+FADD.f32 r1:t1, r1, r3 | |
*FADD.f32 t0, r2, r4 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*FADD.f32 r1:t0, r1, t0 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*NOP t0 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
25a14253 compute_sp_v16_float 21.759 GFLOPs 12.337ms | |
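Note on the figure above: the tool reports total work divided by wall time. The dispatch size is not printed in this log, but the numbers are consistent with 65536 invocations (e.g. 256 workgroups of 256): following the pattern of the variants below, the v16 kernel runs 8 iterations x 16 mads x 16 lanes = 2048 mads = 4096 FLOPs per invocation, so

    total ops = 65536 x 4096 = 2^28 = 268,435,456
    268,435,456 / 12.337e-3 s = 21.76e9, i.e. the reported 21.759 GFLOPs

Every variant in this log does the same 2^28 ops in total (the loop count halves whenever the vector width doubles), which is what makes the GFLOPs figures directly comparable. The 65536-invocation dispatch is inferred from this arithmetic, not stated anywhere in the log.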
compute shader ---------- | |
#define KERNEL compute_sp_v1 | |
#define LOCAL_SIZE_X 256 | |
#define DATATYPE int | |
#define vec2 ivec2 | |
#define vec4 ivec4 | |
#line 64 | |
// Avoid auto-vectorization by using vector-width-locked dependent code | |
layout(local_size_x = LOCAL_SIZE_X) in; | |
#undef MAD_4 | |
#undef MAD_16 | |
#undef MAD_64 | |
#define mad(a,b,c) (a*b+c) | |
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x); | |
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); | |
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); | |
struct vec8 { | |
vec4 d0, d1; | |
}; | |
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7)) | |
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x)) | |
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1)) | |
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1)) | |
struct vec16 { | |
vec8 d0,d1; | |
}; | |
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15)) | |
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x)); | |
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1))) | |
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1))) | |
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c)) | |
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c)) | |
layout(location = 1) uniform DATATYPE _A; | |
#define SCALE 1e-10 | |
layout(std430, binding = 0) restrict writeonly buffer outbuffer { | |
DATATYPE ptr[]; | |
}; | |
#line 111 | |
void compute_sp_v1() | |
{ | |
uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u; | |
DATATYPE x = _A; | |
DATATYPE y = DATATYPE(float(id) * SCALE); | |
for(int i=0; i<128; i++) | |
{ | |
MAD_16(x, y); | |
} | |
ptr[id] = y; | |
} | |
void main() {compute_sp_v1();} | |
---------- | |
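The macro chain above is what defeats vectorization. Expanding mad(a,b,c) = (a*b+c), one MAD_4(x, y) becomes four statements, each consuming the result of the one before it:

    // MAD_4(x, y) expanded:
    x = y*x + y;
    y = x*y + x;
    x = y*x + y;
    y = x*y + x;

With DATATYPE int and scalar x/y, the 16 mads per MAD_16 (2048 per invocation over the 128 loop iterations) form one serial dependency chain, so the compiler can neither widen nor reorder them.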
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0xbbd9d65d, 0x1def99df, 0x82d13ce1, 0x50fbde77, 0x01b473fb} | |
name: GLSL12 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp int[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp int _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_4 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_5 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_6 = ishl ssa_4.y, ssa_5 | |
vec1 32 ssa_7 = iadd ssa_4.x, ssa_6 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_63 = insert_u16 ssa_4.z, ssa_1 | |
vec1 32 ssa_10 = iadd ssa_7, ssa_63 | |
vec1 32 ssa_12 = u2f32 ssa_10 | |
vec1 32 ssa_2 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_13 = fmul ssa_12, ssa_2 | |
vec1 32 ssa_14 = f2i32 ssa_13 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_3 = load_const (0x00000080 = 0.000000) | |
vec1 32 ssa_11 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_15 = phi block_0: ssa_14, block_4: ssa_50 | |
vec1 32 ssa_16 = phi block_0: ssa_11, block_4: ssa_48 | |
vec1 32 ssa_17 = phi block_0: ssa_0, block_4: ssa_51 | |
vec1 32 ssa_18 = ige32 ssa_17, ssa_3 | |
/* succs: block_2 block_3 */ | |
if ssa_18 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_19 = imul ssa_15, ssa_16 | |
vec1 32 ssa_20 = iadd ssa_19, ssa_15 | |
vec1 32 ssa_21 = imul ssa_20, ssa_15 | |
vec1 32 ssa_22 = iadd ssa_21, ssa_20 | |
vec1 32 ssa_23 = imul ssa_22, ssa_20 | |
vec1 32 ssa_24 = iadd ssa_23, ssa_22 | |
vec1 32 ssa_25 = imul ssa_24, ssa_22 | |
vec1 32 ssa_26 = iadd ssa_25, ssa_24 | |
vec1 32 ssa_27 = imul ssa_26, ssa_24 | |
vec1 32 ssa_28 = iadd ssa_27, ssa_26 | |
vec1 32 ssa_29 = imul ssa_28, ssa_26 | |
vec1 32 ssa_30 = iadd ssa_29, ssa_28 | |
vec1 32 ssa_31 = imul ssa_30, ssa_28 | |
vec1 32 ssa_32 = iadd ssa_31, ssa_30 | |
vec1 32 ssa_33 = imul ssa_32, ssa_30 | |
vec1 32 ssa_34 = iadd ssa_33, ssa_32 | |
vec1 32 ssa_35 = imul ssa_34, ssa_32 | |
vec1 32 ssa_36 = iadd ssa_35, ssa_34 | |
vec1 32 ssa_37 = imul ssa_36, ssa_34 | |
vec1 32 ssa_38 = iadd ssa_37, ssa_36 | |
vec1 32 ssa_39 = imul ssa_38, ssa_36 | |
vec1 32 ssa_40 = iadd ssa_39, ssa_38 | |
vec1 32 ssa_41 = imul ssa_40, ssa_38 | |
vec1 32 ssa_42 = iadd ssa_41, ssa_40 | |
vec1 32 ssa_43 = imul ssa_42, ssa_40 | |
vec1 32 ssa_44 = iadd ssa_43, ssa_42 | |
vec1 32 ssa_45 = imul ssa_44, ssa_42 | |
vec1 32 ssa_46 = iadd ssa_45, ssa_44 | |
vec1 32 ssa_47 = imul ssa_46, ssa_44 | |
vec1 32 ssa_48 = iadd ssa_47, ssa_46 | |
vec1 32 ssa_49 = imul ssa_48, ssa_46 | |
vec1 32 ssa_50 = iadd ssa_49, ssa_48 | |
vec1 32 ssa_51 = iadd ssa_17, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_52 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_53 = ishl ssa_10, ssa_52 | |
vec1 64 ssa_54 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_55 = unpack_64_2x32_split_x ssa_54 | |
vec1 32 ssa_56 = unpack_64_2x32_split_y ssa_54 | |
vec1 32 ssa_57 = iadd ssa_55, ssa_53 | |
vec1 32 ssa_58 = ult32 ssa_57, ssa_55 | |
vec1 32 ssa_59 = b2i32 ssa_58 | |
vec1 32 ssa_60 = iadd ssa_59, ssa_56 | |
vec1 64 ssa_61 = pack_64_2x32_split ssa_57, ssa_60 | |
intrinsic store_global (ssa_15, ssa_61) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
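Two things to note in the NIR above: the for loop has become phi nodes (ssa_15/ssa_16/ssa_17) plus an ige32/break test against 0x80 = 128, and each integer mad is simply an imul feeding an iadd, since a*b+c on int has no single-instruction form here:

    // x = y*x + y;  (the first mad of the chain) appears as:
    //   ssa_19 = imul ssa_15, ssa_16
    //   ssa_20 = iadd ssa_19, ssa_15

block_4 therefore carries 16 imul + 16 iadd per iteration: two instructions per mad, where the float variant needs a single FMA.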
block0 { | |
66 = MOV.i32 r62 | |
65 = MOV.i32 r61 | |
64 = MOV.i32 r60 | |
6 = LSHIFT_OR.i32 65, #0x0, #0x8.b0 | |
7 = IADD.s32 64, 6 | |
63 = MKVEC.v2i16 #0x0.h00, 66.h00 | |
10 = IADD.s32 7, 63 | |
12 = U32_TO_F32 10 | |
13 = FMA.f32 12, #0x2edbe6ff, #0x0.neg | |
14 = F32_TO_S32.rtz 13 | |
} -> block1 | |
block1 { | |
15 = PHI 14, 50 | |
16 = PHI u1, 48 | |
17 = PHI #0x0, 51 | |
18 = ICMP.s32.m1.ge 17, #0x80 | |
BRANCHZ.i16.eq 18.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
19 = IMUL.i32 15, 16 | |
20 = IADD.s32 19, 15 | |
21 = IMUL.i32 20, 15 | |
22 = IADD.s32 21, 20 | |
23 = IMUL.i32 22, 20 | |
24 = IADD.s32 23, 22 | |
25 = IMUL.i32 24, 22 | |
26 = IADD.s32 25, 24 | |
27 = IMUL.i32 26, 24 | |
28 = IADD.s32 27, 26 | |
29 = IMUL.i32 28, 26 | |
30 = IADD.s32 29, 28 | |
31 = IMUL.i32 30, 28 | |
32 = IADD.s32 31, 30 | |
33 = IMUL.i32 32, 30 | |
34 = IADD.s32 33, 32 | |
35 = IMUL.i32 34, 32 | |
36 = IADD.s32 35, 34 | |
37 = IMUL.i32 36, 34 | |
38 = IADD.s32 37, 36 | |
39 = IMUL.i32 38, 36 | |
40 = IADD.s32 39, 38 | |
41 = IMUL.i32 40, 38 | |
42 = IADD.s32 41, 40 | |
43 = IMUL.i32 42, 40 | |
44 = IADD.s32 43, 42 | |
45 = IMUL.i32 44, 42 | |
46 = IADD.s32 45, 44 | |
47 = IMUL.i32 46, 44 | |
48 = IADD.s32 47, 46 | |
49 = IMUL.i32 48, 46 | |
50 = IADD.s32 49, 48 | |
51 = IADD.s32 17, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
53 = LSHIFT_OR.i32 10, #0x0, #0x2.b0 | |
57 = IADD.s32 u0, 53 | |
59 = ICMP.u32.i1.lt 57, u0 | |
60 = IADD.s32 59, u0[1] | |
STORE.i32 15, 57, 60, byte_offset:0 | |
} from block2 | |
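block5 is the SSBO store: the 64-bit buffer address from load_ssbo_address is split into 32-bit halves (u0 and u0[1]), the byte offset id*4 is added to the low half, and ICMP.u32.i1.lt recovers the carry for the high half. A minimal GLSL sketch of the same arithmetic, with base_lo/base_hi as hypothetical names for the two address halves:

    uint carry;                                     // mirrors 57/59/60 above
    uint lo = uaddCarry(base_lo, id << 2u, carry);  // 57 = IADD, 59 = ICMP.u32.i1.lt
    uint hi = base_hi + carry;                      // 60 = IADD with u0[1]
    // STORE.i32 then writes through the 64-bit address (lo, hi).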
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = U32_TO_F32 r0 | |
r1 = FMA.f32 r1, #0x2edbe6ff, #0x0.neg | |
r1 = F32_TO_S32.rtz r1 | |
r2 = MOV.i32 u1 | |
r3 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r4 = ICMP.s32.m1.ge r3, #0x80 | |
BRANCHZ.i16.eq r4.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r2 = IMUL.i32 r1, r2 | |
r2 = IADD.s32 r2, r1 | |
r1 = IMUL.i32 r2, r1 | |
r1 = IADD.s32 r1, r2 | |
r2 = IMUL.i32 r1, r2 | |
r2 = IADD.s32 r2, r1 | |
r1 = IMUL.i32 r2, r1 | |
r1 = IADD.s32 r1, r2 | |
r2 = IMUL.i32 r1, r2 | |
r2 = IADD.s32 r2, r1 | |
r1 = IMUL.i32 r2, r1 | |
r1 = IADD.s32 r1, r2 | |
r2 = IMUL.i32 r1, r2 | |
r2 = IADD.s32 r2, r1 | |
r1 = IMUL.i32 r2, r1 | |
r1 = IADD.s32 r1, r2 | |
r2 = IMUL.i32 r1, r2 | |
r2 = IADD.s32 r2, r1 | |
r1 = IMUL.i32 r2, r1 | |
r1 = IADD.s32 r1, r2 | |
r2 = IMUL.i32 r1, r2 | |
r2 = IADD.s32 r2, r1 | |
r1 = IMUL.i32 r2, r1 | |
r1 = IADD.s32 r1, r2 | |
r2 = IMUL.i32 r1, r2 | |
r2 = IADD.s32 r2, r1 | |
r1 = IMUL.i32 r2, r1 | |
r1 = IADD.s32 r1, r2 | |
r2 = IMUL.i32 r1, r2 | |
r2 = IADD.s32 r2, r1 | |
r1 = IMUL.i32 r2, r1 | |
r1 = IADD.s32 r1, r2 | |
r3 = IADD.s32 r3, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
block0 { | |
id(0) nbb r_uncond | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.y.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
* r2 = MOV.i32 fau.x | |
+ _.h00 = U32_TO_F32 t1 | |
* _.h00 = FMA.f32 t1, fau.y, t.neg | |
+ r3 = MOV.i32 fau.x | |
* NOP | |
+ r1 = F32_TO_S32.rtz t0 | |
2edbe6ff00000000 800000000 | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r3, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000080 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* _.h00 = IMUL.i32 r1, r2 | |
+ r2 = IADD.s32 t, r1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ NOP | |
* NOP | |
+ r2 = IADD.s32 t0, r1 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r2, r1 | |
+ r1 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ NOP | |
* NOP | |
+ r1 = IADD.s32 t0, r2 | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* _.h00 = IMUL.i32 r1, r2 | |
+ r2 = IADD.s32 t, r1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* NOP | |
+ r3 = IADD.s32 r3, fau.x | |
* NOP | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) wait(0 ) nbb r_uncond | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ NOP | |
* NOP | |
+ r0 = IADD.s32 fau.x, t0 | |
* NOP | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* NOP | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
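In this scheduled form each */+ pair is one tuple: the * line issues on the FMA pipe and the + line on the ADD pipe, with NOPs filling slots that could not be paired. The t/t0/t1 operands are passthrough temporaries forwarding results within and between adjacent tuples, and fau.x/fau.y select halves of the 64-bit constant words printed at the end of each block (e.g. 4000000000000080). This is a reading of the Mesa Bifrost dump format, not something the log itself states.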
slot 0 reads: r1 | |
clause_0: | |
ds(0) nbb r_uncond ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
*MOV.i32 r2:t0, u1.w0 | |
+U32_TO_F32 t1, t1 | |
*FMA.f32 t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+MOV.i32 r3:t1, 0x00000000 /* 0.000000 */ | |
*NOP t0 | |
+F32_TO_S32.rtz r1:t1, t0 | |
} | |
clause_5: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r3, 0x00000080 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_10 | |
} | |
clause_8: | |
ds(0) nbb next_store dwb(0) | |
{ | |
*NOP t0 | |
+JUMP t1, clause_26 | |
} | |
clause_10: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r1, r2 | |
+IADD.s32 r2:t1, t, r1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r2:t1, t0, r1 | |
} | |
clause_13: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r2, r1 | |
+IADD.s32 r1:t1, t, r2 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r1:t1, t0, r2 | |
} | |
clause_19: | |
ds(0) nbb r_uncond | |
{ | |
*IMUL.i32 t0, r1, r2 | |
+IADD.s32 r2:t1, t, r1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*NOP t0 | |
+IADD.s32 r3:t1, r3, 0x00000001 /* 0.000000 */ | |
*NOP t0 | |
+JUMP t1, clause_5 | |
} | |
clause_26: | |
ds(0) eos store | |
{ | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*NOP t0 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*NOP t0 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
e20eea22 compute_sp_v1_int 18.450 GFLOPs 14.550ms | |
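Same accounting as before: 268,435,456 ops / 14.550e-3 s = 18.45e9, matching the reported figure (the tool counts integer ops as FLOPs). The gap to compute_sp_v16_float is not purely int-vs-float: the v16 kernel keeps sixteen independent accumulator chains in flight, while this v1 kernel has a single serial chain, and each of its mads needs an IMUL on the * slot plus a dependent IADD on the + slot (see clause_13 above) where a float mad is one FMA.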
compute shader ---------- | |
#define KERNEL compute_sp_v2 | |
#define LOCAL_SIZE_X 256 | |
#define DATATYPE int | |
#define vec2 ivec2 | |
#define vec4 ivec4 | |
#line 64 | |
// Avoid auto-vectorization by using vector-width-locked dependent code | |
layout(local_size_x = LOCAL_SIZE_X) in; | |
#undef MAD_4 | |
#undef MAD_16 | |
#undef MAD_64 | |
#define mad(a,b,c) (a*b+c) | |
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x); | |
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); | |
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); | |
struct vec8 { | |
vec4 d0, d1; | |
}; | |
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7)) | |
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x)) | |
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1)) | |
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1)) | |
struct vec16 { | |
vec8 d0,d1; | |
}; | |
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15)) | |
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x)); | |
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1))) | |
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1))) | |
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c)) | |
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c)) | |
layout(location = 1) uniform DATATYPE _A; | |
#define SCALE 1e-10 | |
layout(std430, binding = 0) restrict writeonly buffer outbuffer { | |
DATATYPE ptr[]; | |
}; | |
#line 128 | |
void compute_sp_v2() | |
{ | |
uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u; | |
vec2 x = vec2(_A, (_A+DATATYPE(1))); | |
vec2 y = vec2((float(id) * SCALE), (float(id) * SCALE)); | |
for(int i=0; i<64; i++) | |
{ | |
MAD_16(x, y); | |
} | |
ptr[id] = (y.x) + (y.y); | |
} | |
void main() {compute_sp_v2();} | |
---------- | |
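The v2 variant halves the iteration count as it doubles the width, so per-invocation work is unchanged: 64 iterations x 16 mads x 2 lanes x 2 ops = 4096 ops, the same 2^28 total as v1. What changes is that x and y now carry two accumulator pairs, seeded from _A and _A+1, giving the scheduler two independent dependency chains instead of one.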
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0xa0084485, 0xd8844526, 0x94836c67, 0x8e6882bd, 0xb77caae3} | |
name: GLSL14 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp int[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp int _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_4 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_5 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_6 = ishl ssa_4.y, ssa_5 | |
vec1 32 ssa_7 = iadd ssa_4.x, ssa_6 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_99 = insert_u16 ssa_4.z, ssa_1 | |
vec1 32 ssa_10 = iadd ssa_7, ssa_99 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_11 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
vec1 32 ssa_12 = iadd ssa_11, ssa_1 | |
vec1 32 ssa_13 = u2f32 ssa_10 | |
vec1 32 ssa_2 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_14 = fmul ssa_13, ssa_2 | |
vec1 32 ssa_15 = f2i32 ssa_14 | |
vec1 32 ssa_3 = load_const (0x00000040 = 0.000000) | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_16 = phi block_0: ssa_15, block_4: ssa_84 | |
vec1 32 ssa_17 = phi block_0: ssa_15, block_4: ssa_85 | |
vec1 32 ssa_18 = phi block_0: ssa_11, block_4: ssa_80 | |
vec1 32 ssa_19 = phi block_0: ssa_12, block_4: ssa_81 | |
vec1 32 ssa_20 = phi block_0: ssa_0, block_4: ssa_86 | |
vec1 32 ssa_21 = ige32 ssa_20, ssa_3 | |
/* succs: block_2 block_3 */ | |
if ssa_21 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_22 = imul ssa_16, ssa_18 | |
vec1 32 ssa_23 = imul ssa_17, ssa_19 | |
vec1 32 ssa_24 = iadd ssa_22, ssa_16 | |
vec1 32 ssa_25 = iadd ssa_23, ssa_17 | |
vec1 32 ssa_26 = imul ssa_24, ssa_16 | |
vec1 32 ssa_27 = imul ssa_25, ssa_17 | |
vec1 32 ssa_28 = iadd ssa_26, ssa_24 | |
vec1 32 ssa_29 = iadd ssa_27, ssa_25 | |
vec1 32 ssa_30 = imul ssa_28, ssa_24 | |
vec1 32 ssa_31 = imul ssa_29, ssa_25 | |
vec1 32 ssa_32 = iadd ssa_30, ssa_28 | |
vec1 32 ssa_33 = iadd ssa_31, ssa_29 | |
vec1 32 ssa_34 = imul ssa_32, ssa_28 | |
vec1 32 ssa_35 = imul ssa_33, ssa_29 | |
vec1 32 ssa_36 = iadd ssa_34, ssa_32 | |
vec1 32 ssa_37 = iadd ssa_35, ssa_33 | |
vec1 32 ssa_38 = imul ssa_36, ssa_32 | |
vec1 32 ssa_39 = imul ssa_37, ssa_33 | |
vec1 32 ssa_40 = iadd ssa_38, ssa_36 | |
vec1 32 ssa_41 = iadd ssa_39, ssa_37 | |
vec1 32 ssa_42 = imul ssa_40, ssa_36 | |
vec1 32 ssa_43 = imul ssa_41, ssa_37 | |
vec1 32 ssa_44 = iadd ssa_42, ssa_40 | |
vec1 32 ssa_45 = iadd ssa_43, ssa_41 | |
vec1 32 ssa_46 = imul ssa_44, ssa_40 | |
vec1 32 ssa_47 = imul ssa_45, ssa_41 | |
vec1 32 ssa_48 = iadd ssa_46, ssa_44 | |
vec1 32 ssa_49 = iadd ssa_47, ssa_45 | |
vec1 32 ssa_50 = imul ssa_48, ssa_44 | |
vec1 32 ssa_51 = imul ssa_49, ssa_45 | |
vec1 32 ssa_52 = iadd ssa_50, ssa_48 | |
vec1 32 ssa_53 = iadd ssa_51, ssa_49 | |
vec1 32 ssa_54 = imul ssa_52, ssa_48 | |
vec1 32 ssa_55 = imul ssa_53, ssa_49 | |
vec1 32 ssa_56 = iadd ssa_54, ssa_52 | |
vec1 32 ssa_57 = iadd ssa_55, ssa_53 | |
vec1 32 ssa_58 = imul ssa_56, ssa_52 | |
vec1 32 ssa_59 = imul ssa_57, ssa_53 | |
vec1 32 ssa_60 = iadd ssa_58, ssa_56 | |
vec1 32 ssa_61 = iadd ssa_59, ssa_57 | |
vec1 32 ssa_62 = imul ssa_60, ssa_56 | |
vec1 32 ssa_63 = imul ssa_61, ssa_57 | |
vec1 32 ssa_64 = iadd ssa_62, ssa_60 | |
vec1 32 ssa_65 = iadd ssa_63, ssa_61 | |
vec1 32 ssa_66 = imul ssa_64, ssa_60 | |
vec1 32 ssa_67 = imul ssa_65, ssa_61 | |
vec1 32 ssa_68 = iadd ssa_66, ssa_64 | |
vec1 32 ssa_69 = iadd ssa_67, ssa_65 | |
vec1 32 ssa_70 = imul ssa_68, ssa_64 | |
vec1 32 ssa_71 = imul ssa_69, ssa_65 | |
vec1 32 ssa_72 = iadd ssa_70, ssa_68 | |
vec1 32 ssa_73 = iadd ssa_71, ssa_69 | |
vec1 32 ssa_74 = imul ssa_72, ssa_68 | |
vec1 32 ssa_75 = imul ssa_73, ssa_69 | |
vec1 32 ssa_76 = iadd ssa_74, ssa_72 | |
vec1 32 ssa_77 = iadd ssa_75, ssa_73 | |
vec1 32 ssa_78 = imul ssa_76, ssa_72 | |
vec1 32 ssa_79 = imul ssa_77, ssa_73 | |
vec1 32 ssa_80 = iadd ssa_78, ssa_76 | |
vec1 32 ssa_81 = iadd ssa_79, ssa_77 | |
vec1 32 ssa_82 = imul ssa_80, ssa_76 | |
vec1 32 ssa_83 = imul ssa_81, ssa_77 | |
vec1 32 ssa_84 = iadd ssa_82, ssa_80 | |
vec1 32 ssa_85 = iadd ssa_83, ssa_81 | |
vec1 32 ssa_86 = iadd ssa_20, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_87 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_88 = ishl ssa_10, ssa_87 | |
vec1 32 ssa_89 = iadd ssa_16, ssa_17 | |
vec1 64 ssa_90 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_91 = unpack_64_2x32_split_x ssa_90 | |
vec1 32 ssa_92 = unpack_64_2x32_split_y ssa_90 | |
vec1 32 ssa_93 = iadd ssa_91, ssa_88 | |
vec1 32 ssa_94 = ult32 ssa_93, ssa_91 | |
vec1 32 ssa_95 = b2i32 ssa_94 | |
vec1 32 ssa_96 = iadd ssa_95, ssa_92 | |
vec1 64 ssa_97 = pack_64_2x32_split ssa_93, ssa_96 | |
intrinsic store_global (ssa_89, ssa_97) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
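The ivec2 does not survive into NIR: after scalarization the loop header carries the two accumulator pairs as separate scalar phis (ssa_16/ssa_18 and ssa_17/ssa_19), and block_4 interleaves the two chains instruction by instruction (ssa_22/ssa_23, ssa_24/ssa_25, ...). The vector type in the source exists only to create that second chain.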
block0 { | |
102 = MOV.i32 r62 | |
101 = MOV.i32 r61 | |
100 = MOV.i32 r60 | |
6 = LSHIFT_OR.i32 101, #0x0, #0x8.b0 | |
7 = IADD.s32 100, 6 | |
99 = MKVEC.v2i16 #0x0.h00, 102.h00 | |
10 = IADD.s32 7, 99 | |
12 = IADD.s32 u1, #0x1 | |
13 = U32_TO_F32 10 | |
14 = FMA.f32 13, #0x2edbe6ff, #0x0.neg | |
15 = F32_TO_S32.rtz 14 | |
} -> block1 | |
block1 { | |
16 = PHI 15, 84 | |
17 = PHI 15, 85 | |
18 = PHI u1, 80 | |
19 = PHI 12, 81 | |
20 = PHI #0x0, 86 | |
21 = ICMP.s32.m1.ge 20, #0x40 | |
BRANCHZ.i16.eq 21.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
22 = IMUL.i32 16, 18 | |
23 = IMUL.i32 17, 19 | |
24 = IADD.s32 22, 16 | |
25 = IADD.s32 23, 17 | |
26 = IMUL.i32 24, 16 | |
27 = IMUL.i32 25, 17 | |
28 = IADD.s32 26, 24 | |
29 = IADD.s32 27, 25 | |
30 = IMUL.i32 28, 24 | |
31 = IMUL.i32 29, 25 | |
32 = IADD.s32 30, 28 | |
33 = IADD.s32 31, 29 | |
34 = IMUL.i32 32, 28 | |
35 = IMUL.i32 33, 29 | |
36 = IADD.s32 34, 32 | |
37 = IADD.s32 35, 33 | |
38 = IMUL.i32 36, 32 | |
39 = IMUL.i32 37, 33 | |
40 = IADD.s32 38, 36 | |
41 = IADD.s32 39, 37 | |
42 = IMUL.i32 40, 36 | |
43 = IMUL.i32 41, 37 | |
44 = IADD.s32 42, 40 | |
45 = IADD.s32 43, 41 | |
46 = IMUL.i32 44, 40 | |
47 = IMUL.i32 45, 41 | |
48 = IADD.s32 46, 44 | |
49 = IADD.s32 47, 45 | |
50 = IMUL.i32 48, 44 | |
51 = IMUL.i32 49, 45 | |
52 = IADD.s32 50, 48 | |
53 = IADD.s32 51, 49 | |
54 = IMUL.i32 52, 48 | |
55 = IMUL.i32 53, 49 | |
56 = IADD.s32 54, 52 | |
57 = IADD.s32 55, 53 | |
58 = IMUL.i32 56, 52 | |
59 = IMUL.i32 57, 53 | |
60 = IADD.s32 58, 56 | |
61 = IADD.s32 59, 57 | |
62 = IMUL.i32 60, 56 | |
63 = IMUL.i32 61, 57 | |
64 = IADD.s32 62, 60 | |
65 = IADD.s32 63, 61 | |
66 = IMUL.i32 64, 60 | |
67 = IMUL.i32 65, 61 | |
68 = IADD.s32 66, 64 | |
69 = IADD.s32 67, 65 | |
70 = IMUL.i32 68, 64 | |
71 = IMUL.i32 69, 65 | |
72 = IADD.s32 70, 68 | |
73 = IADD.s32 71, 69 | |
74 = IMUL.i32 72, 68 | |
75 = IMUL.i32 73, 69 | |
76 = IADD.s32 74, 72 | |
77 = IADD.s32 75, 73 | |
78 = IMUL.i32 76, 72 | |
79 = IMUL.i32 77, 73 | |
80 = IADD.s32 78, 76 | |
81 = IADD.s32 79, 77 | |
82 = IMUL.i32 80, 76 | |
83 = IMUL.i32 81, 77 | |
84 = IADD.s32 82, 80 | |
85 = IADD.s32 83, 81 | |
86 = IADD.s32 20, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
88 = LSHIFT_OR.i32 10, #0x0, #0x2.b0 | |
89 = IADD.s32 16, 17 | |
93 = IADD.s32 u0, 88 | |
95 = ICMP.u32.i1.lt 93, u0 | |
96 = IADD.s32 95, u0[1] | |
STORE.i32 89, 93, 96, byte_offset:0 | |
} from block2 | |
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = MOV.i32 #0x1 | |
r1 = IADD.s32 u1, r1 | |
r2 = U32_TO_F32 r0 | |
r2 = FMA.f32 r2, #0x2edbe6ff, #0x0.neg | |
r2 = F32_TO_S32.rtz r2 | |
r3 = MOV.i32 r2 | |
r4 = MOV.i32 u1 | |
r5 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r6 = ICMP.s32.m1.ge r5, #0x40 | |
BRANCHZ.i16.eq r6.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r4 = IMUL.i32 r2, r4 | |
r1 = IMUL.i32 r3, r1 | |
r4 = IADD.s32 r4, r2 | |
r1 = IADD.s32 r1, r3 | |
r2 = IMUL.i32 r4, r2 | |
r3 = IMUL.i32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r3 = IADD.s32 r3, r1 | |
r4 = IMUL.i32 r2, r4 | |
r1 = IMUL.i32 r3, r1 | |
r4 = IADD.s32 r4, r2 | |
r1 = IADD.s32 r1, r3 | |
r2 = IMUL.i32 r4, r2 | |
r3 = IMUL.i32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r3 = IADD.s32 r3, r1 | |
r4 = IMUL.i32 r2, r4 | |
r1 = IMUL.i32 r3, r1 | |
r4 = IADD.s32 r4, r2 | |
r1 = IADD.s32 r1, r3 | |
r2 = IMUL.i32 r4, r2 | |
r3 = IMUL.i32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r3 = IADD.s32 r3, r1 | |
r4 = IMUL.i32 r2, r4 | |
r1 = IMUL.i32 r3, r1 | |
r4 = IADD.s32 r4, r2 | |
r1 = IADD.s32 r1, r3 | |
r2 = IMUL.i32 r4, r2 | |
r3 = IMUL.i32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r3 = IADD.s32 r3, r1 | |
r4 = IMUL.i32 r2, r4 | |
r1 = IMUL.i32 r3, r1 | |
r4 = IADD.s32 r4, r2 | |
r1 = IADD.s32 r1, r3 | |
r2 = IMUL.i32 r4, r2 | |
r3 = IMUL.i32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r3 = IADD.s32 r3, r1 | |
r4 = IMUL.i32 r2, r4 | |
r1 = IMUL.i32 r3, r1 | |
r4 = IADD.s32 r4, r2 | |
r1 = IADD.s32 r1, r3 | |
r2 = IMUL.i32 r4, r2 | |
r3 = IMUL.i32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r3 = IADD.s32 r3, r1 | |
r4 = IMUL.i32 r2, r4 | |
r1 = IMUL.i32 r3, r1 | |
r4 = IADD.s32 r4, r2 | |
r1 = IADD.s32 r1, r3 | |
r2 = IMUL.i32 r4, r2 | |
r3 = IMUL.i32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r3 = IADD.s32 r3, r1 | |
r4 = IMUL.i32 r2, r4 | |
r1 = IMUL.i32 r3, r1 | |
r4 = IADD.s32 r4, r2 | |
r1 = IADD.s32 r1, r3 | |
r2 = IMUL.i32 r4, r2 | |
r3 = IMUL.i32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r3 = IADD.s32 r3, r1 | |
r5 = IADD.s32 r5, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r1 = IADD.s32 r2, r3 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
block0 { | |
id(0) nbb r_uncond | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.y.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
* NOP | |
+ r1 = MOV.i32 fau.x | |
* r4 = MOV.i32 fau.x | |
+ _.h00 = U32_TO_F32 r0 | |
* _.h00 = FMA.f32 t1, fau.y, t.neg | |
+ r2 = F32_TO_S32.rtz t | |
* r3 = MOV.i32 t1 | |
+ r5 = MOV.i32 fau.x | |
* NOP | |
+ r1 = IADD.s32 fau.x, r1 | |
800000001 2edbe6ff00000000 | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r5, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000040 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* _.h00 = IMUL.i32 r2, r4 | |
+ r4 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ NOP | |
* NOP | |
+ r4 = IADD.s32 t0, r2 | |
id(0) nbb | |
* r2 = IMUL.i32 r4, r2 | |
+ NOP | |
* _.h00 = IMUL.i32 r3, r1 | |
+ r1 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ NOP | |
* NOP | |
+ r3 = IADD.s32 t0, r1 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r3, r1 | |
+ r1 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* r1 = IMUL.i32 t1, r1 | |
+ NOP | |
* NOP | |
+ r2 = IADD.s32 r2, r4 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r2, r4 | |
+ r4 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* NOP | |
+ r1 = IADD.s32 r1, r3 | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* _.h00 = IMUL.i32 r4, r2 | |
+ r2 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r1, r3 | |
+ r3 = IADD.s32 t, r1 | |
* _.h00 = IMUL.i32 r2, r4 | |
+ r4 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r3, r1 | |
+ r1 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 r4, r2 | |
+ r2 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r1, r3 | |
+ r3 = IADD.s32 t, r1 | |
* NOP | |
+ r5 = IADD.s32 r5, fau.x | |
* NOP | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) wait(0 ) nbb r_uncond | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ r1 = IADD.s32 r2, r3 | |
* NOP | |
+ r0 = IADD.s32 fau.x, t0 | |
* NOP | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* NOP | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
slot 0 reads: r1 | |
clause_0: | |
ds(0) nbb r_uncond ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
*NOP t0 | |
+MOV.i32 r1:t1, 0x00000001 /* 0.000000 */ | |
*MOV.i32 r4:t0, u1.w0 | |
+U32_TO_F32 t1, r0 | |
*FMA.f32 t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+F32_TO_S32.rtz r2:t1, t | |
*MOV.i32 r3:t0, t1 | |
+MOV.i32 r5:t1, #0.x | |
*NOP t0 | |
+IADD.s32 r1:t1, u1.w0, r1 | |
} | |
clause_6: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r5, 0x00000040 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_11 | |
} | |
clause_9: | |
ds(0) nbb next_store dwb(0) | |
{ | |
*NOP t0 | |
+JUMP t1, clause_41 | |
} | |
clause_11: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r2, r4 | |
+IADD.s32 r4:t1, t, r2 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r4 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r4:t1, t0, r2 | |
} | |
clause_16: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 r2:t0, r4, r2 | |
+NOP t1 | |
*IMUL.i32 t0, r3, r1 | |
+IADD.s32 r1:t1, t, r3 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r3:t1, t0, r1 | |
} | |
clause_22: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r3, r1 | |
+IADD.s32 r1:t1, t, r3 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 r1:t0, t1, r1 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r2:t1, r2, r4 | |
} | |
clause_28: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r2, r4 | |
+IADD.s32 r4:t1, t, r2 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*NOP t0 | |
+IADD.s32 r1:t1, r1, r3 | |
} | |
clause_34: | |
ds(0) nbb r_uncond | |
{ | |
*IMUL.i32 t0, r4, r2 | |
+IADD.s32 r2:t1, t, r4 | |
*IMUL.i32 t0, r1, r3 | |
+IADD.s32 r3:t1, t, r1 | |
*IMUL.i32 t0, r2, r4 | |
+IADD.s32 r4:t1, t, r2 | |
*IMUL.i32 t0, r3, r1 | |
+IADD.s32 r1:t1, t, r3 | |
*IMUL.i32 t0, r4, r2 | |
+IADD.s32 r2:t1, t, r4 | |
*IMUL.i32 t0, r1, r3 | |
+IADD.s32 r3:t1, t, r1 | |
*NOP t0 | |
+IADD.s32 r5:t1, r5, 0x00000001 /* 0.000000 */ | |
*NOP t0 | |
+JUMP t1, clause_6 | |
} | |
clause_41: | |
ds(0) eos store | |
{ | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+IADD.s32 r1:t1, r2, r3 | |
*NOP t0 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*NOP t0 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*NOP t0 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
e20eea22 compute_sp_v2_int 20.068 GFLOPs 13.376ms | |
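268,435,456 ops / 13.376e-3 s = 20.07e9, matching the report; the second chain buys roughly 9% over compute_sp_v1_int (20.068 vs 18.450 GFLOPs). The effect is visible in clause_34 above, where tuples alternate between the two chains, so each chain's IADD result gets a tuple of slack before the next IMUL that consumes it.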
compute shader ---------- | |
#define KERNEL compute_sp_v4 | |
#define LOCAL_SIZE_X 256 | |
#define DATATYPE int | |
#define vec2 ivec2 | |
#define vec4 ivec4 | |
#line 64 | |
// Avoid auto-vectorization by using vector-width-locked dependent code | |
layout(local_size_x = LOCAL_SIZE_X) in; | |
#undef MAD_4 | |
#undef MAD_16 | |
#undef MAD_64 | |
#define mad(a,b,c) (a*b+c) | |
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x); | |
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); | |
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); | |
struct vec8 { | |
vec4 d0, d1; | |
}; | |
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7)) | |
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x)) | |
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1)) | |
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1)) | |
struct vec16 { | |
vec8 d0,d1; | |
}; | |
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15)) | |
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x)); | |
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1))) | |
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1))) | |
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c)) | |
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c)) | |
layout(location = 1) uniform DATATYPE _A; | |
#define SCALE 1e-10 | |
layout(std430, binding = 0) restrict writeonly buffer outbuffer { | |
DATATYPE ptr[]; | |
}; | |
#line 145 | |
void compute_sp_v4() | |
{ | |
uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u; | |
vec4 x = vec4(_A, (_A+DATATYPE(1)), (_A+DATATYPE(2)), (_A+DATATYPE(3))); | |
vec4 y = vec4((float(id) * SCALE), (float(id) * SCALE), (float(id) * SCALE), (float(id) * SCALE)); | |
for(int i=0; i<32; i++) | |
{ | |
MAD_16(x, y); | |
} | |
ptr[id] = (y.x) + (y.y) + (y.z) + (y.w); | |
} | |
void main() {compute_sp_v4();} | |
---------- | |
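v4 continues the pattern: 32 iterations x 16 mads x 4 lanes x 2 ops = 4096 ops per invocation, now spread across four independent chains seeded from _A, _A+1, _A+2, _A+3 and reduced at the end with y.x + y.y + y.z + y.w.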
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0x183901bc, 0xa5c63b78, 0x1c36bc35, 0xd2853d78, 0x1152fc21} | |
name: GLSL16 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp int[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp int _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_6 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_7 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_8 = ishl ssa_6.y, ssa_7 | |
vec1 32 ssa_9 = iadd ssa_6.x, ssa_8 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_172 = insert_u16 ssa_6.z, ssa_1 | |
vec1 32 ssa_12 = iadd ssa_9, ssa_172 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_13 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
vec1 32 ssa_14 = iadd ssa_13, ssa_1 | |
vec1 32 ssa_2 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_15 = iadd ssa_13, ssa_2 | |
vec1 32 ssa_3 = load_const (0x00000003 = 0.000000) | |
vec1 32 ssa_16 = iadd ssa_13, ssa_3 | |
vec1 32 ssa_17 = u2f32 ssa_12 | |
vec1 32 ssa_4 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_18 = fmul ssa_17, ssa_4 | |
vec1 32 ssa_19 = f2i32 ssa_18 | |
vec1 32 ssa_5 = load_const (0x00000020 = 0.000000) | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_20 = phi block_0: ssa_19, block_4: ssa_154 | |
vec1 32 ssa_21 = phi block_0: ssa_19, block_4: ssa_155 | |
vec1 32 ssa_22 = phi block_0: ssa_19, block_4: ssa_156 | |
vec1 32 ssa_23 = phi block_0: ssa_19, block_4: ssa_157 | |
vec1 32 ssa_24 = phi block_0: ssa_13, block_4: ssa_146 | |
vec1 32 ssa_25 = phi block_0: ssa_14, block_4: ssa_147 | |
vec1 32 ssa_26 = phi block_0: ssa_15, block_4: ssa_148 | |
vec1 32 ssa_27 = phi block_0: ssa_16, block_4: ssa_149 | |
vec1 32 ssa_28 = phi block_0: ssa_0, block_4: ssa_158 | |
vec1 32 ssa_29 = ige32 ssa_28, ssa_5 | |
/* succs: block_2 block_3 */ | |
if ssa_29 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_30 = imul ssa_20, ssa_24 | |
vec1 32 ssa_31 = imul ssa_21, ssa_25 | |
vec1 32 ssa_32 = imul ssa_22, ssa_26 | |
vec1 32 ssa_33 = imul ssa_23, ssa_27 | |
vec1 32 ssa_34 = iadd ssa_30, ssa_20 | |
vec1 32 ssa_35 = iadd ssa_31, ssa_21 | |
vec1 32 ssa_36 = iadd ssa_32, ssa_22 | |
vec1 32 ssa_37 = iadd ssa_33, ssa_23 | |
vec1 32 ssa_38 = imul ssa_34, ssa_20 | |
vec1 32 ssa_39 = imul ssa_35, ssa_21 | |
vec1 32 ssa_40 = imul ssa_36, ssa_22 | |
vec1 32 ssa_41 = imul ssa_37, ssa_23 | |
vec1 32 ssa_42 = iadd ssa_38, ssa_34 | |
vec1 32 ssa_43 = iadd ssa_39, ssa_35 | |
vec1 32 ssa_44 = iadd ssa_40, ssa_36 | |
vec1 32 ssa_45 = iadd ssa_41, ssa_37 | |
vec1 32 ssa_46 = imul ssa_42, ssa_34 | |
vec1 32 ssa_47 = imul ssa_43, ssa_35 | |
vec1 32 ssa_48 = imul ssa_44, ssa_36 | |
vec1 32 ssa_49 = imul ssa_45, ssa_37 | |
vec1 32 ssa_50 = iadd ssa_46, ssa_42 | |
vec1 32 ssa_51 = iadd ssa_47, ssa_43 | |
vec1 32 ssa_52 = iadd ssa_48, ssa_44 | |
vec1 32 ssa_53 = iadd ssa_49, ssa_45 | |
vec1 32 ssa_54 = imul ssa_50, ssa_42 | |
vec1 32 ssa_55 = imul ssa_51, ssa_43 | |
vec1 32 ssa_56 = imul ssa_52, ssa_44 | |
vec1 32 ssa_57 = imul ssa_53, ssa_45 | |
vec1 32 ssa_58 = iadd ssa_54, ssa_50 | |
vec1 32 ssa_59 = iadd ssa_55, ssa_51 | |
vec1 32 ssa_60 = iadd ssa_56, ssa_52 | |
vec1 32 ssa_61 = iadd ssa_57, ssa_53 | |
vec1 32 ssa_62 = imul ssa_58, ssa_50 | |
vec1 32 ssa_63 = imul ssa_59, ssa_51 | |
vec1 32 ssa_64 = imul ssa_60, ssa_52 | |
vec1 32 ssa_65 = imul ssa_61, ssa_53 | |
vec1 32 ssa_66 = iadd ssa_62, ssa_58 | |
vec1 32 ssa_67 = iadd ssa_63, ssa_59 | |
vec1 32 ssa_68 = iadd ssa_64, ssa_60 | |
vec1 32 ssa_69 = iadd ssa_65, ssa_61 | |
vec1 32 ssa_70 = imul ssa_66, ssa_58 | |
vec1 32 ssa_71 = imul ssa_67, ssa_59 | |
vec1 32 ssa_72 = imul ssa_68, ssa_60 | |
vec1 32 ssa_73 = imul ssa_69, ssa_61 | |
vec1 32 ssa_74 = iadd ssa_70, ssa_66 | |
vec1 32 ssa_75 = iadd ssa_71, ssa_67 | |
vec1 32 ssa_76 = iadd ssa_72, ssa_68 | |
vec1 32 ssa_77 = iadd ssa_73, ssa_69 | |
vec1 32 ssa_78 = imul ssa_74, ssa_66 | |
vec1 32 ssa_79 = imul ssa_75, ssa_67 | |
vec1 32 ssa_80 = imul ssa_76, ssa_68 | |
vec1 32 ssa_81 = imul ssa_77, ssa_69 | |
vec1 32 ssa_82 = iadd ssa_78, ssa_74 | |
vec1 32 ssa_83 = iadd ssa_79, ssa_75 | |
vec1 32 ssa_84 = iadd ssa_80, ssa_76 | |
vec1 32 ssa_85 = iadd ssa_81, ssa_77 | |
vec1 32 ssa_86 = imul ssa_82, ssa_74 | |
vec1 32 ssa_87 = imul ssa_83, ssa_75 | |
vec1 32 ssa_88 = imul ssa_84, ssa_76 | |
vec1 32 ssa_89 = imul ssa_85, ssa_77 | |
vec1 32 ssa_90 = iadd ssa_86, ssa_82 | |
vec1 32 ssa_91 = iadd ssa_87, ssa_83 | |
vec1 32 ssa_92 = iadd ssa_88, ssa_84 | |
vec1 32 ssa_93 = iadd ssa_89, ssa_85 | |
vec1 32 ssa_94 = imul ssa_90, ssa_82 | |
vec1 32 ssa_95 = imul ssa_91, ssa_83 | |
vec1 32 ssa_96 = imul ssa_92, ssa_84 | |
vec1 32 ssa_97 = imul ssa_93, ssa_85 | |
vec1 32 ssa_98 = iadd ssa_94, ssa_90 | |
vec1 32 ssa_99 = iadd ssa_95, ssa_91 | |
vec1 32 ssa_100 = iadd ssa_96, ssa_92 | |
vec1 32 ssa_101 = iadd ssa_97, ssa_93 | |
vec1 32 ssa_102 = imul ssa_98, ssa_90 | |
vec1 32 ssa_103 = imul ssa_99, ssa_91 | |
vec1 32 ssa_104 = imul ssa_100, ssa_92 | |
vec1 32 ssa_105 = imul ssa_101, ssa_93 | |
vec1 32 ssa_106 = iadd ssa_102, ssa_98 | |
vec1 32 ssa_107 = iadd ssa_103, ssa_99 | |
vec1 32 ssa_108 = iadd ssa_104, ssa_100 | |
vec1 32 ssa_109 = iadd ssa_105, ssa_101 | |
vec1 32 ssa_110 = imul ssa_106, ssa_98 | |
vec1 32 ssa_111 = imul ssa_107, ssa_99 | |
vec1 32 ssa_112 = imul ssa_108, ssa_100 | |
vec1 32 ssa_113 = imul ssa_109, ssa_101 | |
vec1 32 ssa_114 = iadd ssa_110, ssa_106 | |
vec1 32 ssa_115 = iadd ssa_111, ssa_107 | |
vec1 32 ssa_116 = iadd ssa_112, ssa_108 | |
vec1 32 ssa_117 = iadd ssa_113, ssa_109 | |
vec1 32 ssa_118 = imul ssa_114, ssa_106 | |
vec1 32 ssa_119 = imul ssa_115, ssa_107 | |
vec1 32 ssa_120 = imul ssa_116, ssa_108 | |
vec1 32 ssa_121 = imul ssa_117, ssa_109 | |
vec1 32 ssa_122 = iadd ssa_118, ssa_114 | |
vec1 32 ssa_123 = iadd ssa_119, ssa_115 | |
vec1 32 ssa_124 = iadd ssa_120, ssa_116 | |
vec1 32 ssa_125 = iadd ssa_121, ssa_117 | |
vec1 32 ssa_126 = imul ssa_122, ssa_114 | |
vec1 32 ssa_127 = imul ssa_123, ssa_115 | |
vec1 32 ssa_128 = imul ssa_124, ssa_116 | |
vec1 32 ssa_129 = imul ssa_125, ssa_117 | |
vec1 32 ssa_130 = iadd ssa_126, ssa_122 | |
vec1 32 ssa_131 = iadd ssa_127, ssa_123 | |
vec1 32 ssa_132 = iadd ssa_128, ssa_124 | |
vec1 32 ssa_133 = iadd ssa_129, ssa_125 | |
vec1 32 ssa_134 = imul ssa_130, ssa_122 | |
vec1 32 ssa_135 = imul ssa_131, ssa_123 | |
vec1 32 ssa_136 = imul ssa_132, ssa_124 | |
vec1 32 ssa_137 = imul ssa_133, ssa_125 | |
vec1 32 ssa_138 = iadd ssa_134, ssa_130 | |
vec1 32 ssa_139 = iadd ssa_135, ssa_131 | |
vec1 32 ssa_140 = iadd ssa_136, ssa_132 | |
vec1 32 ssa_141 = iadd ssa_137, ssa_133 | |
vec1 32 ssa_142 = imul ssa_138, ssa_130 | |
vec1 32 ssa_143 = imul ssa_139, ssa_131 | |
vec1 32 ssa_144 = imul ssa_140, ssa_132 | |
vec1 32 ssa_145 = imul ssa_141, ssa_133 | |
vec1 32 ssa_146 = iadd ssa_142, ssa_138 | |
vec1 32 ssa_147 = iadd ssa_143, ssa_139 | |
vec1 32 ssa_148 = iadd ssa_144, ssa_140 | |
vec1 32 ssa_149 = iadd ssa_145, ssa_141 | |
vec1 32 ssa_150 = imul ssa_146, ssa_138 | |
vec1 32 ssa_151 = imul ssa_147, ssa_139 | |
vec1 32 ssa_152 = imul ssa_148, ssa_140 | |
vec1 32 ssa_153 = imul ssa_149, ssa_141 | |
vec1 32 ssa_154 = iadd ssa_150, ssa_146 | |
vec1 32 ssa_155 = iadd ssa_151, ssa_147 | |
vec1 32 ssa_156 = iadd ssa_152, ssa_148 | |
vec1 32 ssa_157 = iadd ssa_153, ssa_149 | |
vec1 32 ssa_158 = iadd ssa_28, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_159 = ishl ssa_12, ssa_2 | |
vec1 32 ssa_160 = iadd ssa_20, ssa_21 | |
vec1 32 ssa_161 = iadd ssa_160, ssa_22 | |
vec1 32 ssa_162 = iadd ssa_161, ssa_23 | |
vec1 64 ssa_163 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_164 = unpack_64_2x32_split_x ssa_163 | |
vec1 32 ssa_165 = unpack_64_2x32_split_y ssa_163 | |
vec1 32 ssa_166 = iadd ssa_164, ssa_159 | |
vec1 32 ssa_167 = ult32 ssa_166, ssa_164 | |
vec1 32 ssa_168 = b2i32 ssa_167 | |
vec1 32 ssa_169 = iadd ssa_168, ssa_165 | |
vec1 64 ssa_170 = pack_64_2x32_split ssa_166, ssa_169 | |
intrinsic store_global (ssa_162, ssa_170) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
block0 { | |
175 = MOV.i32 r62 | |
174 = MOV.i32 r61 | |
173 = MOV.i32 r60 | |
8 = LSHIFT_OR.i32 174, #0x0, #0x8.b0 | |
9 = IADD.s32 173, 8 | |
172 = MKVEC.v2i16 #0x0.h00, 175.h00 | |
12 = IADD.s32 9, 172 | |
14 = IADD.s32 u1, #0x1 | |
15 = IADD.s32 u1, #0x2 | |
16 = IADD.s32 u1, #0x3 | |
17 = U32_TO_F32 12 | |
18 = FMA.f32 17, #0x2edbe6ff, #0x0.neg | |
19 = F32_TO_S32.rtz 18 | |
} -> block1 | |
block1 { | |
20 = PHI 19, 154 | |
21 = PHI 19, 155 | |
22 = PHI 19, 156 | |
23 = PHI 19, 157 | |
24 = PHI u1, 146 | |
25 = PHI 14, 147 | |
26 = PHI 15, 148 | |
27 = PHI 16, 149 | |
28 = PHI #0x0, 158 | |
29 = ICMP.s32.m1.ge 28, #0x20 | |
BRANCHZ.i16.eq 29.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
30 = IMUL.i32 20, 24 | |
31 = IMUL.i32 21, 25 | |
32 = IMUL.i32 22, 26 | |
33 = IMUL.i32 23, 27 | |
34 = IADD.s32 30, 20 | |
35 = IADD.s32 31, 21 | |
36 = IADD.s32 32, 22 | |
37 = IADD.s32 33, 23 | |
38 = IMUL.i32 34, 20 | |
39 = IMUL.i32 35, 21 | |
40 = IMUL.i32 36, 22 | |
41 = IMUL.i32 37, 23 | |
42 = IADD.s32 38, 34 | |
43 = IADD.s32 39, 35 | |
44 = IADD.s32 40, 36 | |
45 = IADD.s32 41, 37 | |
46 = IMUL.i32 42, 34 | |
47 = IMUL.i32 43, 35 | |
48 = IMUL.i32 44, 36 | |
49 = IMUL.i32 45, 37 | |
50 = IADD.s32 46, 42 | |
51 = IADD.s32 47, 43 | |
52 = IADD.s32 48, 44 | |
53 = IADD.s32 49, 45 | |
54 = IMUL.i32 50, 42 | |
55 = IMUL.i32 51, 43 | |
56 = IMUL.i32 52, 44 | |
57 = IMUL.i32 53, 45 | |
58 = IADD.s32 54, 50 | |
59 = IADD.s32 55, 51 | |
60 = IADD.s32 56, 52 | |
61 = IADD.s32 57, 53 | |
62 = IMUL.i32 58, 50 | |
63 = IMUL.i32 59, 51 | |
64 = IMUL.i32 60, 52 | |
65 = IMUL.i32 61, 53 | |
66 = IADD.s32 62, 58 | |
67 = IADD.s32 63, 59 | |
68 = IADD.s32 64, 60 | |
69 = IADD.s32 65, 61 | |
70 = IMUL.i32 66, 58 | |
71 = IMUL.i32 67, 59 | |
72 = IMUL.i32 68, 60 | |
73 = IMUL.i32 69, 61 | |
74 = IADD.s32 70, 66 | |
75 = IADD.s32 71, 67 | |
76 = IADD.s32 72, 68 | |
77 = IADD.s32 73, 69 | |
78 = IMUL.i32 74, 66 | |
79 = IMUL.i32 75, 67 | |
80 = IMUL.i32 76, 68 | |
81 = IMUL.i32 77, 69 | |
82 = IADD.s32 78, 74 | |
83 = IADD.s32 79, 75 | |
84 = IADD.s32 80, 76 | |
85 = IADD.s32 81, 77 | |
86 = IMUL.i32 82, 74 | |
87 = IMUL.i32 83, 75 | |
88 = IMUL.i32 84, 76 | |
89 = IMUL.i32 85, 77 | |
90 = IADD.s32 86, 82 | |
91 = IADD.s32 87, 83 | |
92 = IADD.s32 88, 84 | |
93 = IADD.s32 89, 85 | |
94 = IMUL.i32 90, 82 | |
95 = IMUL.i32 91, 83 | |
96 = IMUL.i32 92, 84 | |
97 = IMUL.i32 93, 85 | |
98 = IADD.s32 94, 90 | |
99 = IADD.s32 95, 91 | |
100 = IADD.s32 96, 92 | |
101 = IADD.s32 97, 93 | |
102 = IMUL.i32 98, 90 | |
103 = IMUL.i32 99, 91 | |
104 = IMUL.i32 100, 92 | |
105 = IMUL.i32 101, 93 | |
106 = IADD.s32 102, 98 | |
107 = IADD.s32 103, 99 | |
108 = IADD.s32 104, 100 | |
109 = IADD.s32 105, 101 | |
110 = IMUL.i32 106, 98 | |
111 = IMUL.i32 107, 99 | |
112 = IMUL.i32 108, 100 | |
113 = IMUL.i32 109, 101 | |
114 = IADD.s32 110, 106 | |
115 = IADD.s32 111, 107 | |
116 = IADD.s32 112, 108 | |
117 = IADD.s32 113, 109 | |
118 = IMUL.i32 114, 106 | |
119 = IMUL.i32 115, 107 | |
120 = IMUL.i32 116, 108 | |
121 = IMUL.i32 117, 109 | |
122 = IADD.s32 118, 114 | |
123 = IADD.s32 119, 115 | |
124 = IADD.s32 120, 116 | |
125 = IADD.s32 121, 117 | |
126 = IMUL.i32 122, 114 | |
127 = IMUL.i32 123, 115 | |
128 = IMUL.i32 124, 116 | |
129 = IMUL.i32 125, 117 | |
130 = IADD.s32 126, 122 | |
131 = IADD.s32 127, 123 | |
132 = IADD.s32 128, 124 | |
133 = IADD.s32 129, 125 | |
134 = IMUL.i32 130, 122 | |
135 = IMUL.i32 131, 123 | |
136 = IMUL.i32 132, 124 | |
137 = IMUL.i32 133, 125 | |
138 = IADD.s32 134, 130 | |
139 = IADD.s32 135, 131 | |
140 = IADD.s32 136, 132 | |
141 = IADD.s32 137, 133 | |
142 = IMUL.i32 138, 130 | |
143 = IMUL.i32 139, 131 | |
144 = IMUL.i32 140, 132 | |
145 = IMUL.i32 141, 133 | |
146 = IADD.s32 142, 138 | |
147 = IADD.s32 143, 139 | |
148 = IADD.s32 144, 140 | |
149 = IADD.s32 145, 141 | |
150 = IMUL.i32 146, 138 | |
151 = IMUL.i32 147, 139 | |
152 = IMUL.i32 148, 140 | |
153 = IMUL.i32 149, 141 | |
154 = IADD.s32 150, 146 | |
155 = IADD.s32 151, 147 | |
156 = IADD.s32 152, 148 | |
157 = IADD.s32 153, 149 | |
158 = IADD.s32 28, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
159 = LSHIFT_OR.i32 12, #0x0, #0x2.b0 | |
160 = IADD.s32 20, 21 | |
161 = IADD.s32 160, 22 | |
162 = IADD.s32 161, 23 | |
166 = IADD.s32 u0, 159 | |
168 = ICMP.u32.i1.lt 166, u0 | |
169 = IADD.s32 168, u0[1] | |
STORE.i32 162, 166, 169, byte_offset:0 | |
} from block2 | |
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = MOV.i32 #0x1 | |
r1 = IADD.s32 u1, r1 | |
r2 = MOV.i32 #0x2 | |
r2 = IADD.s32 u1, r2 | |
r3 = MOV.i32 #0x3 | |
r3 = IADD.s32 u1, r3 | |
r4 = U32_TO_F32 r0 | |
r4 = FMA.f32 r4, #0x2edbe6ff, #0x0.neg | |
r4 = F32_TO_S32.rtz r4 | |
r5 = MOV.i32 r4 | |
r6 = MOV.i32 r4 | |
r7 = MOV.i32 r4 | |
r8 = MOV.i32 u1 | |
r9 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r10 = ICMP.s32.m1.ge r9, #0x20 | |
BRANCHZ.i16.eq r10.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r8 = IMUL.i32 r4, r8 | |
r1 = IMUL.i32 r5, r1 | |
r2 = IMUL.i32 r6, r2 | |
r3 = IMUL.i32 r7, r3 | |
r8 = IADD.s32 r8, r4 | |
r1 = IADD.s32 r1, r5 | |
r2 = IADD.s32 r2, r6 | |
r3 = IADD.s32 r3, r7 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r1, r5 | |
r6 = IMUL.i32 r2, r6 | |
r7 = IMUL.i32 r3, r7 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r1 | |
r6 = IADD.s32 r6, r2 | |
r7 = IADD.s32 r7, r3 | |
r8 = IMUL.i32 r4, r8 | |
r1 = IMUL.i32 r5, r1 | |
r2 = IMUL.i32 r6, r2 | |
r3 = IMUL.i32 r7, r3 | |
r8 = IADD.s32 r8, r4 | |
r1 = IADD.s32 r1, r5 | |
r2 = IADD.s32 r2, r6 | |
r3 = IADD.s32 r3, r7 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r1, r5 | |
r6 = IMUL.i32 r2, r6 | |
r7 = IMUL.i32 r3, r7 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r1 | |
r6 = IADD.s32 r6, r2 | |
r7 = IADD.s32 r7, r3 | |
r8 = IMUL.i32 r4, r8 | |
r1 = IMUL.i32 r5, r1 | |
r2 = IMUL.i32 r6, r2 | |
r3 = IMUL.i32 r7, r3 | |
r8 = IADD.s32 r8, r4 | |
r1 = IADD.s32 r1, r5 | |
r2 = IADD.s32 r2, r6 | |
r3 = IADD.s32 r3, r7 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r1, r5 | |
r6 = IMUL.i32 r2, r6 | |
r7 = IMUL.i32 r3, r7 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r1 | |
r6 = IADD.s32 r6, r2 | |
r7 = IADD.s32 r7, r3 | |
r8 = IMUL.i32 r4, r8 | |
r1 = IMUL.i32 r5, r1 | |
r2 = IMUL.i32 r6, r2 | |
r3 = IMUL.i32 r7, r3 | |
r8 = IADD.s32 r8, r4 | |
r1 = IADD.s32 r1, r5 | |
r2 = IADD.s32 r2, r6 | |
r3 = IADD.s32 r3, r7 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r1, r5 | |
r6 = IMUL.i32 r2, r6 | |
r7 = IMUL.i32 r3, r7 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r1 | |
r6 = IADD.s32 r6, r2 | |
r7 = IADD.s32 r7, r3 | |
r8 = IMUL.i32 r4, r8 | |
r1 = IMUL.i32 r5, r1 | |
r2 = IMUL.i32 r6, r2 | |
r3 = IMUL.i32 r7, r3 | |
r8 = IADD.s32 r8, r4 | |
r1 = IADD.s32 r1, r5 | |
r2 = IADD.s32 r2, r6 | |
r3 = IADD.s32 r3, r7 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r1, r5 | |
r6 = IMUL.i32 r2, r6 | |
r7 = IMUL.i32 r3, r7 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r1 | |
r6 = IADD.s32 r6, r2 | |
r7 = IADD.s32 r7, r3 | |
r8 = IMUL.i32 r4, r8 | |
r1 = IMUL.i32 r5, r1 | |
r2 = IMUL.i32 r6, r2 | |
r3 = IMUL.i32 r7, r3 | |
r8 = IADD.s32 r8, r4 | |
r1 = IADD.s32 r1, r5 | |
r2 = IADD.s32 r2, r6 | |
r3 = IADD.s32 r3, r7 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r1, r5 | |
r6 = IMUL.i32 r2, r6 | |
r7 = IMUL.i32 r3, r7 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r1 | |
r6 = IADD.s32 r6, r2 | |
r7 = IADD.s32 r7, r3 | |
r8 = IMUL.i32 r4, r8 | |
r1 = IMUL.i32 r5, r1 | |
r2 = IMUL.i32 r6, r2 | |
r3 = IMUL.i32 r7, r3 | |
r8 = IADD.s32 r8, r4 | |
r1 = IADD.s32 r1, r5 | |
r2 = IADD.s32 r2, r6 | |
r3 = IADD.s32 r3, r7 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r1, r5 | |
r6 = IMUL.i32 r2, r6 | |
r7 = IMUL.i32 r3, r7 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r1 | |
r6 = IADD.s32 r6, r2 | |
r7 = IADD.s32 r7, r3 | |
r8 = IMUL.i32 r4, r8 | |
r1 = IMUL.i32 r5, r1 | |
r2 = IMUL.i32 r6, r2 | |
r3 = IMUL.i32 r7, r3 | |
r8 = IADD.s32 r8, r4 | |
r1 = IADD.s32 r1, r5 | |
r2 = IADD.s32 r2, r6 | |
r3 = IADD.s32 r3, r7 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r1, r5 | |
r6 = IMUL.i32 r2, r6 | |
r7 = IMUL.i32 r3, r7 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r1 | |
r6 = IADD.s32 r6, r2 | |
r7 = IADD.s32 r7, r3 | |
r9 = IADD.s32 r9, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r1 = IADD.s32 r4, r5 | |
r1 = IADD.s32 r1, r6 | |
r1 = IADD.s32 r1, r7 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
block0 { | |
id(0) nbb | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.y.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
800000000 | |
id(0) nbb r_uncond | |
* NOP | |
+ r1 = MOV.i32 fau.x | |
* r2 = MOV.i32 fau.y | |
+ _.h00 = U32_TO_F32 r0 | |
* _.h00 = FMA.f32 t1, fau.y, t.neg | |
+ r4 = F32_TO_S32.rtz t | |
* r3 = MOV.i32 fau.x | |
+ r5 = MOV.i32 t1 | |
* r6 = MOV.i32 r4 | |
+ r7 = MOV.i32 r4 | |
* r8 = MOV.i32 fau.x | |
+ r1 = IADD.s32 fau.x, r1 | |
* r9 = MOV.i32 t | |
+ r2 = IADD.s32 fau.x, r2 | |
* NOP | |
+ r3 = IADD.s32 fau.x, r3 | |
200000001 2edbe6ff00000003 | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r9, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000020 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* r1 = IMUL.i32 r5, r1 | |
+ NOP | |
* r2 = IMUL.i32 r6, r2 | |
+ NOP | |
id(0) nbb | |
* _.h00 = IMUL.i32 r4, r8 | |
+ r8 = IADD.s32 t, r4 | |
* r4 = IMUL.i32 t1, r4 | |
+ NOP | |
* _.h00 = IMUL.i32 r7, r3 | |
+ r3 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 t1, r7 | |
+ r7 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r7 | |
+ r7 = IADD.s32 t, t1 | |
* r3 = IMUL.i32 t1, r3 | |
+ NOP | |
* NOP | |
+ r1 = IADD.s32 r1, r5 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r1, r5 | |
+ r5 = IADD.s32 t, r1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* r5 = IMUL.i32 t1, r5 | |
+ NOP | |
* NOP | |
+ r4 = IADD.s32 r4, r8 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r4, r8 | |
+ r8 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* r8 = IMUL.i32 t1, r8 | |
+ NOP | |
* NOP | |
+ r3 = IADD.s32 r3, r7 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r3, r7 | |
+ r7 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r7 | |
+ r7 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r7 | |
+ r7 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* r7 = IMUL.i32 t1, r7 | |
+ NOP | |
* NOP | |
+ r5 = IADD.s32 r5, r1 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r5, r1 | |
+ r1 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* r1 = IMUL.i32 t1, r1 | |
+ NOP | |
* NOP | |
+ r2 = IADD.s32 r2, r6 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r2, r6 | |
+ r6 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r6 | |
+ r6 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r6 | |
+ r6 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r6 | |
+ r6 = IADD.s32 t, t1 | |
* NOP | |
+ r8 = IADD.s32 r8, r4 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r6, r2 | |
+ r2 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r8, r4 | |
+ r4 = IADD.s32 t, r8 | |
* _.h00 = IMUL.i32 r2, r6 | |
+ r6 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r4, r8 | |
+ r8 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r6, r2 | |
+ r2 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r8, r4 | |
+ r4 = IADD.s32 t, r8 | |
* _.h00 = IMUL.i32 r2, r6 | |
+ r6 = IADD.s32 t, r2 | |
* NOP | |
+ r7 = IADD.s32 r7, r3 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r4, r8 | |
+ r8 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r6, r2 | |
+ r2 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r7, r3 | |
+ r3 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r8, r4 | |
+ r4 = IADD.s32 t, r8 | |
* _.h00 = IMUL.i32 r2, r6 | |
+ r6 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r3, r7 | |
+ r7 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 r4, r8 | |
+ r8 = IADD.s32 t, r4 | |
* NOP | |
+ r1 = IADD.s32 r1, r5 | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* _.h00 = IMUL.i32 r6, r2 | |
+ r2 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r7, r3 | |
+ r3 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r8, r4 | |
+ r4 = IADD.s32 t, r8 | |
* _.h00 = IMUL.i32 r1, r5 | |
+ r5 = IADD.s32 t, r1 | |
* _.h00 = IMUL.i32 r2, r6 | |
+ r6 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r3, r7 | |
+ r7 = IADD.s32 t, r3 | |
* NOP | |
+ r9 = IADD.s32 r9, fau.x | |
* NOP | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) wait(0 ) nbb r_uncond | |
* NOP | |
+ _.h00 = IADD.s32 r4, r5 | |
* NOP | |
+ _.h00 = IADD.s32 t1, r6 | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ r1 = IADD.s32 t1, r7 | |
* NOP | |
+ r0 = IADD.s32 fau.x, t0 | |
* NOP | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* NOP | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
slot 0 reads: r1 | |
clause_0: | |
ds(0) nbb ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
} | |
clause_3: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+MOV.i32 r1:t1, 0x00000001 /* 0.000000 */ | |
*MOV.i32 r2:t0, 0x00000002 /* 0.000000 */ | |
+U32_TO_F32 t1, r0 | |
*FMA.f32 t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+F32_TO_S32.rtz r4:t1, t | |
*MOV.i32 r3:t0, 0x00000003 /* 0.000000 */ | |
+MOV.i32 r5:t1, t1 | |
*MOV.i32 r6:t0, r4 | |
+MOV.i32 r7:t1, r4 | |
*MOV.i32 r8:t0, u1.w0 | |
+IADD.s32 r1:t1, u1.w0, r1 | |
*MOV.i32 r9:t0, #0 | |
+IADD.s32 r2:t1, u1.w0, r2 | |
*NOP t0 | |
+IADD.s32 r3:t1, u1.w0, r3 | |
} | |
clause_10: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r9, 0x00000020 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_15 | |
} | |
clause_13: | |
ds(0) nbb next_store dwb(0) | |
{ | |
*NOP t0 | |
+JUMP t1, clause_72 | |
} | |
clause_15: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 r1:t0, r5, r1 | |
+NOP t1 | |
*IMUL.i32 r2:t0, r6, r2 | |
+NOP t1 | |
} | |
clause_17: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r4, r8 | |
+IADD.s32 r8:t1, t, r4 | |
*IMUL.i32 r4:t0, t1, r4 | |
+NOP t1 | |
*IMUL.i32 t0, r7, r3 | |
+IADD.s32 r3:t1, t, r7 | |
*IMUL.i32 t0, t1, r7 | |
+IADD.s32 r7:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r7 | |
+IADD.s32 r7:t1, t, t1 | |
*IMUL.i32 r3:t0, t1, r3 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r1:t1, r1, r5 | |
} | |
clause_23: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r1, r5 | |
+IADD.s32 r5:t1, t, r1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 r5:t0, t1, r5 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r4:t1, r4, r8 | |
} | |
clause_29: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r4, r8 | |
+IADD.s32 r8:t1, t, r4 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*IMUL.i32 r8:t0, t1, r8 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r3:t1, r3, r7 | |
} | |
clause_35: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r3, r7 | |
+IADD.s32 r7:t1, t, r3 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r7 | |
+IADD.s32 r7:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r7 | |
+IADD.s32 r7:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 r7:t0, t1, r7 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r5:t1, r5, r1 | |
} | |
clause_41: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r5, r1 | |
+IADD.s32 r1:t1, t, r5 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 r1:t0, t1, r1 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r2:t1, r2, r6 | |
} | |
clause_47: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r2, r6 | |
+IADD.s32 r6:t1, t, r2 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r6 | |
+IADD.s32 r6:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r6 | |
+IADD.s32 r6:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r6 | |
+IADD.s32 r6:t1, t, t1 | |
*NOP t0 | |
+IADD.s32 r8:t1, r8, r4 | |
} | |
clause_53: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r6, r2 | |
+IADD.s32 r2:t1, t, r6 | |
*IMUL.i32 t0, r8, r4 | |
+IADD.s32 r4:t1, t, r8 | |
*IMUL.i32 t0, r2, r6 | |
+IADD.s32 r6:t1, t, r2 | |
*IMUL.i32 t0, r4, r8 | |
+IADD.s32 r8:t1, t, r4 | |
*IMUL.i32 t0, r6, r2 | |
+IADD.s32 r2:t1, t, r6 | |
*IMUL.i32 t0, r8, r4 | |
+IADD.s32 r4:t1, t, r8 | |
*IMUL.i32 t0, r2, r6 | |
+IADD.s32 r6:t1, t, r2 | |
*NOP t0 | |
+IADD.s32 r7:t1, r7, r3 | |
} | |
clause_59: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r4, r8 | |
+IADD.s32 r8:t1, t, r4 | |
*IMUL.i32 t0, r6, r2 | |
+IADD.s32 r2:t1, t, r6 | |
*IMUL.i32 t0, r7, r3 | |
+IADD.s32 r3:t1, t, r7 | |
*IMUL.i32 t0, r8, r4 | |
+IADD.s32 r4:t1, t, r8 | |
*IMUL.i32 t0, r2, r6 | |
+IADD.s32 r6:t1, t, r2 | |
*IMUL.i32 t0, r3, r7 | |
+IADD.s32 r7:t1, t, r3 | |
*IMUL.i32 t0, r4, r8 | |
+IADD.s32 r8:t1, t, r4 | |
*NOP t0 | |
+IADD.s32 r1:t1, r1, r5 | |
} | |
clause_65: | |
ds(0) nbb r_uncond | |
{ | |
*IMUL.i32 t0, r6, r2 | |
+IADD.s32 r2:t1, t, r6 | |
*IMUL.i32 t0, r7, r3 | |
+IADD.s32 r3:t1, t, r7 | |
*IMUL.i32 t0, r8, r4 | |
+IADD.s32 r4:t1, t, r8 | |
*IMUL.i32 t0, r1, r5 | |
+IADD.s32 r5:t1, t, r1 | |
*IMUL.i32 t0, r2, r6 | |
+IADD.s32 r6:t1, t, r2 | |
*IMUL.i32 t0, r3, r7 | |
+IADD.s32 r7:t1, t, r3 | |
*NOP t0 | |
+IADD.s32 r9:t1, r9, 0x00000001 /* 0.000000 */ | |
*NOP t0 | |
+JUMP t1, clause_10 | |
} | |
clause_72: | |
ds(0) eos store | |
{ | |
*NOP t0 | |
+IADD.s32 t1, r4, r5 | |
*NOP t0 | |
+IADD.s32 t1, t1, r6 | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+IADD.s32 r1:t1, t1, r7 | |
*NOP t0 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*NOP t0 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*NOP t0 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
e20eea22 compute_sp_v4_int 21.133 GFLOPs 12.702ms | |
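A rough consistency check on that figure, with the MAD counts read off the listings above: the loop runs 32 iterations (the `ICMP.s32.m1.ge r9, #0x20` exit test) of 16 MADs per lane over 4 lanes, i.e. 32 * 16 * 4 = 2048 integer MADs = 4096 ops per invocation. Then

    4096 ops/invocation * 65536 invocations = 2^28 = 268435456 ops
    268435456 ops / 12.702 ms  ~=  21.13e9 ops/s

so the reported 21.133 "GFLOPs" is consistent with a dispatch of 65536 invocations (256 work groups of 256). For this kernel the operations are 32-bit integer multiply-adds; "GFLOPs" is just the tool's generic unit label.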
compute shader ---------- | |
#define KERNEL compute_sp_v8 | |
#define LOCAL_SIZE_X 256 | |
#define DATATYPE int | |
#define vec2 ivec2 | |
#define vec4 ivec4 | |
#line 64 | |
// Avoiding auto-vectorize by using vector-width locked dependent code | |
layout(local_size_x = LOCAL_SIZE_X) in; | |
#undef MAD_4 | |
#undef MAD_16 | |
#undef MAD_64 | |
#define mad(a,b,c) (a*b+c) | |
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x); | |
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); | |
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); | |
struct vec8 { | |
vec4 d0, d1; | |
}; | |
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7)) | |
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x)) | |
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1)) | |
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1)) | |
struct vec16 { | |
vec8 d0,d1; | |
}; | |
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15)) | |
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x)); | |
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1))) | |
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1))) | |
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c)) | |
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c)) | |
layout(location = 1) uniform DATATYPE _A; | |
#define SCALE 1e-10 | |
layout(std430, binding = 0) restrict writeonly buffer outbuffer { | |
DATATYPE ptr[]; | |
}; | |
#line 162 | |
void compute_sp_v8() | |
{ | |
uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u; | |
vec8 x = VEC8(_A, (_A+DATATYPE(1)), (_A+DATATYPE(2)), (_A+DATATYPE(3)), (_A+DATATYPE(4)), (_A+DATATYPE(5)), (_A+DATATYPE(6)), (_A+DATATYPE(7))); | |
vec8 y = VEC8_S(DATATYPE(float(id) * SCALE)); | |
#undef mad | |
#define mad mad8 | |
for(int i=0; i<16; i++) | |
{ | |
MAD_16(x, y); | |
} | |
vec4 s = y.d0 + y.d1; | |
vec2 t = s.xy + s.zw; | |
ptr[id] = t.x + t.y; | |
} | |
void main() {compute_sp_v8();} | |
---------- | |
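This v8 variant runs the same dependent-MAD chain over an emulated 8-wide vector: with DATATYPE remapped to int (and vec4 to ivec4), each vec8 is a pair of ivec4s, and mad is redefined to mad8 before the loop. A sketch of what one step of MAD_4(x, y) expands to after preprocessing:

    x = mad8(y, x, y);   // i.e. x.d0 = y.d0 * x.d0 + y.d0;  x.d1 = y.d1 * x.d1 + y.d1;
    y = mad8(x, y, x);   // i.e. y.d0 = x.d0 * y.d0 + x.d0;  y.d1 = x.d1 * y.d1 + x.d1;

Every statement consumes the previous result, so the chains cannot be reassociated, only kept as eight parallel scalar streams. Per invocation that is 16 iterations * 16 MADs * 8 components = 2048 MADs = 4096 integer ops, the same total as the v1 and v4 variants. The imul/iadd pairs in the NIR below are exactly these expansions, eight at a time.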
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0x5c855ea2, 0x8971a97c, 0xa0adc9e8, 0x273dc174, 0x2821d310} | |
name: GLSL18 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp int[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp int _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_10 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_11 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_12 = ishl ssa_10.y, ssa_11 | |
vec1 32 ssa_13 = iadd ssa_10.x, ssa_12 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_319 = insert_u16 ssa_10.z, ssa_1 | |
vec1 32 ssa_15 = iadd ssa_13, ssa_319 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_16 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
vec1 32 ssa_17 = iadd ssa_16, ssa_1 | |
vec1 32 ssa_2 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_18 = iadd ssa_16, ssa_2 | |
vec1 32 ssa_3 = load_const (0x00000003 = 0.000000) | |
vec1 32 ssa_19 = iadd ssa_16, ssa_3 | |
vec1 32 ssa_4 = load_const (0x00000004 = 0.000000) | |
vec1 32 ssa_20 = iadd ssa_16, ssa_4 | |
vec1 32 ssa_5 = load_const (0x00000005 = 0.000000) | |
vec1 32 ssa_21 = iadd ssa_16, ssa_5 | |
vec1 32 ssa_6 = load_const (0x00000006 = 0.000000) | |
vec1 32 ssa_22 = iadd ssa_16, ssa_6 | |
vec1 32 ssa_7 = load_const (0x00000007 = 0.000000) | |
vec1 32 ssa_23 = iadd ssa_16, ssa_7 | |
vec1 32 ssa_24 = u2f32 ssa_15 | |
vec1 32 ssa_8 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_25 = fmul ssa_24, ssa_8 | |
vec1 32 ssa_26 = f2i32 ssa_25 | |
vec1 32 ssa_9 = load_const (0x00000010 = 0.000000) | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_27 = phi block_0: ssa_26, block_4: ssa_297 | |
vec1 32 ssa_28 = phi block_0: ssa_26, block_4: ssa_298 | |
vec1 32 ssa_29 = phi block_0: ssa_26, block_4: ssa_299 | |
vec1 32 ssa_30 = phi block_0: ssa_26, block_4: ssa_300 | |
vec1 32 ssa_31 = phi block_0: ssa_26, block_4: ssa_293 | |
vec1 32 ssa_32 = phi block_0: ssa_26, block_4: ssa_294 | |
vec1 32 ssa_33 = phi block_0: ssa_26, block_4: ssa_295 | |
vec1 32 ssa_34 = phi block_0: ssa_26, block_4: ssa_296 | |
vec1 32 ssa_35 = phi block_0: ssa_20, block_4: ssa_281 | |
vec1 32 ssa_36 = phi block_0: ssa_21, block_4: ssa_282 | |
vec1 32 ssa_37 = phi block_0: ssa_22, block_4: ssa_283 | |
vec1 32 ssa_38 = phi block_0: ssa_23, block_4: ssa_284 | |
vec1 32 ssa_39 = phi block_0: ssa_16, block_4: ssa_277 | |
vec1 32 ssa_40 = phi block_0: ssa_17, block_4: ssa_278 | |
vec1 32 ssa_41 = phi block_0: ssa_18, block_4: ssa_279 | |
vec1 32 ssa_42 = phi block_0: ssa_19, block_4: ssa_280 | |
vec1 32 ssa_43 = phi block_0: ssa_0, block_4: ssa_301 | |
vec1 32 ssa_44 = ige32 ssa_43, ssa_9 | |
/* succs: block_2 block_3 */ | |
if ssa_44 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_45 = imul ssa_31, ssa_39 | |
vec1 32 ssa_46 = imul ssa_32, ssa_40 | |
vec1 32 ssa_47 = imul ssa_33, ssa_41 | |
vec1 32 ssa_48 = imul ssa_34, ssa_42 | |
vec1 32 ssa_49 = imul ssa_27, ssa_35 | |
vec1 32 ssa_50 = imul ssa_28, ssa_36 | |
vec1 32 ssa_51 = imul ssa_29, ssa_37 | |
vec1 32 ssa_52 = imul ssa_30, ssa_38 | |
vec1 32 ssa_53 = iadd ssa_45, ssa_31 | |
vec1 32 ssa_54 = iadd ssa_46, ssa_32 | |
vec1 32 ssa_55 = iadd ssa_47, ssa_33 | |
vec1 32 ssa_56 = iadd ssa_48, ssa_34 | |
vec1 32 ssa_57 = iadd ssa_49, ssa_27 | |
vec1 32 ssa_58 = iadd ssa_50, ssa_28 | |
vec1 32 ssa_59 = iadd ssa_51, ssa_29 | |
vec1 32 ssa_60 = iadd ssa_52, ssa_30 | |
vec1 32 ssa_61 = imul ssa_53, ssa_31 | |
vec1 32 ssa_62 = imul ssa_54, ssa_32 | |
vec1 32 ssa_63 = imul ssa_55, ssa_33 | |
vec1 32 ssa_64 = imul ssa_56, ssa_34 | |
vec1 32 ssa_65 = imul ssa_57, ssa_27 | |
vec1 32 ssa_66 = imul ssa_58, ssa_28 | |
vec1 32 ssa_67 = imul ssa_59, ssa_29 | |
vec1 32 ssa_68 = imul ssa_60, ssa_30 | |
vec1 32 ssa_69 = iadd ssa_61, ssa_53 | |
vec1 32 ssa_70 = iadd ssa_62, ssa_54 | |
vec1 32 ssa_71 = iadd ssa_63, ssa_55 | |
vec1 32 ssa_72 = iadd ssa_64, ssa_56 | |
vec1 32 ssa_73 = iadd ssa_65, ssa_57 | |
vec1 32 ssa_74 = iadd ssa_66, ssa_58 | |
vec1 32 ssa_75 = iadd ssa_67, ssa_59 | |
vec1 32 ssa_76 = iadd ssa_68, ssa_60 | |
vec1 32 ssa_77 = imul ssa_69, ssa_53 | |
vec1 32 ssa_78 = imul ssa_70, ssa_54 | |
vec1 32 ssa_79 = imul ssa_71, ssa_55 | |
vec1 32 ssa_80 = imul ssa_72, ssa_56 | |
vec1 32 ssa_81 = imul ssa_73, ssa_57 | |
vec1 32 ssa_82 = imul ssa_74, ssa_58 | |
vec1 32 ssa_83 = imul ssa_75, ssa_59 | |
vec1 32 ssa_84 = imul ssa_76, ssa_60 | |
vec1 32 ssa_85 = iadd ssa_77, ssa_69 | |
vec1 32 ssa_86 = iadd ssa_78, ssa_70 | |
vec1 32 ssa_87 = iadd ssa_79, ssa_71 | |
vec1 32 ssa_88 = iadd ssa_80, ssa_72 | |
vec1 32 ssa_89 = iadd ssa_81, ssa_73 | |
vec1 32 ssa_90 = iadd ssa_82, ssa_74 | |
vec1 32 ssa_91 = iadd ssa_83, ssa_75 | |
vec1 32 ssa_92 = iadd ssa_84, ssa_76 | |
vec1 32 ssa_93 = imul ssa_85, ssa_69 | |
vec1 32 ssa_94 = imul ssa_86, ssa_70 | |
vec1 32 ssa_95 = imul ssa_87, ssa_71 | |
vec1 32 ssa_96 = imul ssa_88, ssa_72 | |
vec1 32 ssa_97 = imul ssa_89, ssa_73 | |
vec1 32 ssa_98 = imul ssa_90, ssa_74 | |
vec1 32 ssa_99 = imul ssa_91, ssa_75 | |
vec1 32 ssa_100 = imul ssa_92, ssa_76 | |
vec1 32 ssa_101 = iadd ssa_93, ssa_85 | |
vec1 32 ssa_102 = iadd ssa_94, ssa_86 | |
vec1 32 ssa_103 = iadd ssa_95, ssa_87 | |
vec1 32 ssa_104 = iadd ssa_96, ssa_88 | |
vec1 32 ssa_105 = iadd ssa_97, ssa_89 | |
vec1 32 ssa_106 = iadd ssa_98, ssa_90 | |
vec1 32 ssa_107 = iadd ssa_99, ssa_91 | |
vec1 32 ssa_108 = iadd ssa_100, ssa_92 | |
vec1 32 ssa_109 = imul ssa_101, ssa_85 | |
vec1 32 ssa_110 = imul ssa_102, ssa_86 | |
vec1 32 ssa_111 = imul ssa_103, ssa_87 | |
vec1 32 ssa_112 = imul ssa_104, ssa_88 | |
vec1 32 ssa_113 = imul ssa_105, ssa_89 | |
vec1 32 ssa_114 = imul ssa_106, ssa_90 | |
vec1 32 ssa_115 = imul ssa_107, ssa_91 | |
vec1 32 ssa_116 = imul ssa_108, ssa_92 | |
vec1 32 ssa_117 = iadd ssa_109, ssa_101 | |
vec1 32 ssa_118 = iadd ssa_110, ssa_102 | |
vec1 32 ssa_119 = iadd ssa_111, ssa_103 | |
vec1 32 ssa_120 = iadd ssa_112, ssa_104 | |
vec1 32 ssa_121 = iadd ssa_113, ssa_105 | |
vec1 32 ssa_122 = iadd ssa_114, ssa_106 | |
vec1 32 ssa_123 = iadd ssa_115, ssa_107 | |
vec1 32 ssa_124 = iadd ssa_116, ssa_108 | |
vec1 32 ssa_125 = imul ssa_117, ssa_101 | |
vec1 32 ssa_126 = imul ssa_118, ssa_102 | |
vec1 32 ssa_127 = imul ssa_119, ssa_103 | |
vec1 32 ssa_128 = imul ssa_120, ssa_104 | |
vec1 32 ssa_129 = imul ssa_121, ssa_105 | |
vec1 32 ssa_130 = imul ssa_122, ssa_106 | |
vec1 32 ssa_131 = imul ssa_123, ssa_107 | |
vec1 32 ssa_132 = imul ssa_124, ssa_108 | |
vec1 32 ssa_133 = iadd ssa_125, ssa_117 | |
vec1 32 ssa_134 = iadd ssa_126, ssa_118 | |
vec1 32 ssa_135 = iadd ssa_127, ssa_119 | |
vec1 32 ssa_136 = iadd ssa_128, ssa_120 | |
vec1 32 ssa_137 = iadd ssa_129, ssa_121 | |
vec1 32 ssa_138 = iadd ssa_130, ssa_122 | |
vec1 32 ssa_139 = iadd ssa_131, ssa_123 | |
vec1 32 ssa_140 = iadd ssa_132, ssa_124 | |
vec1 32 ssa_141 = imul ssa_133, ssa_117 | |
vec1 32 ssa_142 = imul ssa_134, ssa_118 | |
vec1 32 ssa_143 = imul ssa_135, ssa_119 | |
vec1 32 ssa_144 = imul ssa_136, ssa_120 | |
vec1 32 ssa_145 = imul ssa_137, ssa_121 | |
vec1 32 ssa_146 = imul ssa_138, ssa_122 | |
vec1 32 ssa_147 = imul ssa_139, ssa_123 | |
vec1 32 ssa_148 = imul ssa_140, ssa_124 | |
vec1 32 ssa_149 = iadd ssa_141, ssa_133 | |
vec1 32 ssa_150 = iadd ssa_142, ssa_134 | |
vec1 32 ssa_151 = iadd ssa_143, ssa_135 | |
vec1 32 ssa_152 = iadd ssa_144, ssa_136 | |
vec1 32 ssa_153 = iadd ssa_145, ssa_137 | |
vec1 32 ssa_154 = iadd ssa_146, ssa_138 | |
vec1 32 ssa_155 = iadd ssa_147, ssa_139 | |
vec1 32 ssa_156 = iadd ssa_148, ssa_140 | |
vec1 32 ssa_157 = imul ssa_149, ssa_133 | |
vec1 32 ssa_158 = imul ssa_150, ssa_134 | |
vec1 32 ssa_159 = imul ssa_151, ssa_135 | |
vec1 32 ssa_160 = imul ssa_152, ssa_136 | |
vec1 32 ssa_161 = imul ssa_153, ssa_137 | |
vec1 32 ssa_162 = imul ssa_154, ssa_138 | |
vec1 32 ssa_163 = imul ssa_155, ssa_139 | |
vec1 32 ssa_164 = imul ssa_156, ssa_140 | |
vec1 32 ssa_165 = iadd ssa_157, ssa_149 | |
vec1 32 ssa_166 = iadd ssa_158, ssa_150 | |
vec1 32 ssa_167 = iadd ssa_159, ssa_151 | |
vec1 32 ssa_168 = iadd ssa_160, ssa_152 | |
vec1 32 ssa_169 = iadd ssa_161, ssa_153 | |
vec1 32 ssa_170 = iadd ssa_162, ssa_154 | |
vec1 32 ssa_171 = iadd ssa_163, ssa_155 | |
vec1 32 ssa_172 = iadd ssa_164, ssa_156 | |
vec1 32 ssa_173 = imul ssa_165, ssa_149 | |
vec1 32 ssa_174 = imul ssa_166, ssa_150 | |
vec1 32 ssa_175 = imul ssa_167, ssa_151 | |
vec1 32 ssa_176 = imul ssa_168, ssa_152 | |
vec1 32 ssa_177 = imul ssa_169, ssa_153 | |
vec1 32 ssa_178 = imul ssa_170, ssa_154 | |
vec1 32 ssa_179 = imul ssa_171, ssa_155 | |
vec1 32 ssa_180 = imul ssa_172, ssa_156 | |
vec1 32 ssa_181 = iadd ssa_173, ssa_165 | |
vec1 32 ssa_182 = iadd ssa_174, ssa_166 | |
vec1 32 ssa_183 = iadd ssa_175, ssa_167 | |
vec1 32 ssa_184 = iadd ssa_176, ssa_168 | |
vec1 32 ssa_185 = iadd ssa_177, ssa_169 | |
vec1 32 ssa_186 = iadd ssa_178, ssa_170 | |
vec1 32 ssa_187 = iadd ssa_179, ssa_171 | |
vec1 32 ssa_188 = iadd ssa_180, ssa_172 | |
vec1 32 ssa_189 = imul ssa_181, ssa_165 | |
vec1 32 ssa_190 = imul ssa_182, ssa_166 | |
vec1 32 ssa_191 = imul ssa_183, ssa_167 | |
vec1 32 ssa_192 = imul ssa_184, ssa_168 | |
vec1 32 ssa_193 = imul ssa_185, ssa_169 | |
vec1 32 ssa_194 = imul ssa_186, ssa_170 | |
vec1 32 ssa_195 = imul ssa_187, ssa_171 | |
vec1 32 ssa_196 = imul ssa_188, ssa_172 | |
vec1 32 ssa_197 = iadd ssa_189, ssa_181 | |
vec1 32 ssa_198 = iadd ssa_190, ssa_182 | |
vec1 32 ssa_199 = iadd ssa_191, ssa_183 | |
vec1 32 ssa_200 = iadd ssa_192, ssa_184 | |
vec1 32 ssa_201 = iadd ssa_193, ssa_185 | |
vec1 32 ssa_202 = iadd ssa_194, ssa_186 | |
vec1 32 ssa_203 = iadd ssa_195, ssa_187 | |
vec1 32 ssa_204 = iadd ssa_196, ssa_188 | |
vec1 32 ssa_205 = imul ssa_197, ssa_181 | |
vec1 32 ssa_206 = imul ssa_198, ssa_182 | |
vec1 32 ssa_207 = imul ssa_199, ssa_183 | |
vec1 32 ssa_208 = imul ssa_200, ssa_184 | |
vec1 32 ssa_209 = imul ssa_201, ssa_185 | |
vec1 32 ssa_210 = imul ssa_202, ssa_186 | |
vec1 32 ssa_211 = imul ssa_203, ssa_187 | |
vec1 32 ssa_212 = imul ssa_204, ssa_188 | |
vec1 32 ssa_213 = iadd ssa_205, ssa_197 | |
vec1 32 ssa_214 = iadd ssa_206, ssa_198 | |
vec1 32 ssa_215 = iadd ssa_207, ssa_199 | |
vec1 32 ssa_216 = iadd ssa_208, ssa_200 | |
vec1 32 ssa_217 = iadd ssa_209, ssa_201 | |
vec1 32 ssa_218 = iadd ssa_210, ssa_202 | |
vec1 32 ssa_219 = iadd ssa_211, ssa_203 | |
vec1 32 ssa_220 = iadd ssa_212, ssa_204 | |
vec1 32 ssa_221 = imul ssa_213, ssa_197 | |
vec1 32 ssa_222 = imul ssa_214, ssa_198 | |
vec1 32 ssa_223 = imul ssa_215, ssa_199 | |
vec1 32 ssa_224 = imul ssa_216, ssa_200 | |
vec1 32 ssa_225 = imul ssa_217, ssa_201 | |
vec1 32 ssa_226 = imul ssa_218, ssa_202 | |
vec1 32 ssa_227 = imul ssa_219, ssa_203 | |
vec1 32 ssa_228 = imul ssa_220, ssa_204 | |
vec1 32 ssa_229 = iadd ssa_221, ssa_213 | |
vec1 32 ssa_230 = iadd ssa_222, ssa_214 | |
vec1 32 ssa_231 = iadd ssa_223, ssa_215 | |
vec1 32 ssa_232 = iadd ssa_224, ssa_216 | |
vec1 32 ssa_233 = iadd ssa_225, ssa_217 | |
vec1 32 ssa_234 = iadd ssa_226, ssa_218 | |
vec1 32 ssa_235 = iadd ssa_227, ssa_219 | |
vec1 32 ssa_236 = iadd ssa_228, ssa_220 | |
vec1 32 ssa_237 = imul ssa_229, ssa_213 | |
vec1 32 ssa_238 = imul ssa_230, ssa_214 | |
vec1 32 ssa_239 = imul ssa_231, ssa_215 | |
vec1 32 ssa_240 = imul ssa_232, ssa_216 | |
vec1 32 ssa_241 = imul ssa_233, ssa_217 | |
vec1 32 ssa_242 = imul ssa_234, ssa_218 | |
vec1 32 ssa_243 = imul ssa_235, ssa_219 | |
vec1 32 ssa_244 = imul ssa_236, ssa_220 | |
vec1 32 ssa_245 = iadd ssa_237, ssa_229 | |
vec1 32 ssa_246 = iadd ssa_238, ssa_230 | |
vec1 32 ssa_247 = iadd ssa_239, ssa_231 | |
vec1 32 ssa_248 = iadd ssa_240, ssa_232 | |
vec1 32 ssa_249 = iadd ssa_241, ssa_233 | |
vec1 32 ssa_250 = iadd ssa_242, ssa_234 | |
vec1 32 ssa_251 = iadd ssa_243, ssa_235 | |
vec1 32 ssa_252 = iadd ssa_244, ssa_236 | |
vec1 32 ssa_253 = imul ssa_245, ssa_229 | |
vec1 32 ssa_254 = imul ssa_246, ssa_230 | |
vec1 32 ssa_255 = imul ssa_247, ssa_231 | |
vec1 32 ssa_256 = imul ssa_248, ssa_232 | |
vec1 32 ssa_257 = imul ssa_249, ssa_233 | |
vec1 32 ssa_258 = imul ssa_250, ssa_234 | |
vec1 32 ssa_259 = imul ssa_251, ssa_235 | |
vec1 32 ssa_260 = imul ssa_252, ssa_236 | |
vec1 32 ssa_261 = iadd ssa_253, ssa_245 | |
vec1 32 ssa_262 = iadd ssa_254, ssa_246 | |
vec1 32 ssa_263 = iadd ssa_255, ssa_247 | |
vec1 32 ssa_264 = iadd ssa_256, ssa_248 | |
vec1 32 ssa_265 = iadd ssa_257, ssa_249 | |
vec1 32 ssa_266 = iadd ssa_258, ssa_250 | |
vec1 32 ssa_267 = iadd ssa_259, ssa_251 | |
vec1 32 ssa_268 = iadd ssa_260, ssa_252 | |
vec1 32 ssa_269 = imul ssa_261, ssa_245 | |
vec1 32 ssa_270 = imul ssa_262, ssa_246 | |
vec1 32 ssa_271 = imul ssa_263, ssa_247 | |
vec1 32 ssa_272 = imul ssa_264, ssa_248 | |
vec1 32 ssa_273 = imul ssa_265, ssa_249 | |
vec1 32 ssa_274 = imul ssa_266, ssa_250 | |
vec1 32 ssa_275 = imul ssa_267, ssa_251 | |
vec1 32 ssa_276 = imul ssa_268, ssa_252 | |
vec1 32 ssa_277 = iadd ssa_269, ssa_261 | |
vec1 32 ssa_278 = iadd ssa_270, ssa_262 | |
vec1 32 ssa_279 = iadd ssa_271, ssa_263 | |
vec1 32 ssa_280 = iadd ssa_272, ssa_264 | |
vec1 32 ssa_281 = iadd ssa_273, ssa_265 | |
vec1 32 ssa_282 = iadd ssa_274, ssa_266 | |
vec1 32 ssa_283 = iadd ssa_275, ssa_267 | |
vec1 32 ssa_284 = iadd ssa_276, ssa_268 | |
vec1 32 ssa_285 = imul ssa_277, ssa_261 | |
vec1 32 ssa_286 = imul ssa_278, ssa_262 | |
vec1 32 ssa_287 = imul ssa_279, ssa_263 | |
vec1 32 ssa_288 = imul ssa_280, ssa_264 | |
vec1 32 ssa_289 = imul ssa_281, ssa_265 | |
vec1 32 ssa_290 = imul ssa_282, ssa_266 | |
vec1 32 ssa_291 = imul ssa_283, ssa_267 | |
vec1 32 ssa_292 = imul ssa_284, ssa_268 | |
vec1 32 ssa_293 = iadd ssa_285, ssa_277 | |
vec1 32 ssa_294 = iadd ssa_286, ssa_278 | |
vec1 32 ssa_295 = iadd ssa_287, ssa_279 | |
vec1 32 ssa_296 = iadd ssa_288, ssa_280 | |
vec1 32 ssa_297 = iadd ssa_289, ssa_281 | |
vec1 32 ssa_298 = iadd ssa_290, ssa_282 | |
vec1 32 ssa_299 = iadd ssa_291, ssa_283 | |
vec1 32 ssa_300 = iadd ssa_292, ssa_284 | |
vec1 32 ssa_301 = iadd ssa_43, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_302 = iadd ssa_31, ssa_27 | |
vec1 32 ssa_303 = iadd ssa_32, ssa_28 | |
vec1 32 ssa_304 = iadd ssa_33, ssa_29 | |
vec1 32 ssa_305 = iadd ssa_34, ssa_30 | |
vec1 32 ssa_306 = iadd ssa_302, ssa_304 | |
vec1 32 ssa_307 = iadd ssa_303, ssa_305 | |
vec1 32 ssa_308 = ishl ssa_15, ssa_2 | |
vec1 32 ssa_309 = iadd ssa_306, ssa_307 | |
vec1 64 ssa_310 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_311 = unpack_64_2x32_split_x ssa_310 | |
vec1 32 ssa_312 = unpack_64_2x32_split_y ssa_310 | |
vec1 32 ssa_313 = iadd ssa_311, ssa_308 | |
vec1 32 ssa_314 = ult32 ssa_313, ssa_311 | |
vec1 32 ssa_315 = b2i32 ssa_314 | |
vec1 32 ssa_316 = iadd ssa_315, ssa_312 | |
vec1 64 ssa_317 = pack_64_2x32_split ssa_313, ssa_316 | |
intrinsic store_global (ssa_309, ssa_317) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
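Reading the loop-carried state out of the NIR above, the 17 phis at block_1 map back to the shader as:

    ssa_27..ssa_34 : the eight components of y (all start at ssa_26, the converted id * SCALE)
    ssa_35..ssa_42 : the eight components of x (start at _A+4.._A+7 and _A.._A+3)
    ssa_43         : the loop counter i (exit when i >= 16; ssa_9 = 0x10)

The epilogue at block_5 is the shader's horizontal reduction (y.d0 + y.d1, then the xy + zw fold, then t.x + t.y), followed by a carry-propagating 64-bit address add (ult32 + b2i32 + iadd) for the SSBO store.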
block0 { | |
322 = MOV.i32 r62 | |
321 = MOV.i32 r61 | |
320 = MOV.i32 r60 | |
12 = LSHIFT_OR.i32 321, #0x0, #0x8.b0 | |
13 = IADD.s32 320, 12 | |
319 = MKVEC.v2i16 #0x0.h00, 322.h00 | |
15 = IADD.s32 13, 319 | |
17 = IADD.s32 u1, #0x1 | |
18 = IADD.s32 u1, #0x2 | |
19 = IADD.s32 u1, #0x3 | |
20 = IADD.s32 u1, #0x4 | |
21 = IADD.s32 u1, #0x5 | |
22 = IADD.s32 u1, #0x6 | |
23 = IADD.s32 u1, #0x7 | |
24 = U32_TO_F32 15 | |
25 = FMA.f32 24, #0x2edbe6ff, #0x0.neg | |
26 = F32_TO_S32.rtz 25 | |
} -> block1 | |
block1 { | |
27 = PHI 26, 297 | |
28 = PHI 26, 298 | |
29 = PHI 26, 299 | |
30 = PHI 26, 300 | |
31 = PHI 26, 293 | |
32 = PHI 26, 294 | |
33 = PHI 26, 295 | |
34 = PHI 26, 296 | |
35 = PHI 20, 281 | |
36 = PHI 21, 282 | |
37 = PHI 22, 283 | |
38 = PHI 23, 284 | |
39 = PHI u1, 277 | |
40 = PHI 17, 278 | |
41 = PHI 18, 279 | |
42 = PHI 19, 280 | |
43 = PHI #0x0, 301 | |
44 = ICMP.s32.m1.ge 43, #0x10 | |
BRANCHZ.i16.eq 44.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
45 = IMUL.i32 31, 39 | |
46 = IMUL.i32 32, 40 | |
47 = IMUL.i32 33, 41 | |
48 = IMUL.i32 34, 42 | |
49 = IMUL.i32 27, 35 | |
50 = IMUL.i32 28, 36 | |
51 = IMUL.i32 29, 37 | |
52 = IMUL.i32 30, 38 | |
53 = IADD.s32 45, 31 | |
54 = IADD.s32 46, 32 | |
55 = IADD.s32 47, 33 | |
56 = IADD.s32 48, 34 | |
57 = IADD.s32 49, 27 | |
58 = IADD.s32 50, 28 | |
59 = IADD.s32 51, 29 | |
60 = IADD.s32 52, 30 | |
61 = IMUL.i32 53, 31 | |
62 = IMUL.i32 54, 32 | |
63 = IMUL.i32 55, 33 | |
64 = IMUL.i32 56, 34 | |
65 = IMUL.i32 57, 27 | |
66 = IMUL.i32 58, 28 | |
67 = IMUL.i32 59, 29 | |
68 = IMUL.i32 60, 30 | |
69 = IADD.s32 61, 53 | |
70 = IADD.s32 62, 54 | |
71 = IADD.s32 63, 55 | |
72 = IADD.s32 64, 56 | |
73 = IADD.s32 65, 57 | |
74 = IADD.s32 66, 58 | |
75 = IADD.s32 67, 59 | |
76 = IADD.s32 68, 60 | |
77 = IMUL.i32 69, 53 | |
78 = IMUL.i32 70, 54 | |
79 = IMUL.i32 71, 55 | |
80 = IMUL.i32 72, 56 | |
81 = IMUL.i32 73, 57 | |
82 = IMUL.i32 74, 58 | |
83 = IMUL.i32 75, 59 | |
84 = IMUL.i32 76, 60 | |
85 = IADD.s32 77, 69 | |
86 = IADD.s32 78, 70 | |
87 = IADD.s32 79, 71 | |
88 = IADD.s32 80, 72 | |
89 = IADD.s32 81, 73 | |
90 = IADD.s32 82, 74 | |
91 = IADD.s32 83, 75 | |
92 = IADD.s32 84, 76 | |
93 = IMUL.i32 85, 69 | |
94 = IMUL.i32 86, 70 | |
95 = IMUL.i32 87, 71 | |
96 = IMUL.i32 88, 72 | |
97 = IMUL.i32 89, 73 | |
98 = IMUL.i32 90, 74 | |
99 = IMUL.i32 91, 75 | |
100 = IMUL.i32 92, 76 | |
101 = IADD.s32 93, 85 | |
102 = IADD.s32 94, 86 | |
103 = IADD.s32 95, 87 | |
104 = IADD.s32 96, 88 | |
105 = IADD.s32 97, 89 | |
106 = IADD.s32 98, 90 | |
107 = IADD.s32 99, 91 | |
108 = IADD.s32 100, 92 | |
109 = IMUL.i32 101, 85 | |
110 = IMUL.i32 102, 86 | |
111 = IMUL.i32 103, 87 | |
112 = IMUL.i32 104, 88 | |
113 = IMUL.i32 105, 89 | |
114 = IMUL.i32 106, 90 | |
115 = IMUL.i32 107, 91 | |
116 = IMUL.i32 108, 92 | |
117 = IADD.s32 109, 101 | |
118 = IADD.s32 110, 102 | |
119 = IADD.s32 111, 103 | |
120 = IADD.s32 112, 104 | |
121 = IADD.s32 113, 105 | |
122 = IADD.s32 114, 106 | |
123 = IADD.s32 115, 107 | |
124 = IADD.s32 116, 108 | |
125 = IMUL.i32 117, 101 | |
126 = IMUL.i32 118, 102 | |
127 = IMUL.i32 119, 103 | |
128 = IMUL.i32 120, 104 | |
129 = IMUL.i32 121, 105 | |
130 = IMUL.i32 122, 106 | |
131 = IMUL.i32 123, 107 | |
132 = IMUL.i32 124, 108 | |
133 = IADD.s32 125, 117 | |
134 = IADD.s32 126, 118 | |
135 = IADD.s32 127, 119 | |
136 = IADD.s32 128, 120 | |
137 = IADD.s32 129, 121 | |
138 = IADD.s32 130, 122 | |
139 = IADD.s32 131, 123 | |
140 = IADD.s32 132, 124 | |
141 = IMUL.i32 133, 117 | |
142 = IMUL.i32 134, 118 | |
143 = IMUL.i32 135, 119 | |
144 = IMUL.i32 136, 120 | |
145 = IMUL.i32 137, 121 | |
146 = IMUL.i32 138, 122 | |
147 = IMUL.i32 139, 123 | |
148 = IMUL.i32 140, 124 | |
149 = IADD.s32 141, 133 | |
150 = IADD.s32 142, 134 | |
151 = IADD.s32 143, 135 | |
152 = IADD.s32 144, 136 | |
153 = IADD.s32 145, 137 | |
154 = IADD.s32 146, 138 | |
155 = IADD.s32 147, 139 | |
156 = IADD.s32 148, 140 | |
157 = IMUL.i32 149, 133 | |
158 = IMUL.i32 150, 134 | |
159 = IMUL.i32 151, 135 | |
160 = IMUL.i32 152, 136 | |
161 = IMUL.i32 153, 137 | |
162 = IMUL.i32 154, 138 | |
163 = IMUL.i32 155, 139 | |
164 = IMUL.i32 156, 140 | |
165 = IADD.s32 157, 149 | |
166 = IADD.s32 158, 150 | |
167 = IADD.s32 159, 151 | |
168 = IADD.s32 160, 152 | |
169 = IADD.s32 161, 153 | |
170 = IADD.s32 162, 154 | |
171 = IADD.s32 163, 155 | |
172 = IADD.s32 164, 156 | |
173 = IMUL.i32 165, 149 | |
174 = IMUL.i32 166, 150 | |
175 = IMUL.i32 167, 151 | |
176 = IMUL.i32 168, 152 | |
177 = IMUL.i32 169, 153 | |
178 = IMUL.i32 170, 154 | |
179 = IMUL.i32 171, 155 | |
180 = IMUL.i32 172, 156 | |
181 = IADD.s32 173, 165 | |
182 = IADD.s32 174, 166 | |
183 = IADD.s32 175, 167 | |
184 = IADD.s32 176, 168 | |
185 = IADD.s32 177, 169 | |
186 = IADD.s32 178, 170 | |
187 = IADD.s32 179, 171 | |
188 = IADD.s32 180, 172 | |
189 = IMUL.i32 181, 165 | |
190 = IMUL.i32 182, 166 | |
191 = IMUL.i32 183, 167 | |
192 = IMUL.i32 184, 168 | |
193 = IMUL.i32 185, 169 | |
194 = IMUL.i32 186, 170 | |
195 = IMUL.i32 187, 171 | |
196 = IMUL.i32 188, 172 | |
197 = IADD.s32 189, 181 | |
198 = IADD.s32 190, 182 | |
199 = IADD.s32 191, 183 | |
200 = IADD.s32 192, 184 | |
201 = IADD.s32 193, 185 | |
202 = IADD.s32 194, 186 | |
203 = IADD.s32 195, 187 | |
204 = IADD.s32 196, 188 | |
205 = IMUL.i32 197, 181 | |
206 = IMUL.i32 198, 182 | |
207 = IMUL.i32 199, 183 | |
208 = IMUL.i32 200, 184 | |
209 = IMUL.i32 201, 185 | |
210 = IMUL.i32 202, 186 | |
211 = IMUL.i32 203, 187 | |
212 = IMUL.i32 204, 188 | |
213 = IADD.s32 205, 197 | |
214 = IADD.s32 206, 198 | |
215 = IADD.s32 207, 199 | |
216 = IADD.s32 208, 200 | |
217 = IADD.s32 209, 201 | |
218 = IADD.s32 210, 202 | |
219 = IADD.s32 211, 203 | |
220 = IADD.s32 212, 204 | |
221 = IMUL.i32 213, 197 | |
222 = IMUL.i32 214, 198 | |
223 = IMUL.i32 215, 199 | |
224 = IMUL.i32 216, 200 | |
225 = IMUL.i32 217, 201 | |
226 = IMUL.i32 218, 202 | |
227 = IMUL.i32 219, 203 | |
228 = IMUL.i32 220, 204 | |
229 = IADD.s32 221, 213 | |
230 = IADD.s32 222, 214 | |
231 = IADD.s32 223, 215 | |
232 = IADD.s32 224, 216 | |
233 = IADD.s32 225, 217 | |
234 = IADD.s32 226, 218 | |
235 = IADD.s32 227, 219 | |
236 = IADD.s32 228, 220 | |
237 = IMUL.i32 229, 213 | |
238 = IMUL.i32 230, 214 | |
239 = IMUL.i32 231, 215 | |
240 = IMUL.i32 232, 216 | |
241 = IMUL.i32 233, 217 | |
242 = IMUL.i32 234, 218 | |
243 = IMUL.i32 235, 219 | |
244 = IMUL.i32 236, 220 | |
245 = IADD.s32 237, 229 | |
246 = IADD.s32 238, 230 | |
247 = IADD.s32 239, 231 | |
248 = IADD.s32 240, 232 | |
249 = IADD.s32 241, 233 | |
250 = IADD.s32 242, 234 | |
251 = IADD.s32 243, 235 | |
252 = IADD.s32 244, 236 | |
253 = IMUL.i32 245, 229 | |
254 = IMUL.i32 246, 230 | |
255 = IMUL.i32 247, 231 | |
256 = IMUL.i32 248, 232 | |
257 = IMUL.i32 249, 233 | |
258 = IMUL.i32 250, 234 | |
259 = IMUL.i32 251, 235 | |
260 = IMUL.i32 252, 236 | |
261 = IADD.s32 253, 245 | |
262 = IADD.s32 254, 246 | |
263 = IADD.s32 255, 247 | |
264 = IADD.s32 256, 248 | |
265 = IADD.s32 257, 249 | |
266 = IADD.s32 258, 250 | |
267 = IADD.s32 259, 251 | |
268 = IADD.s32 260, 252 | |
269 = IMUL.i32 261, 245 | |
270 = IMUL.i32 262, 246 | |
271 = IMUL.i32 263, 247 | |
272 = IMUL.i32 264, 248 | |
273 = IMUL.i32 265, 249 | |
274 = IMUL.i32 266, 250 | |
275 = IMUL.i32 267, 251 | |
276 = IMUL.i32 268, 252 | |
277 = IADD.s32 269, 261 | |
278 = IADD.s32 270, 262 | |
279 = IADD.s32 271, 263 | |
280 = IADD.s32 272, 264 | |
281 = IADD.s32 273, 265 | |
282 = IADD.s32 274, 266 | |
283 = IADD.s32 275, 267 | |
284 = IADD.s32 276, 268 | |
285 = IMUL.i32 277, 261 | |
286 = IMUL.i32 278, 262 | |
287 = IMUL.i32 279, 263 | |
288 = IMUL.i32 280, 264 | |
289 = IMUL.i32 281, 265 | |
290 = IMUL.i32 282, 266 | |
291 = IMUL.i32 283, 267 | |
292 = IMUL.i32 284, 268 | |
293 = IADD.s32 285, 277 | |
294 = IADD.s32 286, 278 | |
295 = IADD.s32 287, 279 | |
296 = IADD.s32 288, 280 | |
297 = IADD.s32 289, 281 | |
298 = IADD.s32 290, 282 | |
299 = IADD.s32 291, 283 | |
300 = IADD.s32 292, 284 | |
301 = IADD.s32 43, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
302 = IADD.s32 31, 27 | |
303 = IADD.s32 32, 28 | |
304 = IADD.s32 33, 29 | |
305 = IADD.s32 34, 30 | |
306 = IADD.s32 302, 304 | |
307 = IADD.s32 303, 305 | |
308 = LSHIFT_OR.i32 15, #0x0, #0x2.b0 | |
309 = IADD.s32 306, 307 | |
313 = IADD.s32 u0, 308 | |
315 = ICMP.u32.i1.lt 313, u0 | |
316 = IADD.s32 315, u0[1] | |
STORE.i32 309, 313, 316, byte_offset:0 | |
} from block2 | |
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = MOV.i32 #0x1 | |
r1 = IADD.s32 u1, r1 | |
r2 = MOV.i32 #0x2 | |
r2 = IADD.s32 u1, r2 | |
r3 = MOV.i32 #0x3 | |
r3 = IADD.s32 u1, r3 | |
r4 = MOV.i32 #0x4 | |
r4 = IADD.s32 u1, r4 | |
r5 = MOV.i32 #0x5 | |
r5 = IADD.s32 u1, r5 | |
r6 = MOV.i32 #0x6 | |
r6 = IADD.s32 u1, r6 | |
r7 = MOV.i32 #0x7 | |
r7 = IADD.s32 u1, r7 | |
r8 = U32_TO_F32 r0 | |
r8 = FMA.f32 r8, #0x2edbe6ff, #0x0.neg | |
r8 = F32_TO_S32.rtz r8 | |
r9 = MOV.i32 r8 | |
r10 = MOV.i32 r8 | |
r11 = MOV.i32 r8 | |
r12 = MOV.i32 r8 | |
r13 = MOV.i32 r8 | |
r14 = MOV.i32 r8 | |
r15 = MOV.i32 r8 | |
r48 = MOV.i32 u1 | |
r49 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r50 = ICMP.s32.m1.ge r49, #0x10 | |
BRANCHZ.i16.eq r50.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r48 = IMUL.i32 r12, r48 | |
r1 = IMUL.i32 r13, r1 | |
r2 = IMUL.i32 r14, r2 | |
r3 = IMUL.i32 r15, r3 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r9, r5 | |
r6 = IMUL.i32 r10, r6 | |
r7 = IMUL.i32 r11, r7 | |
r48 = IADD.s32 r48, r12 | |
r1 = IADD.s32 r1, r13 | |
r2 = IADD.s32 r2, r14 | |
r3 = IADD.s32 r3, r15 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r9 | |
r6 = IADD.s32 r6, r10 | |
r7 = IADD.s32 r7, r11 | |
r12 = IMUL.i32 r48, r12 | |
r13 = IMUL.i32 r1, r13 | |
r14 = IMUL.i32 r2, r14 | |
r15 = IMUL.i32 r3, r15 | |
r8 = IMUL.i32 r4, r8 | |
r9 = IMUL.i32 r5, r9 | |
r10 = IMUL.i32 r6, r10 | |
r11 = IMUL.i32 r7, r11 | |
r12 = IADD.s32 r12, r48 | |
r13 = IADD.s32 r13, r1 | |
r14 = IADD.s32 r14, r2 | |
r15 = IADD.s32 r15, r3 | |
r8 = IADD.s32 r8, r4 | |
r9 = IADD.s32 r9, r5 | |
r10 = IADD.s32 r10, r6 | |
r11 = IADD.s32 r11, r7 | |
r48 = IMUL.i32 r12, r48 | |
r1 = IMUL.i32 r13, r1 | |
r2 = IMUL.i32 r14, r2 | |
r3 = IMUL.i32 r15, r3 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r9, r5 | |
r6 = IMUL.i32 r10, r6 | |
r7 = IMUL.i32 r11, r7 | |
r48 = IADD.s32 r48, r12 | |
r1 = IADD.s32 r1, r13 | |
r2 = IADD.s32 r2, r14 | |
r3 = IADD.s32 r3, r15 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r9 | |
r6 = IADD.s32 r6, r10 | |
r7 = IADD.s32 r7, r11 | |
r12 = IMUL.i32 r48, r12 | |
r13 = IMUL.i32 r1, r13 | |
r14 = IMUL.i32 r2, r14 | |
r15 = IMUL.i32 r3, r15 | |
r8 = IMUL.i32 r4, r8 | |
r9 = IMUL.i32 r5, r9 | |
r10 = IMUL.i32 r6, r10 | |
r11 = IMUL.i32 r7, r11 | |
r12 = IADD.s32 r12, r48 | |
r13 = IADD.s32 r13, r1 | |
r14 = IADD.s32 r14, r2 | |
r15 = IADD.s32 r15, r3 | |
r8 = IADD.s32 r8, r4 | |
r9 = IADD.s32 r9, r5 | |
r10 = IADD.s32 r10, r6 | |
r11 = IADD.s32 r11, r7 | |
r48 = IMUL.i32 r12, r48 | |
r1 = IMUL.i32 r13, r1 | |
r2 = IMUL.i32 r14, r2 | |
r3 = IMUL.i32 r15, r3 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r9, r5 | |
r6 = IMUL.i32 r10, r6 | |
r7 = IMUL.i32 r11, r7 | |
r48 = IADD.s32 r48, r12 | |
r1 = IADD.s32 r1, r13 | |
r2 = IADD.s32 r2, r14 | |
r3 = IADD.s32 r3, r15 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r9 | |
r6 = IADD.s32 r6, r10 | |
r7 = IADD.s32 r7, r11 | |
r12 = IMUL.i32 r48, r12 | |
r13 = IMUL.i32 r1, r13 | |
r14 = IMUL.i32 r2, r14 | |
r15 = IMUL.i32 r3, r15 | |
r8 = IMUL.i32 r4, r8 | |
r9 = IMUL.i32 r5, r9 | |
r10 = IMUL.i32 r6, r10 | |
r11 = IMUL.i32 r7, r11 | |
r12 = IADD.s32 r12, r48 | |
r13 = IADD.s32 r13, r1 | |
r14 = IADD.s32 r14, r2 | |
r15 = IADD.s32 r15, r3 | |
r8 = IADD.s32 r8, r4 | |
r9 = IADD.s32 r9, r5 | |
r10 = IADD.s32 r10, r6 | |
r11 = IADD.s32 r11, r7 | |
r48 = IMUL.i32 r12, r48 | |
r1 = IMUL.i32 r13, r1 | |
r2 = IMUL.i32 r14, r2 | |
r3 = IMUL.i32 r15, r3 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r9, r5 | |
r6 = IMUL.i32 r10, r6 | |
r7 = IMUL.i32 r11, r7 | |
r48 = IADD.s32 r48, r12 | |
r1 = IADD.s32 r1, r13 | |
r2 = IADD.s32 r2, r14 | |
r3 = IADD.s32 r3, r15 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r9 | |
r6 = IADD.s32 r6, r10 | |
r7 = IADD.s32 r7, r11 | |
r12 = IMUL.i32 r48, r12 | |
r13 = IMUL.i32 r1, r13 | |
r14 = IMUL.i32 r2, r14 | |
r15 = IMUL.i32 r3, r15 | |
r8 = IMUL.i32 r4, r8 | |
r9 = IMUL.i32 r5, r9 | |
r10 = IMUL.i32 r6, r10 | |
r11 = IMUL.i32 r7, r11 | |
r12 = IADD.s32 r12, r48 | |
r13 = IADD.s32 r13, r1 | |
r14 = IADD.s32 r14, r2 | |
r15 = IADD.s32 r15, r3 | |
r8 = IADD.s32 r8, r4 | |
r9 = IADD.s32 r9, r5 | |
r10 = IADD.s32 r10, r6 | |
r11 = IADD.s32 r11, r7 | |
r48 = IMUL.i32 r12, r48 | |
r1 = IMUL.i32 r13, r1 | |
r2 = IMUL.i32 r14, r2 | |
r3 = IMUL.i32 r15, r3 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r9, r5 | |
r6 = IMUL.i32 r10, r6 | |
r7 = IMUL.i32 r11, r7 | |
r48 = IADD.s32 r48, r12 | |
r1 = IADD.s32 r1, r13 | |
r2 = IADD.s32 r2, r14 | |
r3 = IADD.s32 r3, r15 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r9 | |
r6 = IADD.s32 r6, r10 | |
r7 = IADD.s32 r7, r11 | |
r12 = IMUL.i32 r48, r12 | |
r13 = IMUL.i32 r1, r13 | |
r14 = IMUL.i32 r2, r14 | |
r15 = IMUL.i32 r3, r15 | |
r8 = IMUL.i32 r4, r8 | |
r9 = IMUL.i32 r5, r9 | |
r10 = IMUL.i32 r6, r10 | |
r11 = IMUL.i32 r7, r11 | |
r12 = IADD.s32 r12, r48 | |
r13 = IADD.s32 r13, r1 | |
r14 = IADD.s32 r14, r2 | |
r15 = IADD.s32 r15, r3 | |
r8 = IADD.s32 r8, r4 | |
r9 = IADD.s32 r9, r5 | |
r10 = IADD.s32 r10, r6 | |
r11 = IADD.s32 r11, r7 | |
r48 = IMUL.i32 r12, r48 | |
r1 = IMUL.i32 r13, r1 | |
r2 = IMUL.i32 r14, r2 | |
r3 = IMUL.i32 r15, r3 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r9, r5 | |
r6 = IMUL.i32 r10, r6 | |
r7 = IMUL.i32 r11, r7 | |
r48 = IADD.s32 r48, r12 | |
r1 = IADD.s32 r1, r13 | |
r2 = IADD.s32 r2, r14 | |
r3 = IADD.s32 r3, r15 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r9 | |
r6 = IADD.s32 r6, r10 | |
r7 = IADD.s32 r7, r11 | |
r12 = IMUL.i32 r48, r12 | |
r13 = IMUL.i32 r1, r13 | |
r14 = IMUL.i32 r2, r14 | |
r15 = IMUL.i32 r3, r15 | |
r8 = IMUL.i32 r4, r8 | |
r9 = IMUL.i32 r5, r9 | |
r10 = IMUL.i32 r6, r10 | |
r11 = IMUL.i32 r7, r11 | |
r12 = IADD.s32 r12, r48 | |
r13 = IADD.s32 r13, r1 | |
r14 = IADD.s32 r14, r2 | |
r15 = IADD.s32 r15, r3 | |
r8 = IADD.s32 r8, r4 | |
r9 = IADD.s32 r9, r5 | |
r10 = IADD.s32 r10, r6 | |
r11 = IADD.s32 r11, r7 | |
r48 = IMUL.i32 r12, r48 | |
r1 = IMUL.i32 r13, r1 | |
r2 = IMUL.i32 r14, r2 | |
r3 = IMUL.i32 r15, r3 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r9, r5 | |
r6 = IMUL.i32 r10, r6 | |
r7 = IMUL.i32 r11, r7 | |
r48 = IADD.s32 r48, r12 | |
r1 = IADD.s32 r1, r13 | |
r2 = IADD.s32 r2, r14 | |
r3 = IADD.s32 r3, r15 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r9 | |
r6 = IADD.s32 r6, r10 | |
r7 = IADD.s32 r7, r11 | |
r12 = IMUL.i32 r48, r12 | |
r13 = IMUL.i32 r1, r13 | |
r14 = IMUL.i32 r2, r14 | |
r15 = IMUL.i32 r3, r15 | |
r8 = IMUL.i32 r4, r8 | |
r9 = IMUL.i32 r5, r9 | |
r10 = IMUL.i32 r6, r10 | |
r11 = IMUL.i32 r7, r11 | |
r12 = IADD.s32 r12, r48 | |
r13 = IADD.s32 r13, r1 | |
r14 = IADD.s32 r14, r2 | |
r15 = IADD.s32 r15, r3 | |
r8 = IADD.s32 r8, r4 | |
r9 = IADD.s32 r9, r5 | |
r10 = IADD.s32 r10, r6 | |
r11 = IADD.s32 r11, r7 | |
r48 = IMUL.i32 r12, r48 | |
r1 = IMUL.i32 r13, r1 | |
r2 = IMUL.i32 r14, r2 | |
r3 = IMUL.i32 r15, r3 | |
r4 = IMUL.i32 r8, r4 | |
r5 = IMUL.i32 r9, r5 | |
r6 = IMUL.i32 r10, r6 | |
r7 = IMUL.i32 r11, r7 | |
r48 = IADD.s32 r48, r12 | |
r1 = IADD.s32 r1, r13 | |
r2 = IADD.s32 r2, r14 | |
r3 = IADD.s32 r3, r15 | |
r4 = IADD.s32 r4, r8 | |
r5 = IADD.s32 r5, r9 | |
r6 = IADD.s32 r6, r10 | |
r7 = IADD.s32 r7, r11 | |
r12 = IMUL.i32 r48, r12 | |
r13 = IMUL.i32 r1, r13 | |
r14 = IMUL.i32 r2, r14 | |
r15 = IMUL.i32 r3, r15 | |
r8 = IMUL.i32 r4, r8 | |
r9 = IMUL.i32 r5, r9 | |
r10 = IMUL.i32 r6, r10 | |
r11 = IMUL.i32 r7, r11 | |
r12 = IADD.s32 r12, r48 | |
r13 = IADD.s32 r13, r1 | |
r14 = IADD.s32 r14, r2 | |
r15 = IADD.s32 r15, r3 | |
r8 = IADD.s32 r8, r4 | |
r9 = IADD.s32 r9, r5 | |
r10 = IADD.s32 r10, r6 | |
r11 = IADD.s32 r11, r7 | |
r49 = IADD.s32 r49, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r1 = IADD.s32 r12, r8 | |
r2 = IADD.s32 r13, r9 | |
r3 = IADD.s32 r14, r10 | |
r4 = IADD.s32 r15, r11 | |
r1 = IADD.s32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r1 = IADD.s32 r1, r2 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
block0 { | |
id(0) nbb | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.y.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
* r1 = MOV.i32 fau.x | |
+ r2 = MOV.i32 fau.y | |
* r3 = MOV.i32 fau.x | |
+ r4 = MOV.i32 fau.y | |
* r5 = MOV.i32 fau.x | |
+ r6 = MOV.i32 fau.y | |
* r7 = MOV.i32 fau.x | |
+ _.h00 = U32_TO_F32 r0 | |
* _.h00 = FMA.f32 t1, fau.y, t.neg | |
+ r8 = F32_TO_S32.rtz t | |
* NOP | |
+ r9 = MOV.i32 t1 | |
200000001 400000003 600000005 800000007 2edbe6ff00000000 | |
id(0) nbb r_uncond | |
* r10 = MOV.i32 r8 | |
+ r11 = MOV.i32 r8 | |
* r12 = MOV.i32 r8 | |
+ r1 = IADD.s32 fau.x, r1 | |
* r13 = MOV.i32 r8 | |
+ r2 = IADD.s32 fau.x, r2 | |
* r14 = MOV.i32 r8 | |
+ r3 = IADD.s32 fau.x, r3 | |
* r15 = MOV.i32 r8 | |
+ r4 = IADD.s32 fau.x, r4 | |
* r48 = MOV.i32 fau.x | |
+ r5 = IADD.s32 fau.x, r5 | |
* r49 = MOV.i32 t | |
+ r6 = IADD.s32 fau.x, r6 | |
* NOP | |
+ r7 = IADD.s32 fau.x, r7 | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r49, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000010 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* r48 = IMUL.i32 r12, r48 | |
+ NOP | |
* _.h00 = IMUL.i32 r14, r2 | |
+ NOP | |
* NOP | |
+ r2 = IADD.s32 t0, r14 | |
id(0) nbb | |
* r14 = IMUL.i32 r2, r14 | |
+ NOP | |
* _.h00 = IMUL.i32 r15, r3 | |
+ r3 = IADD.s32 t, r15 | |
* _.h00 = IMUL.i32 t1, r15 | |
+ r15 = IADD.s32 t, t1 | |
* r3 = IMUL.i32 t1, r3 | |
+ NOP | |
* _.h00 = IMUL.i32 r8, r4 | |
+ r4 = IADD.s32 t, r8 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* r4 = IMUL.i32 t1, r4 | |
+ NOP | |
* NOP | |
+ r14 = IADD.s32 r14, r2 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r14, r2 | |
+ r2 = IADD.s32 t, r14 | |
* r14 = IMUL.i32 t1, r14 | |
+ NOP | |
* _.h00 = IMUL.i32 r13, r1 | |
+ r1 = IADD.s32 t, r13 | |
* _.h00 = IMUL.i32 t1, r13 | |
+ r13 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r13 | |
+ r13 = IADD.s32 t, t1 | |
* r1 = IMUL.i32 t1, r1 | |
+ NOP | |
* NOP | |
+ r3 = IADD.s32 r3, r15 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r3, r15 | |
+ r15 = IADD.s32 t, r3 | |
* r3 = IMUL.i32 t1, r3 | |
+ NOP | |
* _.h00 = IMUL.i32 r10, r6 | |
+ r6 = IADD.s32 t, r10 | |
* _.h00 = IMUL.i32 t1, r10 | |
+ r10 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r6 | |
+ r6 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r10 | |
+ r10 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r6 | |
+ r6 = IADD.s32 t, t1 | |
* r10 = IMUL.i32 t1, r10 | |
+ NOP | |
id(0) nbb | |
* _.h00 = IMUL.i32 r11, r7 | |
+ r7 = IADD.s32 t, r11 | |
* _.h00 = IMUL.i32 t1, r11 | |
+ r11 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r7 | |
+ r7 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r11 | |
+ r11 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r7 | |
+ r7 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r11 | |
+ r11 = IADD.s32 t, t1 | |
* r7 = IMUL.i32 t1, r7 | |
+ NOP | |
* NOP | |
+ r48 = IADD.s32 r48, r12 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r48, r12 | |
+ r12 = IADD.s32 t, r48 | |
* _.h00 = IMUL.i32 t1, r48 | |
+ r48 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r12 | |
+ r12 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r48 | |
+ r48 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r12 | |
+ r12 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r48 | |
+ r48 = IADD.s32 t, t1 | |
* r12 = IMUL.i32 t1, r12 | |
+ NOP | |
* NOP | |
+ r4 = IADD.s32 r4, r8 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r4, r8 | |
+ r8 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r4 | |
+ r4 = IADD.s32 t, t1 | |
* r8 = IMUL.i32 t1, r8 | |
+ NOP | |
* NOP | |
+ r1 = IADD.s32 r1, r13 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r1, r13 | |
+ r13 = IADD.s32 t, r1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r13 | |
+ r13 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r13 | |
+ r13 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* r13 = IMUL.i32 t1, r13 | |
+ NOP | |
* NOP | |
+ r3 = IADD.s32 r3, r15 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r3, r15 | |
+ r15 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r15 | |
+ r15 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r15 | |
+ r15 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* r15 = IMUL.i32 t1, r15 | |
+ NOP | |
* NOP | |
+ r10 = IADD.s32 r10, r6 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r10, r6 | |
+ r6 = IADD.s32 t, r10 | |
* _.h00 = IMUL.i32 t1, r10 | |
+ r10 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r6 | |
+ r6 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r10 | |
+ r10 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r6 | |
+ r6 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r10 | |
+ r10 = IADD.s32 t, t1 | |
* r6 = IMUL.i32 t1, r6 | |
+ NOP | |
* NOP | |
+ r12 = IADD.s32 r12, r48 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r12, r48 | |
+ r48 = IADD.s32 t, r12 | |
* _.h00 = IMUL.i32 t1, r12 | |
+ r12 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r48 | |
+ r48 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r12 | |
+ r12 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r48 | |
+ r48 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r12 | |
+ r12 = IADD.s32 t, t1 | |
* r48 = IMUL.i32 t1, r48 | |
+ NOP | |
* NOP | |
+ r13 = IADD.s32 r13, r1 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r13, r1 | |
+ r1 = IADD.s32 t, r13 | |
* _.h00 = IMUL.i32 t1, r13 | |
+ r13 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* r13 = IMUL.i32 t1, r13 | |
+ NOP | |
* _.h00 = IMUL.i32 r9, r5 | |
+ r5 = IADD.s32 t, r9 | |
* _.h00 = IMUL.i32 t1, r9 | |
+ r9 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* NOP | |
+ r14 = IADD.s32 r14, r2 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r5, r9 | |
+ r9 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 r14, r2 | |
+ r2 = IADD.s32 t, r14 | |
* _.h00 = IMUL.i32 r9, r5 | |
+ r5 = IADD.s32 t, r9 | |
* _.h00 = IMUL.i32 r2, r14 | |
+ r14 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r5, r9 | |
+ r9 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 r14, r2 | |
+ r2 = IADD.s32 t, r14 | |
* _.h00 = IMUL.i32 r9, r5 | |
+ r5 = IADD.s32 t, r9 | |
* NOP | |
+ r7 = IADD.s32 r7, r11 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r2, r14 | |
+ r14 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r5, r9 | |
+ r9 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 r7, r11 | |
+ r11 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r14, r2 | |
+ r2 = IADD.s32 t, r14 | |
* _.h00 = IMUL.i32 r9, r5 | |
+ r5 = IADD.s32 t, r9 | |
* _.h00 = IMUL.i32 r11, r7 | |
+ r7 = IADD.s32 t, r11 | |
* _.h00 = IMUL.i32 r2, r14 | |
+ r14 = IADD.s32 t, r2 | |
* NOP | |
+ r8 = IADD.s32 r8, r4 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r5, r9 | |
+ r9 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 r7, r11 | |
+ r11 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r14, r2 | |
+ r2 = IADD.s32 t, r14 | |
* _.h00 = IMUL.i32 r8, r4 | |
+ r4 = IADD.s32 t, r8 | |
* _.h00 = IMUL.i32 r9, r5 | |
+ r5 = IADD.s32 t, r9 | |
* _.h00 = IMUL.i32 r11, r7 | |
+ r7 = IADD.s32 t, r11 | |
* _.h00 = IMUL.i32 r2, r14 | |
+ r14 = IADD.s32 t, r2 | |
* NOP | |
+ r15 = IADD.s32 r15, r3 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r4, r8 | |
+ r8 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r5, r9 | |
+ r9 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 r7, r11 | |
+ r11 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r14, r2 | |
+ r2 = IADD.s32 t, r14 | |
* _.h00 = IMUL.i32 r15, r3 | |
+ r3 = IADD.s32 t, r15 | |
* _.h00 = IMUL.i32 r8, r4 | |
+ r4 = IADD.s32 t, r8 | |
* _.h00 = IMUL.i32 r9, r5 | |
+ r5 = IADD.s32 t, r9 | |
* NOP | |
+ r6 = IADD.s32 r6, r10 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r11, r7 | |
+ r7 = IADD.s32 t, r11 | |
* _.h00 = IMUL.i32 r2, r14 | |
+ r14 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r3, r15 | |
+ r15 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 r4, r8 | |
+ r8 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r5, r9 | |
+ r9 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 r6, r10 | |
+ r10 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r7, r11 | |
+ r11 = IADD.s32 t, r7 | |
* NOP | |
+ r48 = IADD.s32 r48, r12 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r14, r2 | |
+ r2 = IADD.s32 t, r14 | |
* _.h00 = IMUL.i32 r15, r3 | |
+ r3 = IADD.s32 t, r15 | |
* _.h00 = IMUL.i32 r8, r4 | |
+ r4 = IADD.s32 t, r8 | |
* _.h00 = IMUL.i32 r9, r5 | |
+ r5 = IADD.s32 t, r9 | |
* _.h00 = IMUL.i32 r10, r6 | |
+ r6 = IADD.s32 t, r10 | |
* _.h00 = IMUL.i32 r11, r7 | |
+ r7 = IADD.s32 t, r11 | |
* _.h00 = IMUL.i32 r48, r12 | |
+ r12 = IADD.s32 t, r48 | |
* NOP | |
+ r13 = IADD.s32 r13, r1 | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* _.h00 = IMUL.i32 r2, r14 | |
+ r14 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r3, r15 | |
+ r15 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 r4, r8 | |
+ r8 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r5, r9 | |
+ r9 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 r6, r10 | |
+ r10 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r7, r11 | |
+ r11 = IADD.s32 t, r7 | |
* NOP | |
+ r49 = IADD.s32 r49, fau.x | |
* NOP | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) nbb | |
* NOP | |
+ r1 = IADD.s32 r12, r8 | |
* NOP | |
+ r2 = IADD.s32 r13, r9 | |
* NOP | |
+ r3 = IADD.s32 r14, r10 | |
id(0) wait(0 ) nbb r_uncond | |
* NOP | |
+ r4 = IADD.s32 r15, r11 | |
* NOP | |
+ r1 = IADD.s32 r1, r3 | |
* NOP | |
+ _.h00 = IADD.s32 r2, r4 | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ r1 = IADD.s32 r1, t1 | |
* NOP | |
+ r0 = IADD.s32 fau.x, t0 | |
* NOP | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* NOP | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
slot 0 reads: r1 | |
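Note on the clause listing that follows: in Mesa's Bifrost disassembly, each `*` line appears to be the FMA pipe and each `+` line the ADD pipe of one dual-issued tuple, with `t`, `t0`, `t1` naming the passthrough result of the other slot or the previous tuple. The recurring IMUL/IADD pair (e.g. `*IMUL.i32 t0, r14, r2` / `+IADD.s32 r2:t1, t, r14`) is the benchmark's integer mad, x = y*x + y. A minimal scalar rendering in C, assuming wraparound arithmetic (GLSL ints wrap):
example (C) ----------
#include <stdint.h>

/* One IMUL/IADD tuple pair from the clauses below: x = y*x + y,
 * i.e. mad(y, x, y) with 32-bit wraparound. */
static int32_t mad_i32(int32_t x, int32_t y)
{
    uint32_t t = (uint32_t)y * (uint32_t)x; /* FMA pipe: IMUL.i32 */
    return (int32_t)(t + (uint32_t)y);      /* ADD pipe: IADD.s32 */
}
----------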
clause_0: | |
ds(0) nbb ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
*MOV.i32 r1:t0, 0x00000001 /* 0.000000 */ | |
+MOV.i32 r2:t1, 0x00000002 /* 0.000000 */ | |
*MOV.i32 r3:t0, 0x00000003 /* 0.000000 */ | |
+MOV.i32 r4:t1, 0x00000004 /* 0.000000 */ | |
*MOV.i32 r5:t0, 0x00000005 /* 0.000000 */ | |
+MOV.i32 r6:t1, 0x00000006 /* 0.000000 */ | |
*MOV.i32 r7:t0, 0x00000007 /* 0.000000 */ | |
+U32_TO_F32 t1, r0 | |
*FMA.f32 t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+F32_TO_S32.rtz r8:t1, t | |
*NOP t0 | |
+MOV.i32 r9:t1, t1 | |
} | |
clause_8: | |
ds(0) nbb r_uncond ncph | |
{ | |
*MOV.i32 r10:t0, r8 | |
+MOV.i32 r11:t1, r8 | |
*MOV.i32 r12:t0, r8 | |
+IADD.s32 r1:t1, u1.w0, r1 | |
*MOV.i32 r13:t0, r8 | |
+IADD.s32 r2:t1, u1.w0, r2 | |
*MOV.i32 r14:t0, r8 | |
+IADD.s32 r3:t1, u1.w0, r3 | |
*MOV.i32 r15:t0, r8 | |
+IADD.s32 r4:t1, u1.w0, r4 | |
*MOV.i32 r48:t0, u1.w0 | |
+IADD.s32 r5:t1, u1.w0, r5 | |
*MOV.i32 r49:t0, #0 | |
+IADD.s32 r6:t1, u1.w0, r6 | |
*NOP t0 | |
+IADD.s32 r7:t1, u1.w0, r7 | |
} | |
clause_14: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r49, 0x00000010 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_19 | |
} | |
clause_17: | |
ds(0) nbb | |
{ | |
*NOP t0 | |
+JUMP t1, clause_131 | |
} | |
clause_19: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 r48:t0, r12, r48 | |
+NOP t1 | |
*IMUL.i32 t0, r14, r2 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r2:t1, t0, r14 | |
} | |
clause_22: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 r14:t0, r2, r14 | |
+NOP t1 | |
*IMUL.i32 t0, r15, r3 | |
+IADD.s32 r3:t1, t, r15 | |
*IMUL.i32 t0, t1, r15 | |
+IADD.s32 r15:t1, t, t1 | |
*IMUL.i32 r3:t0, t1, r3 | |
+NOP t1 | |
*IMUL.i32 t0, r8, r4 | |
+IADD.s32 r4:t1, t, r8 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 r4:t0, t1, r4 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r14:t1, r14, r2 | |
} | |
clause_28: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r14, r2 | |
+IADD.s32 r2:t1, t, r14 | |
*IMUL.i32 r14:t0, t1, r14 | |
+NOP t1 | |
*IMUL.i32 t0, r13, r1 | |
+IADD.s32 r1:t1, t, r13 | |
*IMUL.i32 t0, t1, r13 | |
+IADD.s32 r13:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r13 | |
+IADD.s32 r13:t1, t, t1 | |
*IMUL.i32 r1:t0, t1, r1 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r3:t1, r3, r15 | |
} | |
clause_34: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r3, r15 | |
+IADD.s32 r15:t1, t, r3 | |
*IMUL.i32 r3:t0, t1, r3 | |
+NOP t1 | |
*IMUL.i32 t0, r10, r6 | |
+IADD.s32 r6:t1, t, r10 | |
*IMUL.i32 t0, t1, r10 | |
+IADD.s32 r10:t1, t, t1 | |
*IMUL.i32 t0, t1, r6 | |
+IADD.s32 r6:t1, t, t1 | |
*IMUL.i32 t0, t1, r10 | |
+IADD.s32 r10:t1, t, t1 | |
*IMUL.i32 t0, t1, r6 | |
+IADD.s32 r6:t1, t, t1 | |
*IMUL.i32 r10:t0, t1, r10 | |
+NOP t1 | |
} | |
clause_40: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r11, r7 | |
+IADD.s32 r7:t1, t, r11 | |
*IMUL.i32 t0, t1, r11 | |
+IADD.s32 r11:t1, t, t1 | |
*IMUL.i32 t0, t1, r7 | |
+IADD.s32 r7:t1, t, t1 | |
*IMUL.i32 t0, t1, r11 | |
+IADD.s32 r11:t1, t, t1 | |
*IMUL.i32 t0, t1, r7 | |
+IADD.s32 r7:t1, t, t1 | |
*IMUL.i32 t0, t1, r11 | |
+IADD.s32 r11:t1, t, t1 | |
*IMUL.i32 r7:t0, t1, r7 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r48:t1, r48, r12 | |
} | |
clause_46: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r48, r12 | |
+IADD.s32 r12:t1, t, r48 | |
*IMUL.i32 t0, t1, r48 | |
+IADD.s32 r48:t1, t, t1 | |
*IMUL.i32 t0, t1, r12 | |
+IADD.s32 r12:t1, t, t1 | |
*IMUL.i32 t0, t1, r48 | |
+IADD.s32 r48:t1, t, t1 | |
*IMUL.i32 t0, t1, r12 | |
+IADD.s32 r12:t1, t, t1 | |
*IMUL.i32 t0, t1, r48 | |
+IADD.s32 r48:t1, t, t1 | |
*IMUL.i32 r12:t0, t1, r12 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r4:t1, r4, r8 | |
} | |
clause_52: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r4, r8 | |
+IADD.s32 r8:t1, t, r4 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 t0, t1, r4 | |
+IADD.s32 r4:t1, t, t1 | |
*IMUL.i32 r8:t0, t1, r8 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r1:t1, r1, r13 | |
} | |
clause_58: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r1, r13 | |
+IADD.s32 r13:t1, t, r1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r13 | |
+IADD.s32 r13:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r13 | |
+IADD.s32 r13:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 r13:t0, t1, r13 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r3:t1, r3, r15 | |
} | |
clause_64: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r3, r15 | |
+IADD.s32 r15:t1, t, r3 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r15 | |
+IADD.s32 r15:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r15 | |
+IADD.s32 r15:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 r15:t0, t1, r15 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r10:t1, r10, r6 | |
} | |
clause_70: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r10, r6 | |
+IADD.s32 r6:t1, t, r10 | |
*IMUL.i32 t0, t1, r10 | |
+IADD.s32 r10:t1, t, t1 | |
*IMUL.i32 t0, t1, r6 | |
+IADD.s32 r6:t1, t, t1 | |
*IMUL.i32 t0, t1, r10 | |
+IADD.s32 r10:t1, t, t1 | |
*IMUL.i32 t0, t1, r6 | |
+IADD.s32 r6:t1, t, t1 | |
*IMUL.i32 t0, t1, r10 | |
+IADD.s32 r10:t1, t, t1 | |
*IMUL.i32 r6:t0, t1, r6 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r12:t1, r12, r48 | |
} | |
clause_76: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r12, r48 | |
+IADD.s32 r48:t1, t, r12 | |
*IMUL.i32 t0, t1, r12 | |
+IADD.s32 r12:t1, t, t1 | |
*IMUL.i32 t0, t1, r48 | |
+IADD.s32 r48:t1, t, t1 | |
*IMUL.i32 t0, t1, r12 | |
+IADD.s32 r12:t1, t, t1 | |
*IMUL.i32 t0, t1, r48 | |
+IADD.s32 r48:t1, t, t1 | |
*IMUL.i32 t0, t1, r12 | |
+IADD.s32 r12:t1, t, t1 | |
*IMUL.i32 r48:t0, t1, r48 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r13:t1, r13, r1 | |
} | |
clause_82: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r13, r1 | |
+IADD.s32 r1:t1, t, r13 | |
*IMUL.i32 t0, t1, r13 | |
+IADD.s32 r13:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 r13:t0, t1, r13 | |
+NOP t1 | |
*IMUL.i32 t0, r9, r5 | |
+IADD.s32 r5:t1, t, r9 | |
*IMUL.i32 t0, t1, r9 | |
+IADD.s32 r9:t1, t, t1 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*NOP t0 | |
+IADD.s32 r14:t1, r14, r2 | |
} | |
clause_88: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r5, r9 | |
+IADD.s32 r9:t1, t, r5 | |
*IMUL.i32 t0, r14, r2 | |
+IADD.s32 r2:t1, t, r14 | |
*IMUL.i32 t0, r9, r5 | |
+IADD.s32 r5:t1, t, r9 | |
*IMUL.i32 t0, r2, r14 | |
+IADD.s32 r14:t1, t, r2 | |
*IMUL.i32 t0, r5, r9 | |
+IADD.s32 r9:t1, t, r5 | |
*IMUL.i32 t0, r14, r2 | |
+IADD.s32 r2:t1, t, r14 | |
*IMUL.i32 t0, r9, r5 | |
+IADD.s32 r5:t1, t, r9 | |
*NOP t0 | |
+IADD.s32 r7:t1, r7, r11 | |
} | |
clause_94: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r2, r14 | |
+IADD.s32 r14:t1, t, r2 | |
*IMUL.i32 t0, r5, r9 | |
+IADD.s32 r9:t1, t, r5 | |
*IMUL.i32 t0, r7, r11 | |
+IADD.s32 r11:t1, t, r7 | |
*IMUL.i32 t0, r14, r2 | |
+IADD.s32 r2:t1, t, r14 | |
*IMUL.i32 t0, r9, r5 | |
+IADD.s32 r5:t1, t, r9 | |
*IMUL.i32 t0, r11, r7 | |
+IADD.s32 r7:t1, t, r11 | |
*IMUL.i32 t0, r2, r14 | |
+IADD.s32 r14:t1, t, r2 | |
*NOP t0 | |
+IADD.s32 r8:t1, r8, r4 | |
} | |
clause_100: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r5, r9 | |
+IADD.s32 r9:t1, t, r5 | |
*IMUL.i32 t0, r7, r11 | |
+IADD.s32 r11:t1, t, r7 | |
*IMUL.i32 t0, r14, r2 | |
+IADD.s32 r2:t1, t, r14 | |
*IMUL.i32 t0, r8, r4 | |
+IADD.s32 r4:t1, t, r8 | |
*IMUL.i32 t0, r9, r5 | |
+IADD.s32 r5:t1, t, r9 | |
*IMUL.i32 t0, r11, r7 | |
+IADD.s32 r7:t1, t, r11 | |
*IMUL.i32 t0, r2, r14 | |
+IADD.s32 r14:t1, t, r2 | |
*NOP t0 | |
+IADD.s32 r15:t1, r15, r3 | |
} | |
clause_106: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r4, r8 | |
+IADD.s32 r8:t1, t, r4 | |
*IMUL.i32 t0, r5, r9 | |
+IADD.s32 r9:t1, t, r5 | |
*IMUL.i32 t0, r7, r11 | |
+IADD.s32 r11:t1, t, r7 | |
*IMUL.i32 t0, r14, r2 | |
+IADD.s32 r2:t1, t, r14 | |
*IMUL.i32 t0, r15, r3 | |
+IADD.s32 r3:t1, t, r15 | |
*IMUL.i32 t0, r8, r4 | |
+IADD.s32 r4:t1, t, r8 | |
*IMUL.i32 t0, r9, r5 | |
+IADD.s32 r5:t1, t, r9 | |
*NOP t0 | |
+IADD.s32 r6:t1, r6, r10 | |
} | |
clause_112: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r11, r7 | |
+IADD.s32 r7:t1, t, r11 | |
*IMUL.i32 t0, r2, r14 | |
+IADD.s32 r14:t1, t, r2 | |
*IMUL.i32 t0, r3, r15 | |
+IADD.s32 r15:t1, t, r3 | |
*IMUL.i32 t0, r4, r8 | |
+IADD.s32 r8:t1, t, r4 | |
*IMUL.i32 t0, r5, r9 | |
+IADD.s32 r9:t1, t, r5 | |
*IMUL.i32 t0, r6, r10 | |
+IADD.s32 r10:t1, t, r6 | |
*IMUL.i32 t0, r7, r11 | |
+IADD.s32 r11:t1, t, r7 | |
*NOP t0 | |
+IADD.s32 r48:t1, r48, r12 | |
} | |
clause_118: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r14, r2 | |
+IADD.s32 r2:t1, t, r14 | |
*IMUL.i32 t0, r15, r3 | |
+IADD.s32 r3:t1, t, r15 | |
*IMUL.i32 t0, r8, r4 | |
+IADD.s32 r4:t1, t, r8 | |
*IMUL.i32 t0, r9, r5 | |
+IADD.s32 r5:t1, t, r9 | |
*IMUL.i32 t0, r10, r6 | |
+IADD.s32 r6:t1, t, r10 | |
*IMUL.i32 t0, r11, r7 | |
+IADD.s32 r7:t1, t, r11 | |
*IMUL.i32 t0, r48, r12 | |
+IADD.s32 r12:t1, t, r48 | |
*NOP t0 | |
+IADD.s32 r13:t1, r13, r1 | |
} | |
clause_124: | |
ds(0) nbb r_uncond | |
{ | |
*IMUL.i32 t0, r2, r14 | |
+IADD.s32 r14:t1, t, r2 | |
*IMUL.i32 t0, r3, r15 | |
+IADD.s32 r15:t1, t, r3 | |
*IMUL.i32 t0, r4, r8 | |
+IADD.s32 r8:t1, t, r4 | |
*IMUL.i32 t0, r5, r9 | |
+IADD.s32 r9:t1, t, r5 | |
*IMUL.i32 t0, r6, r10 | |
+IADD.s32 r10:t1, t, r6 | |
*IMUL.i32 t0, r7, r11 | |
+IADD.s32 r11:t1, t, r7 | |
*NOP t0 | |
+IADD.s32 r49:t1, r49, 0x00000001 /* 0.000000 */ | |
*NOP t0 | |
+JUMP t1, clause_14 | |
} | |
clause_131: | |
ds(0) nbb ncph next_store dwb(0) | |
{ | |
*NOP t0 | |
+IADD.s32 r1:t1, r12, r8 | |
*NOP t0 | |
+IADD.s32 r2:t1, r13, r9 | |
*NOP t0 | |
+IADD.s32 r3:t1, r14, r10 | |
} | |
clause_134: | |
ds(0) eos store | |
{ | |
*NOP t0 | |
+IADD.s32 r4:t1, r15, r11 | |
*NOP t0 | |
+IADD.s32 r1:t1, r1, r3 | |
*NOP t0 | |
+IADD.s32 t1, r2, r4 | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+IADD.s32 r1:t1, r1, t1 | |
*NOP t0 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*NOP t0 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*NOP t0 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
e20eea22 compute_sp_v8_int 21.294 GFLOPs 12.606ms | |
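The 21.294 GFLOPs figure (integer ops, despite the label) is consistent with the dump above: clause_14 compares the loop counter against 0x10, so 16 iterations, and each loop body carries about 128 IMUL/IADD pairs, consistent with MAD_16 over 8 lanes. Assuming the harness dispatches 65,536 invocations (e.g. 256 work groups of 256; the dispatch size is not shown in this dump), the arithmetic checks out:
example (C) ----------
#include <stdio.h>

/* Sanity check of "21.294 GFLOPs ... 12.606ms". The 65,536-invocation
 * dispatch is an assumption; everything else is read off the dump. */
int main(void)
{
    double mads_per_iter = 16 * 8;               /* MAD_16 on a vec8 */
    double ops_per_inv = 2 * mads_per_iter * 16; /* mul+add, 16 iterations */
    double total_ops = ops_per_inv * 65536;      /* 268,435,456 */
    printf("%.3f GFLOPs\n", total_ops / 12.606e-3 / 1e9); /* prints 21.294 */
    return 0;
}
----------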
compute shader ---------- | |
#define KERNEL compute_sp_v16 | |
#define LOCAL_SIZE_X 256 | |
#define DATATYPE int | |
#define vec2 ivec2 | |
#define vec4 ivec4 | |
#line 64 | |
// Avoid auto-vectorization by using vector-width-locked dependent code
layout(local_size_x = LOCAL_SIZE_X) in; | |
#undef MAD_4 | |
#undef MAD_16 | |
#undef MAD_64 | |
#define mad(a,b,c) (a*b+c) | |
#define MAD_4(x, y) x = mad(y, x, y); y = mad(x, y, x); x = mad(y, x, y); y = mad(x, y, x); | |
#define MAD_16(x, y) MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); MAD_4(x, y); | |
#define MAD_64(x, y) MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); MAD_16(x, y); | |
struct vec8 { | |
vec4 d0, d1; | |
}; | |
#define VEC8(x0,x1,x2,x3,x4,x5,x6,x7) vec8(vec4(x0,x1,x2,x3), vec4(x4,x5,x6,x7)) | |
#define VEC8_S(x) vec8(vec4(x,x,x,x), vec4(x,x,x,x)) | |
#define VEC8_ADD(a, b) (vec8(a.d0 + b.d0, a.d1 + b.d1)) | |
#define VEC8_MUL(a, b) (vec8(a.d0 * b.d0, a.d1 * b.d1)) | |
struct vec16 { | |
vec8 d0,d1; | |
}; | |
#define VEC16(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15) vec16(VEC8(x0,x1,x2,x3,x4,x5,x6,x7), VEC8(x8,x9,x10,x11,x12,x13,x14,x15)) | |
#define VEC16_S(x) vec16(VEC8_S(x), VEC8_S(x))
#define VEC16_ADD(a, b) (vec16(VEC8_ADD(a.d0, b.d0), VEC8_ADD(a.d1, b.d1))) | |
#define VEC16_MUL(a, b) (vec16(VEC8_MUL(a.d0, b.d0), VEC8_MUL(a.d1, b.d1))) | |
#define mad8(a,b,c) (VEC8_ADD(VEC8_MUL(a,b),c)) | |
#define mad16(a,b,c) (VEC16_ADD(VEC16_MUL(a,b),c)) | |
layout(location = 1) uniform DATATYPE _A; | |
#define SCALE 1e-10 | |
layout(std430, binding = 0) restrict writeonly buffer outbuffer { | |
DATATYPE ptr[]; | |
}; | |
#line 184 | |
void compute_sp_v16() | |
{ | |
uint id = gl_GlobalInvocationID[0] + gl_GlobalInvocationID[1] * 256u + gl_GlobalInvocationID[2] * 256u * 256u; | |
vec16 x = VEC16(_A, (_A+DATATYPE(1)), (_A+DATATYPE(2)), (_A+DATATYPE(3)), (_A+DATATYPE(4)), (_A+DATATYPE(5)), (_A+DATATYPE(6)), (_A+DATATYPE(7)), | |
(_A+DATATYPE(8)), (_A+DATATYPE(9)), (_A+DATATYPE(10)), (_A+DATATYPE(11)), (_A+DATATYPE(12)), (_A+DATATYPE(13)), (_A+DATATYPE(14)), (_A+DATATYPE(15))); | |
vec16 y = VEC16_S(DATATYPE((float(id) * SCALE))); | |
#undef mad | |
#define mad mad16 | |
for(int i=0; i<8; i++) | |
{ | |
MAD_16(x, y); | |
} | |
vec8 u = VEC8_ADD(y.d0, y.d1); | |
vec4 s = u.d0 + u.d1; | |
vec2 t = s.xy + s.zw; | |
ptr[id] = t.x + t.y; | |
} | |
void main() {compute_sp_v16();} | |
---------- | |
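For reference, the MAD macros above unroll into a serial chain in which every multiply-add consumes the previous result, which is what keeps the compiler from re-associating or auto-vectorizing across the chain; the v16 variant simply carries 16 such independent lanes. A C sketch of what one MAD_4 step expands to (names mirror the GLSL macros):
example (C) ----------
/* Preprocessor expansion of one MAD_4(x, y): four dependent mads. */
#define mad(a, b, c) ((a) * (b) + (c))

static void mad4_step(int *x, int *y)
{
    *x = mad(*y, *x, *y); /* x = y*x + y */
    *y = mad(*x, *y, *x); /* y = x*y + x */
    *x = mad(*y, *x, *y);
    *y = mad(*x, *y, *x);
}
/* MAD_16 = 4 x MAD_4; this kernel runs 8 x MAD_16 per lane. */
----------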
shader: MESA_SHADER_COMPUTE | |
source_sha1: {0x330fb7af, 0x4ad9a35d, 0x58354a59, 0xe4dc307b, 0x9dc86d72} | |
name: GLSL20 | |
workgroup-size: 256, 1, 1 | |
shared-size: 0 | |
inputs: 0 | |
outputs: 0 | |
uniforms: 1 | |
ubos: 1 | |
shared: 0 | |
ray queries: 0 | |
decl_var ssbo INTERP_MODE_NONE restrict writeonly highp int[] ptr (0, 0, 0) | |
decl_var uniform INTERP_MODE_NONE highp int _A (1, 0, 0) | |
decl_var ubo INTERP_MODE_NONE vec4[1] uniform_0 (0, 0, 0) | |
decl_function main (0 params) | |
impl main { | |
block block_0: | |
/* preds: */ | |
vec3 32 ssa_17 = intrinsic load_global_invocation_id () () | |
vec1 32 ssa_8 = load_const (0x00000008 = 0.000000) | |
vec1 32 ssa_18 = ishl ssa_17.y, ssa_8 | |
vec1 32 ssa_19 = iadd ssa_17.x, ssa_18 | |
vec1 32 ssa_1 = load_const (0x00000001 = 0.000000) | |
vec1 32 ssa_614 = insert_u16 ssa_17.z, ssa_1 | |
vec1 32 ssa_22 = iadd ssa_19, ssa_614 | |
vec1 32 ssa_0 = load_const (0x00000000 = 0.000000) | |
vec1 32 ssa_23 = intrinsic load_ubo (ssa_0, ssa_0) (access=0, align_mul=1073741824, align_offset=0, range_base=0, range=4) | |
vec1 32 ssa_24 = iadd ssa_23, ssa_1 | |
vec1 32 ssa_2 = load_const (0x00000002 = 0.000000) | |
vec1 32 ssa_25 = iadd ssa_23, ssa_2 | |
vec1 32 ssa_3 = load_const (0x00000003 = 0.000000) | |
vec1 32 ssa_26 = iadd ssa_23, ssa_3 | |
vec1 32 ssa_4 = load_const (0x00000004 = 0.000000) | |
vec1 32 ssa_27 = iadd ssa_23, ssa_4 | |
vec1 32 ssa_5 = load_const (0x00000005 = 0.000000) | |
vec1 32 ssa_28 = iadd ssa_23, ssa_5 | |
vec1 32 ssa_6 = load_const (0x00000006 = 0.000000) | |
vec1 32 ssa_29 = iadd ssa_23, ssa_6 | |
vec1 32 ssa_7 = load_const (0x00000007 = 0.000000) | |
vec1 32 ssa_30 = iadd ssa_23, ssa_7 | |
vec1 32 ssa_31 = iadd ssa_23, ssa_8 | |
vec1 32 ssa_9 = load_const (0x00000009 = 0.000000) | |
vec1 32 ssa_32 = iadd ssa_23, ssa_9 | |
vec1 32 ssa_10 = load_const (0x0000000a = 0.000000) | |
vec1 32 ssa_33 = iadd ssa_23, ssa_10 | |
vec1 32 ssa_11 = load_const (0x0000000b = 0.000000) | |
vec1 32 ssa_34 = iadd ssa_23, ssa_11 | |
vec1 32 ssa_12 = load_const (0x0000000c = 0.000000) | |
vec1 32 ssa_35 = iadd ssa_23, ssa_12 | |
vec1 32 ssa_13 = load_const (0x0000000d = 0.000000) | |
vec1 32 ssa_36 = iadd ssa_23, ssa_13 | |
vec1 32 ssa_14 = load_const (0x0000000e = 0.000000) | |
vec1 32 ssa_37 = iadd ssa_23, ssa_14 | |
vec1 32 ssa_15 = load_const (0x0000000f = 0.000000) | |
vec1 32 ssa_38 = iadd ssa_23, ssa_15 | |
vec1 32 ssa_39 = u2f32 ssa_22 | |
vec1 32 ssa_16 = load_const (0x2edbe6ff = 0.000000) | |
vec1 32 ssa_40 = fmul ssa_39, ssa_16 | |
vec1 32 ssa_41 = f2i32 ssa_40 | |
/* succs: block_1 */ | |
loop { | |
block block_1: | |
/* preds: block_0 block_4 */ | |
vec1 32 ssa_42 = phi block_0: ssa_41, block_4: ssa_584 | |
vec1 32 ssa_43 = phi block_0: ssa_41, block_4: ssa_585 | |
vec1 32 ssa_44 = phi block_0: ssa_41, block_4: ssa_586 | |
vec1 32 ssa_45 = phi block_0: ssa_41, block_4: ssa_587 | |
vec1 32 ssa_46 = phi block_0: ssa_41, block_4: ssa_580 | |
vec1 32 ssa_47 = phi block_0: ssa_41, block_4: ssa_581 | |
vec1 32 ssa_48 = phi block_0: ssa_41, block_4: ssa_582 | |
vec1 32 ssa_49 = phi block_0: ssa_41, block_4: ssa_583 | |
vec1 32 ssa_50 = phi block_0: ssa_41, block_4: ssa_568 | |
vec1 32 ssa_51 = phi block_0: ssa_41, block_4: ssa_569 | |
vec1 32 ssa_52 = phi block_0: ssa_41, block_4: ssa_570 | |
vec1 32 ssa_53 = phi block_0: ssa_41, block_4: ssa_571 | |
vec1 32 ssa_54 = phi block_0: ssa_41, block_4: ssa_564 | |
vec1 32 ssa_55 = phi block_0: ssa_41, block_4: ssa_565 | |
vec1 32 ssa_56 = phi block_0: ssa_41, block_4: ssa_566 | |
vec1 32 ssa_57 = phi block_0: ssa_41, block_4: ssa_567 | |
vec1 32 ssa_58 = phi block_0: ssa_35, block_4: ssa_552 | |
vec1 32 ssa_59 = phi block_0: ssa_36, block_4: ssa_553 | |
vec1 32 ssa_60 = phi block_0: ssa_37, block_4: ssa_554 | |
vec1 32 ssa_61 = phi block_0: ssa_38, block_4: ssa_555 | |
vec1 32 ssa_62 = phi block_0: ssa_31, block_4: ssa_548 | |
vec1 32 ssa_63 = phi block_0: ssa_32, block_4: ssa_549 | |
vec1 32 ssa_64 = phi block_0: ssa_33, block_4: ssa_550 | |
vec1 32 ssa_65 = phi block_0: ssa_34, block_4: ssa_551 | |
vec1 32 ssa_66 = phi block_0: ssa_27, block_4: ssa_536 | |
vec1 32 ssa_67 = phi block_0: ssa_28, block_4: ssa_537 | |
vec1 32 ssa_68 = phi block_0: ssa_29, block_4: ssa_538 | |
vec1 32 ssa_69 = phi block_0: ssa_30, block_4: ssa_539 | |
vec1 32 ssa_70 = phi block_0: ssa_23, block_4: ssa_532 | |
vec1 32 ssa_71 = phi block_0: ssa_24, block_4: ssa_533 | |
vec1 32 ssa_72 = phi block_0: ssa_25, block_4: ssa_534 | |
vec1 32 ssa_73 = phi block_0: ssa_26, block_4: ssa_535 | |
vec1 32 ssa_74 = phi block_0: ssa_0, block_4: ssa_588 | |
vec1 32 ssa_75 = ige32 ssa_74, ssa_8 | |
/* succs: block_2 block_3 */ | |
if ssa_75 { | |
block block_2: | |
/* preds: block_1 */ | |
break | |
/* succs: block_5 */ | |
} else { | |
block block_3: | |
/* preds: block_1 */ | |
/* succs: block_4 */ | |
} | |
block block_4: | |
/* preds: block_3 */ | |
vec1 32 ssa_76 = imul ssa_54, ssa_70 | |
vec1 32 ssa_77 = imul ssa_55, ssa_71 | |
vec1 32 ssa_78 = imul ssa_56, ssa_72 | |
vec1 32 ssa_79 = imul ssa_57, ssa_73 | |
vec1 32 ssa_80 = imul ssa_50, ssa_66 | |
vec1 32 ssa_81 = imul ssa_51, ssa_67 | |
vec1 32 ssa_82 = imul ssa_52, ssa_68 | |
vec1 32 ssa_83 = imul ssa_53, ssa_69 | |
vec1 32 ssa_84 = iadd ssa_76, ssa_54 | |
vec1 32 ssa_85 = iadd ssa_77, ssa_55 | |
vec1 32 ssa_86 = iadd ssa_78, ssa_56 | |
vec1 32 ssa_87 = iadd ssa_79, ssa_57 | |
vec1 32 ssa_88 = iadd ssa_80, ssa_50 | |
vec1 32 ssa_89 = iadd ssa_81, ssa_51 | |
vec1 32 ssa_90 = iadd ssa_82, ssa_52 | |
vec1 32 ssa_91 = iadd ssa_83, ssa_53 | |
vec1 32 ssa_92 = imul ssa_46, ssa_62 | |
vec1 32 ssa_93 = imul ssa_47, ssa_63 | |
vec1 32 ssa_94 = imul ssa_48, ssa_64 | |
vec1 32 ssa_95 = imul ssa_49, ssa_65 | |
vec1 32 ssa_96 = imul ssa_42, ssa_58 | |
vec1 32 ssa_97 = imul ssa_43, ssa_59 | |
vec1 32 ssa_98 = imul ssa_44, ssa_60 | |
vec1 32 ssa_99 = imul ssa_45, ssa_61 | |
vec1 32 ssa_100 = iadd ssa_92, ssa_46 | |
vec1 32 ssa_101 = iadd ssa_93, ssa_47 | |
vec1 32 ssa_102 = iadd ssa_94, ssa_48 | |
vec1 32 ssa_103 = iadd ssa_95, ssa_49 | |
vec1 32 ssa_104 = iadd ssa_96, ssa_42 | |
vec1 32 ssa_105 = iadd ssa_97, ssa_43 | |
vec1 32 ssa_106 = iadd ssa_98, ssa_44 | |
vec1 32 ssa_107 = iadd ssa_99, ssa_45 | |
vec1 32 ssa_108 = imul ssa_84, ssa_54 | |
vec1 32 ssa_109 = imul ssa_85, ssa_55 | |
vec1 32 ssa_110 = imul ssa_86, ssa_56 | |
vec1 32 ssa_111 = imul ssa_87, ssa_57 | |
vec1 32 ssa_112 = imul ssa_88, ssa_50 | |
vec1 32 ssa_113 = imul ssa_89, ssa_51 | |
vec1 32 ssa_114 = imul ssa_90, ssa_52 | |
vec1 32 ssa_115 = imul ssa_91, ssa_53 | |
vec1 32 ssa_116 = iadd ssa_108, ssa_84 | |
vec1 32 ssa_117 = iadd ssa_109, ssa_85 | |
vec1 32 ssa_118 = iadd ssa_110, ssa_86 | |
vec1 32 ssa_119 = iadd ssa_111, ssa_87 | |
vec1 32 ssa_120 = iadd ssa_112, ssa_88 | |
vec1 32 ssa_121 = iadd ssa_113, ssa_89 | |
vec1 32 ssa_122 = iadd ssa_114, ssa_90 | |
vec1 32 ssa_123 = iadd ssa_115, ssa_91 | |
vec1 32 ssa_124 = imul ssa_100, ssa_46 | |
vec1 32 ssa_125 = imul ssa_101, ssa_47 | |
vec1 32 ssa_126 = imul ssa_102, ssa_48 | |
vec1 32 ssa_127 = imul ssa_103, ssa_49 | |
vec1 32 ssa_128 = imul ssa_104, ssa_42 | |
vec1 32 ssa_129 = imul ssa_105, ssa_43 | |
vec1 32 ssa_130 = imul ssa_106, ssa_44 | |
vec1 32 ssa_131 = imul ssa_107, ssa_45 | |
vec1 32 ssa_132 = iadd ssa_124, ssa_100 | |
vec1 32 ssa_133 = iadd ssa_125, ssa_101 | |
vec1 32 ssa_134 = iadd ssa_126, ssa_102 | |
vec1 32 ssa_135 = iadd ssa_127, ssa_103 | |
vec1 32 ssa_136 = iadd ssa_128, ssa_104 | |
vec1 32 ssa_137 = iadd ssa_129, ssa_105 | |
vec1 32 ssa_138 = iadd ssa_130, ssa_106 | |
vec1 32 ssa_139 = iadd ssa_131, ssa_107 | |
vec1 32 ssa_140 = imul ssa_116, ssa_84 | |
vec1 32 ssa_141 = imul ssa_117, ssa_85 | |
vec1 32 ssa_142 = imul ssa_118, ssa_86 | |
vec1 32 ssa_143 = imul ssa_119, ssa_87 | |
vec1 32 ssa_144 = imul ssa_120, ssa_88 | |
vec1 32 ssa_145 = imul ssa_121, ssa_89 | |
vec1 32 ssa_146 = imul ssa_122, ssa_90 | |
vec1 32 ssa_147 = imul ssa_123, ssa_91 | |
vec1 32 ssa_148 = iadd ssa_140, ssa_116 | |
vec1 32 ssa_149 = iadd ssa_141, ssa_117 | |
vec1 32 ssa_150 = iadd ssa_142, ssa_118 | |
vec1 32 ssa_151 = iadd ssa_143, ssa_119 | |
vec1 32 ssa_152 = iadd ssa_144, ssa_120 | |
vec1 32 ssa_153 = iadd ssa_145, ssa_121 | |
vec1 32 ssa_154 = iadd ssa_146, ssa_122 | |
vec1 32 ssa_155 = iadd ssa_147, ssa_123 | |
vec1 32 ssa_156 = imul ssa_132, ssa_100 | |
vec1 32 ssa_157 = imul ssa_133, ssa_101 | |
vec1 32 ssa_158 = imul ssa_134, ssa_102 | |
vec1 32 ssa_159 = imul ssa_135, ssa_103 | |
vec1 32 ssa_160 = imul ssa_136, ssa_104 | |
vec1 32 ssa_161 = imul ssa_137, ssa_105 | |
vec1 32 ssa_162 = imul ssa_138, ssa_106 | |
vec1 32 ssa_163 = imul ssa_139, ssa_107 | |
vec1 32 ssa_164 = iadd ssa_156, ssa_132 | |
vec1 32 ssa_165 = iadd ssa_157, ssa_133 | |
vec1 32 ssa_166 = iadd ssa_158, ssa_134 | |
vec1 32 ssa_167 = iadd ssa_159, ssa_135 | |
vec1 32 ssa_168 = iadd ssa_160, ssa_136 | |
vec1 32 ssa_169 = iadd ssa_161, ssa_137 | |
vec1 32 ssa_170 = iadd ssa_162, ssa_138 | |
vec1 32 ssa_171 = iadd ssa_163, ssa_139 | |
vec1 32 ssa_172 = imul ssa_148, ssa_116 | |
vec1 32 ssa_173 = imul ssa_149, ssa_117 | |
vec1 32 ssa_174 = imul ssa_150, ssa_118 | |
vec1 32 ssa_175 = imul ssa_151, ssa_119 | |
vec1 32 ssa_176 = imul ssa_152, ssa_120 | |
vec1 32 ssa_177 = imul ssa_153, ssa_121 | |
vec1 32 ssa_178 = imul ssa_154, ssa_122 | |
vec1 32 ssa_179 = imul ssa_155, ssa_123 | |
vec1 32 ssa_180 = iadd ssa_172, ssa_148 | |
vec1 32 ssa_181 = iadd ssa_173, ssa_149 | |
vec1 32 ssa_182 = iadd ssa_174, ssa_150 | |
vec1 32 ssa_183 = iadd ssa_175, ssa_151 | |
vec1 32 ssa_184 = iadd ssa_176, ssa_152 | |
vec1 32 ssa_185 = iadd ssa_177, ssa_153 | |
vec1 32 ssa_186 = iadd ssa_178, ssa_154 | |
vec1 32 ssa_187 = iadd ssa_179, ssa_155 | |
vec1 32 ssa_188 = imul ssa_164, ssa_132 | |
vec1 32 ssa_189 = imul ssa_165, ssa_133 | |
vec1 32 ssa_190 = imul ssa_166, ssa_134 | |
vec1 32 ssa_191 = imul ssa_167, ssa_135 | |
vec1 32 ssa_192 = imul ssa_168, ssa_136 | |
vec1 32 ssa_193 = imul ssa_169, ssa_137 | |
vec1 32 ssa_194 = imul ssa_170, ssa_138 | |
vec1 32 ssa_195 = imul ssa_171, ssa_139 | |
vec1 32 ssa_196 = iadd ssa_188, ssa_164 | |
vec1 32 ssa_197 = iadd ssa_189, ssa_165 | |
vec1 32 ssa_198 = iadd ssa_190, ssa_166 | |
vec1 32 ssa_199 = iadd ssa_191, ssa_167 | |
vec1 32 ssa_200 = iadd ssa_192, ssa_168 | |
vec1 32 ssa_201 = iadd ssa_193, ssa_169 | |
vec1 32 ssa_202 = iadd ssa_194, ssa_170 | |
vec1 32 ssa_203 = iadd ssa_195, ssa_171 | |
vec1 32 ssa_204 = imul ssa_180, ssa_148 | |
vec1 32 ssa_205 = imul ssa_181, ssa_149 | |
vec1 32 ssa_206 = imul ssa_182, ssa_150 | |
vec1 32 ssa_207 = imul ssa_183, ssa_151 | |
vec1 32 ssa_208 = imul ssa_184, ssa_152 | |
vec1 32 ssa_209 = imul ssa_185, ssa_153 | |
vec1 32 ssa_210 = imul ssa_186, ssa_154 | |
vec1 32 ssa_211 = imul ssa_187, ssa_155 | |
vec1 32 ssa_212 = iadd ssa_204, ssa_180 | |
vec1 32 ssa_213 = iadd ssa_205, ssa_181 | |
vec1 32 ssa_214 = iadd ssa_206, ssa_182 | |
vec1 32 ssa_215 = iadd ssa_207, ssa_183 | |
vec1 32 ssa_216 = iadd ssa_208, ssa_184 | |
vec1 32 ssa_217 = iadd ssa_209, ssa_185 | |
vec1 32 ssa_218 = iadd ssa_210, ssa_186 | |
vec1 32 ssa_219 = iadd ssa_211, ssa_187 | |
vec1 32 ssa_220 = imul ssa_196, ssa_164 | |
vec1 32 ssa_221 = imul ssa_197, ssa_165 | |
vec1 32 ssa_222 = imul ssa_198, ssa_166 | |
vec1 32 ssa_223 = imul ssa_199, ssa_167 | |
vec1 32 ssa_224 = imul ssa_200, ssa_168 | |
vec1 32 ssa_225 = imul ssa_201, ssa_169 | |
vec1 32 ssa_226 = imul ssa_202, ssa_170 | |
vec1 32 ssa_227 = imul ssa_203, ssa_171 | |
vec1 32 ssa_228 = iadd ssa_220, ssa_196 | |
vec1 32 ssa_229 = iadd ssa_221, ssa_197 | |
vec1 32 ssa_230 = iadd ssa_222, ssa_198 | |
vec1 32 ssa_231 = iadd ssa_223, ssa_199 | |
vec1 32 ssa_232 = iadd ssa_224, ssa_200 | |
vec1 32 ssa_233 = iadd ssa_225, ssa_201 | |
vec1 32 ssa_234 = iadd ssa_226, ssa_202 | |
vec1 32 ssa_235 = iadd ssa_227, ssa_203 | |
vec1 32 ssa_236 = imul ssa_212, ssa_180 | |
vec1 32 ssa_237 = imul ssa_213, ssa_181 | |
vec1 32 ssa_238 = imul ssa_214, ssa_182 | |
vec1 32 ssa_239 = imul ssa_215, ssa_183 | |
vec1 32 ssa_240 = imul ssa_216, ssa_184 | |
vec1 32 ssa_241 = imul ssa_217, ssa_185 | |
vec1 32 ssa_242 = imul ssa_218, ssa_186 | |
vec1 32 ssa_243 = imul ssa_219, ssa_187 | |
vec1 32 ssa_244 = iadd ssa_236, ssa_212 | |
vec1 32 ssa_245 = iadd ssa_237, ssa_213 | |
vec1 32 ssa_246 = iadd ssa_238, ssa_214 | |
vec1 32 ssa_247 = iadd ssa_239, ssa_215 | |
vec1 32 ssa_248 = iadd ssa_240, ssa_216 | |
vec1 32 ssa_249 = iadd ssa_241, ssa_217 | |
vec1 32 ssa_250 = iadd ssa_242, ssa_218 | |
vec1 32 ssa_251 = iadd ssa_243, ssa_219 | |
vec1 32 ssa_252 = imul ssa_228, ssa_196 | |
vec1 32 ssa_253 = imul ssa_229, ssa_197 | |
vec1 32 ssa_254 = imul ssa_230, ssa_198 | |
vec1 32 ssa_255 = imul ssa_231, ssa_199 | |
vec1 32 ssa_256 = imul ssa_232, ssa_200 | |
vec1 32 ssa_257 = imul ssa_233, ssa_201 | |
vec1 32 ssa_258 = imul ssa_234, ssa_202 | |
vec1 32 ssa_259 = imul ssa_235, ssa_203 | |
vec1 32 ssa_260 = iadd ssa_252, ssa_228 | |
vec1 32 ssa_261 = iadd ssa_253, ssa_229 | |
vec1 32 ssa_262 = iadd ssa_254, ssa_230 | |
vec1 32 ssa_263 = iadd ssa_255, ssa_231 | |
vec1 32 ssa_264 = iadd ssa_256, ssa_232 | |
vec1 32 ssa_265 = iadd ssa_257, ssa_233 | |
vec1 32 ssa_266 = iadd ssa_258, ssa_234 | |
vec1 32 ssa_267 = iadd ssa_259, ssa_235 | |
vec1 32 ssa_268 = imul ssa_244, ssa_212 | |
vec1 32 ssa_269 = imul ssa_245, ssa_213 | |
vec1 32 ssa_270 = imul ssa_246, ssa_214 | |
vec1 32 ssa_271 = imul ssa_247, ssa_215 | |
vec1 32 ssa_272 = imul ssa_248, ssa_216 | |
vec1 32 ssa_273 = imul ssa_249, ssa_217 | |
vec1 32 ssa_274 = imul ssa_250, ssa_218 | |
vec1 32 ssa_275 = imul ssa_251, ssa_219 | |
vec1 32 ssa_276 = iadd ssa_268, ssa_244 | |
vec1 32 ssa_277 = iadd ssa_269, ssa_245 | |
vec1 32 ssa_278 = iadd ssa_270, ssa_246 | |
vec1 32 ssa_279 = iadd ssa_271, ssa_247 | |
vec1 32 ssa_280 = iadd ssa_272, ssa_248 | |
vec1 32 ssa_281 = iadd ssa_273, ssa_249 | |
vec1 32 ssa_282 = iadd ssa_274, ssa_250 | |
vec1 32 ssa_283 = iadd ssa_275, ssa_251 | |
vec1 32 ssa_284 = imul ssa_260, ssa_228 | |
vec1 32 ssa_285 = imul ssa_261, ssa_229 | |
vec1 32 ssa_286 = imul ssa_262, ssa_230 | |
vec1 32 ssa_287 = imul ssa_263, ssa_231 | |
vec1 32 ssa_288 = imul ssa_264, ssa_232 | |
vec1 32 ssa_289 = imul ssa_265, ssa_233 | |
vec1 32 ssa_290 = imul ssa_266, ssa_234 | |
vec1 32 ssa_291 = imul ssa_267, ssa_235 | |
vec1 32 ssa_292 = iadd ssa_284, ssa_260 | |
vec1 32 ssa_293 = iadd ssa_285, ssa_261 | |
vec1 32 ssa_294 = iadd ssa_286, ssa_262 | |
vec1 32 ssa_295 = iadd ssa_287, ssa_263 | |
vec1 32 ssa_296 = iadd ssa_288, ssa_264 | |
vec1 32 ssa_297 = iadd ssa_289, ssa_265 | |
vec1 32 ssa_298 = iadd ssa_290, ssa_266 | |
vec1 32 ssa_299 = iadd ssa_291, ssa_267 | |
vec1 32 ssa_300 = imul ssa_276, ssa_244 | |
vec1 32 ssa_301 = imul ssa_277, ssa_245 | |
vec1 32 ssa_302 = imul ssa_278, ssa_246 | |
vec1 32 ssa_303 = imul ssa_279, ssa_247 | |
vec1 32 ssa_304 = imul ssa_280, ssa_248 | |
vec1 32 ssa_305 = imul ssa_281, ssa_249 | |
vec1 32 ssa_306 = imul ssa_282, ssa_250 | |
vec1 32 ssa_307 = imul ssa_283, ssa_251 | |
vec1 32 ssa_308 = iadd ssa_300, ssa_276 | |
vec1 32 ssa_309 = iadd ssa_301, ssa_277 | |
vec1 32 ssa_310 = iadd ssa_302, ssa_278 | |
vec1 32 ssa_311 = iadd ssa_303, ssa_279 | |
vec1 32 ssa_312 = iadd ssa_304, ssa_280 | |
vec1 32 ssa_313 = iadd ssa_305, ssa_281 | |
vec1 32 ssa_314 = iadd ssa_306, ssa_282 | |
vec1 32 ssa_315 = iadd ssa_307, ssa_283 | |
vec1 32 ssa_316 = imul ssa_292, ssa_260 | |
vec1 32 ssa_317 = imul ssa_293, ssa_261 | |
vec1 32 ssa_318 = imul ssa_294, ssa_262 | |
vec1 32 ssa_319 = imul ssa_295, ssa_263 | |
vec1 32 ssa_320 = imul ssa_296, ssa_264 | |
vec1 32 ssa_321 = imul ssa_297, ssa_265 | |
vec1 32 ssa_322 = imul ssa_298, ssa_266 | |
vec1 32 ssa_323 = imul ssa_299, ssa_267 | |
vec1 32 ssa_324 = iadd ssa_316, ssa_292 | |
vec1 32 ssa_325 = iadd ssa_317, ssa_293 | |
vec1 32 ssa_326 = iadd ssa_318, ssa_294 | |
vec1 32 ssa_327 = iadd ssa_319, ssa_295 | |
vec1 32 ssa_328 = iadd ssa_320, ssa_296 | |
vec1 32 ssa_329 = iadd ssa_321, ssa_297 | |
vec1 32 ssa_330 = iadd ssa_322, ssa_298 | |
vec1 32 ssa_331 = iadd ssa_323, ssa_299 | |
vec1 32 ssa_332 = imul ssa_308, ssa_276 | |
vec1 32 ssa_333 = imul ssa_309, ssa_277 | |
vec1 32 ssa_334 = imul ssa_310, ssa_278 | |
vec1 32 ssa_335 = imul ssa_311, ssa_279 | |
vec1 32 ssa_336 = imul ssa_312, ssa_280 | |
vec1 32 ssa_337 = imul ssa_313, ssa_281 | |
vec1 32 ssa_338 = imul ssa_314, ssa_282 | |
vec1 32 ssa_339 = imul ssa_315, ssa_283 | |
vec1 32 ssa_340 = iadd ssa_332, ssa_308 | |
vec1 32 ssa_341 = iadd ssa_333, ssa_309 | |
vec1 32 ssa_342 = iadd ssa_334, ssa_310 | |
vec1 32 ssa_343 = iadd ssa_335, ssa_311 | |
vec1 32 ssa_344 = iadd ssa_336, ssa_312 | |
vec1 32 ssa_345 = iadd ssa_337, ssa_313 | |
vec1 32 ssa_346 = iadd ssa_338, ssa_314 | |
vec1 32 ssa_347 = iadd ssa_339, ssa_315 | |
vec1 32 ssa_348 = imul ssa_324, ssa_292 | |
vec1 32 ssa_349 = imul ssa_325, ssa_293 | |
vec1 32 ssa_350 = imul ssa_326, ssa_294 | |
vec1 32 ssa_351 = imul ssa_327, ssa_295 | |
vec1 32 ssa_352 = imul ssa_328, ssa_296 | |
vec1 32 ssa_353 = imul ssa_329, ssa_297 | |
vec1 32 ssa_354 = imul ssa_330, ssa_298 | |
vec1 32 ssa_355 = imul ssa_331, ssa_299 | |
vec1 32 ssa_356 = iadd ssa_348, ssa_324 | |
vec1 32 ssa_357 = iadd ssa_349, ssa_325 | |
vec1 32 ssa_358 = iadd ssa_350, ssa_326 | |
vec1 32 ssa_359 = iadd ssa_351, ssa_327 | |
vec1 32 ssa_360 = iadd ssa_352, ssa_328 | |
vec1 32 ssa_361 = iadd ssa_353, ssa_329 | |
vec1 32 ssa_362 = iadd ssa_354, ssa_330 | |
vec1 32 ssa_363 = iadd ssa_355, ssa_331 | |
vec1 32 ssa_364 = imul ssa_340, ssa_308 | |
vec1 32 ssa_365 = imul ssa_341, ssa_309 | |
vec1 32 ssa_366 = imul ssa_342, ssa_310 | |
vec1 32 ssa_367 = imul ssa_343, ssa_311 | |
vec1 32 ssa_368 = imul ssa_344, ssa_312 | |
vec1 32 ssa_369 = imul ssa_345, ssa_313 | |
vec1 32 ssa_370 = imul ssa_346, ssa_314 | |
vec1 32 ssa_371 = imul ssa_347, ssa_315 | |
vec1 32 ssa_372 = iadd ssa_364, ssa_340 | |
vec1 32 ssa_373 = iadd ssa_365, ssa_341 | |
vec1 32 ssa_374 = iadd ssa_366, ssa_342 | |
vec1 32 ssa_375 = iadd ssa_367, ssa_343 | |
vec1 32 ssa_376 = iadd ssa_368, ssa_344 | |
vec1 32 ssa_377 = iadd ssa_369, ssa_345 | |
vec1 32 ssa_378 = iadd ssa_370, ssa_346 | |
vec1 32 ssa_379 = iadd ssa_371, ssa_347 | |
vec1 32 ssa_380 = imul ssa_356, ssa_324 | |
vec1 32 ssa_381 = imul ssa_357, ssa_325 | |
vec1 32 ssa_382 = imul ssa_358, ssa_326 | |
vec1 32 ssa_383 = imul ssa_359, ssa_327 | |
vec1 32 ssa_384 = imul ssa_360, ssa_328 | |
vec1 32 ssa_385 = imul ssa_361, ssa_329 | |
vec1 32 ssa_386 = imul ssa_362, ssa_330 | |
vec1 32 ssa_387 = imul ssa_363, ssa_331 | |
vec1 32 ssa_388 = iadd ssa_380, ssa_356 | |
vec1 32 ssa_389 = iadd ssa_381, ssa_357 | |
vec1 32 ssa_390 = iadd ssa_382, ssa_358 | |
vec1 32 ssa_391 = iadd ssa_383, ssa_359 | |
vec1 32 ssa_392 = iadd ssa_384, ssa_360 | |
vec1 32 ssa_393 = iadd ssa_385, ssa_361 | |
vec1 32 ssa_394 = iadd ssa_386, ssa_362 | |
vec1 32 ssa_395 = iadd ssa_387, ssa_363 | |
vec1 32 ssa_396 = imul ssa_372, ssa_340 | |
vec1 32 ssa_397 = imul ssa_373, ssa_341 | |
vec1 32 ssa_398 = imul ssa_374, ssa_342 | |
vec1 32 ssa_399 = imul ssa_375, ssa_343 | |
vec1 32 ssa_400 = imul ssa_376, ssa_344 | |
vec1 32 ssa_401 = imul ssa_377, ssa_345 | |
vec1 32 ssa_402 = imul ssa_378, ssa_346 | |
vec1 32 ssa_403 = imul ssa_379, ssa_347 | |
vec1 32 ssa_404 = iadd ssa_396, ssa_372 | |
vec1 32 ssa_405 = iadd ssa_397, ssa_373 | |
vec1 32 ssa_406 = iadd ssa_398, ssa_374 | |
vec1 32 ssa_407 = iadd ssa_399, ssa_375 | |
vec1 32 ssa_408 = iadd ssa_400, ssa_376 | |
vec1 32 ssa_409 = iadd ssa_401, ssa_377 | |
vec1 32 ssa_410 = iadd ssa_402, ssa_378 | |
vec1 32 ssa_411 = iadd ssa_403, ssa_379 | |
vec1 32 ssa_412 = imul ssa_388, ssa_356 | |
vec1 32 ssa_413 = imul ssa_389, ssa_357 | |
vec1 32 ssa_414 = imul ssa_390, ssa_358 | |
vec1 32 ssa_415 = imul ssa_391, ssa_359 | |
vec1 32 ssa_416 = imul ssa_392, ssa_360 | |
vec1 32 ssa_417 = imul ssa_393, ssa_361 | |
vec1 32 ssa_418 = imul ssa_394, ssa_362 | |
vec1 32 ssa_419 = imul ssa_395, ssa_363 | |
vec1 32 ssa_420 = iadd ssa_412, ssa_388 | |
vec1 32 ssa_421 = iadd ssa_413, ssa_389 | |
vec1 32 ssa_422 = iadd ssa_414, ssa_390 | |
vec1 32 ssa_423 = iadd ssa_415, ssa_391 | |
vec1 32 ssa_424 = iadd ssa_416, ssa_392 | |
vec1 32 ssa_425 = iadd ssa_417, ssa_393 | |
vec1 32 ssa_426 = iadd ssa_418, ssa_394 | |
vec1 32 ssa_427 = iadd ssa_419, ssa_395 | |
vec1 32 ssa_428 = imul ssa_404, ssa_372 | |
vec1 32 ssa_429 = imul ssa_405, ssa_373 | |
vec1 32 ssa_430 = imul ssa_406, ssa_374 | |
vec1 32 ssa_431 = imul ssa_407, ssa_375 | |
vec1 32 ssa_432 = imul ssa_408, ssa_376 | |
vec1 32 ssa_433 = imul ssa_409, ssa_377 | |
vec1 32 ssa_434 = imul ssa_410, ssa_378 | |
vec1 32 ssa_435 = imul ssa_411, ssa_379 | |
vec1 32 ssa_436 = iadd ssa_428, ssa_404 | |
vec1 32 ssa_437 = iadd ssa_429, ssa_405 | |
vec1 32 ssa_438 = iadd ssa_430, ssa_406 | |
vec1 32 ssa_439 = iadd ssa_431, ssa_407 | |
vec1 32 ssa_440 = iadd ssa_432, ssa_408 | |
vec1 32 ssa_441 = iadd ssa_433, ssa_409 | |
vec1 32 ssa_442 = iadd ssa_434, ssa_410 | |
vec1 32 ssa_443 = iadd ssa_435, ssa_411 | |
vec1 32 ssa_444 = imul ssa_420, ssa_388 | |
vec1 32 ssa_445 = imul ssa_421, ssa_389 | |
vec1 32 ssa_446 = imul ssa_422, ssa_390 | |
vec1 32 ssa_447 = imul ssa_423, ssa_391 | |
vec1 32 ssa_448 = imul ssa_424, ssa_392 | |
vec1 32 ssa_449 = imul ssa_425, ssa_393 | |
vec1 32 ssa_450 = imul ssa_426, ssa_394 | |
vec1 32 ssa_451 = imul ssa_427, ssa_395 | |
vec1 32 ssa_452 = iadd ssa_444, ssa_420 | |
vec1 32 ssa_453 = iadd ssa_445, ssa_421 | |
vec1 32 ssa_454 = iadd ssa_446, ssa_422 | |
vec1 32 ssa_455 = iadd ssa_447, ssa_423 | |
vec1 32 ssa_456 = iadd ssa_448, ssa_424 | |
vec1 32 ssa_457 = iadd ssa_449, ssa_425 | |
vec1 32 ssa_458 = iadd ssa_450, ssa_426 | |
vec1 32 ssa_459 = iadd ssa_451, ssa_427 | |
vec1 32 ssa_460 = imul ssa_436, ssa_404 | |
vec1 32 ssa_461 = imul ssa_437, ssa_405 | |
vec1 32 ssa_462 = imul ssa_438, ssa_406 | |
vec1 32 ssa_463 = imul ssa_439, ssa_407 | |
vec1 32 ssa_464 = imul ssa_440, ssa_408 | |
vec1 32 ssa_465 = imul ssa_441, ssa_409 | |
vec1 32 ssa_466 = imul ssa_442, ssa_410 | |
vec1 32 ssa_467 = imul ssa_443, ssa_411 | |
vec1 32 ssa_468 = iadd ssa_460, ssa_436 | |
vec1 32 ssa_469 = iadd ssa_461, ssa_437 | |
vec1 32 ssa_470 = iadd ssa_462, ssa_438 | |
vec1 32 ssa_471 = iadd ssa_463, ssa_439 | |
vec1 32 ssa_472 = iadd ssa_464, ssa_440 | |
vec1 32 ssa_473 = iadd ssa_465, ssa_441 | |
vec1 32 ssa_474 = iadd ssa_466, ssa_442 | |
vec1 32 ssa_475 = iadd ssa_467, ssa_443 | |
vec1 32 ssa_476 = imul ssa_452, ssa_420 | |
vec1 32 ssa_477 = imul ssa_453, ssa_421 | |
vec1 32 ssa_478 = imul ssa_454, ssa_422 | |
vec1 32 ssa_479 = imul ssa_455, ssa_423 | |
vec1 32 ssa_480 = imul ssa_456, ssa_424 | |
vec1 32 ssa_481 = imul ssa_457, ssa_425 | |
vec1 32 ssa_482 = imul ssa_458, ssa_426 | |
vec1 32 ssa_483 = imul ssa_459, ssa_427 | |
vec1 32 ssa_484 = iadd ssa_476, ssa_452 | |
vec1 32 ssa_485 = iadd ssa_477, ssa_453 | |
vec1 32 ssa_486 = iadd ssa_478, ssa_454 | |
vec1 32 ssa_487 = iadd ssa_479, ssa_455 | |
vec1 32 ssa_488 = iadd ssa_480, ssa_456 | |
vec1 32 ssa_489 = iadd ssa_481, ssa_457 | |
vec1 32 ssa_490 = iadd ssa_482, ssa_458 | |
vec1 32 ssa_491 = iadd ssa_483, ssa_459 | |
vec1 32 ssa_492 = imul ssa_468, ssa_436 | |
vec1 32 ssa_493 = imul ssa_469, ssa_437 | |
vec1 32 ssa_494 = imul ssa_470, ssa_438 | |
vec1 32 ssa_495 = imul ssa_471, ssa_439 | |
vec1 32 ssa_496 = imul ssa_472, ssa_440 | |
vec1 32 ssa_497 = imul ssa_473, ssa_441 | |
vec1 32 ssa_498 = imul ssa_474, ssa_442 | |
vec1 32 ssa_499 = imul ssa_475, ssa_443 | |
vec1 32 ssa_500 = iadd ssa_492, ssa_468 | |
vec1 32 ssa_501 = iadd ssa_493, ssa_469 | |
vec1 32 ssa_502 = iadd ssa_494, ssa_470 | |
vec1 32 ssa_503 = iadd ssa_495, ssa_471 | |
vec1 32 ssa_504 = iadd ssa_496, ssa_472 | |
vec1 32 ssa_505 = iadd ssa_497, ssa_473 | |
vec1 32 ssa_506 = iadd ssa_498, ssa_474 | |
vec1 32 ssa_507 = iadd ssa_499, ssa_475 | |
vec1 32 ssa_508 = imul ssa_484, ssa_452 | |
vec1 32 ssa_509 = imul ssa_485, ssa_453 | |
vec1 32 ssa_510 = imul ssa_486, ssa_454 | |
vec1 32 ssa_511 = imul ssa_487, ssa_455 | |
vec1 32 ssa_512 = imul ssa_488, ssa_456 | |
vec1 32 ssa_513 = imul ssa_489, ssa_457 | |
vec1 32 ssa_514 = imul ssa_490, ssa_458 | |
vec1 32 ssa_515 = imul ssa_491, ssa_459 | |
vec1 32 ssa_516 = iadd ssa_508, ssa_484 | |
vec1 32 ssa_517 = iadd ssa_509, ssa_485 | |
vec1 32 ssa_518 = iadd ssa_510, ssa_486 | |
vec1 32 ssa_519 = iadd ssa_511, ssa_487 | |
vec1 32 ssa_520 = iadd ssa_512, ssa_488 | |
vec1 32 ssa_521 = iadd ssa_513, ssa_489 | |
vec1 32 ssa_522 = iadd ssa_514, ssa_490 | |
vec1 32 ssa_523 = iadd ssa_515, ssa_491 | |
vec1 32 ssa_524 = imul ssa_500, ssa_468 | |
vec1 32 ssa_525 = imul ssa_501, ssa_469 | |
vec1 32 ssa_526 = imul ssa_502, ssa_470 | |
vec1 32 ssa_527 = imul ssa_503, ssa_471 | |
vec1 32 ssa_528 = imul ssa_504, ssa_472 | |
vec1 32 ssa_529 = imul ssa_505, ssa_473 | |
vec1 32 ssa_530 = imul ssa_506, ssa_474 | |
vec1 32 ssa_531 = imul ssa_507, ssa_475 | |
vec1 32 ssa_532 = iadd ssa_524, ssa_500 | |
vec1 32 ssa_533 = iadd ssa_525, ssa_501 | |
vec1 32 ssa_534 = iadd ssa_526, ssa_502 | |
vec1 32 ssa_535 = iadd ssa_527, ssa_503 | |
vec1 32 ssa_536 = iadd ssa_528, ssa_504 | |
vec1 32 ssa_537 = iadd ssa_529, ssa_505 | |
vec1 32 ssa_538 = iadd ssa_530, ssa_506 | |
vec1 32 ssa_539 = iadd ssa_531, ssa_507 | |
vec1 32 ssa_540 = imul ssa_516, ssa_484 | |
vec1 32 ssa_541 = imul ssa_517, ssa_485 | |
vec1 32 ssa_542 = imul ssa_518, ssa_486 | |
vec1 32 ssa_543 = imul ssa_519, ssa_487 | |
vec1 32 ssa_544 = imul ssa_520, ssa_488 | |
vec1 32 ssa_545 = imul ssa_521, ssa_489 | |
vec1 32 ssa_546 = imul ssa_522, ssa_490 | |
vec1 32 ssa_547 = imul ssa_523, ssa_491 | |
vec1 32 ssa_548 = iadd ssa_540, ssa_516 | |
vec1 32 ssa_549 = iadd ssa_541, ssa_517 | |
vec1 32 ssa_550 = iadd ssa_542, ssa_518 | |
vec1 32 ssa_551 = iadd ssa_543, ssa_519 | |
vec1 32 ssa_552 = iadd ssa_544, ssa_520 | |
vec1 32 ssa_553 = iadd ssa_545, ssa_521 | |
vec1 32 ssa_554 = iadd ssa_546, ssa_522 | |
vec1 32 ssa_555 = iadd ssa_547, ssa_523 | |
vec1 32 ssa_556 = imul ssa_532, ssa_500 | |
vec1 32 ssa_557 = imul ssa_533, ssa_501 | |
vec1 32 ssa_558 = imul ssa_534, ssa_502 | |
vec1 32 ssa_559 = imul ssa_535, ssa_503 | |
vec1 32 ssa_560 = imul ssa_536, ssa_504 | |
vec1 32 ssa_561 = imul ssa_537, ssa_505 | |
vec1 32 ssa_562 = imul ssa_538, ssa_506 | |
vec1 32 ssa_563 = imul ssa_539, ssa_507 | |
vec1 32 ssa_564 = iadd ssa_556, ssa_532 | |
vec1 32 ssa_565 = iadd ssa_557, ssa_533 | |
vec1 32 ssa_566 = iadd ssa_558, ssa_534 | |
vec1 32 ssa_567 = iadd ssa_559, ssa_535 | |
vec1 32 ssa_568 = iadd ssa_560, ssa_536 | |
vec1 32 ssa_569 = iadd ssa_561, ssa_537 | |
vec1 32 ssa_570 = iadd ssa_562, ssa_538 | |
vec1 32 ssa_571 = iadd ssa_563, ssa_539 | |
vec1 32 ssa_572 = imul ssa_548, ssa_516 | |
vec1 32 ssa_573 = imul ssa_549, ssa_517 | |
vec1 32 ssa_574 = imul ssa_550, ssa_518 | |
vec1 32 ssa_575 = imul ssa_551, ssa_519 | |
vec1 32 ssa_576 = imul ssa_552, ssa_520 | |
vec1 32 ssa_577 = imul ssa_553, ssa_521 | |
vec1 32 ssa_578 = imul ssa_554, ssa_522 | |
vec1 32 ssa_579 = imul ssa_555, ssa_523 | |
vec1 32 ssa_580 = iadd ssa_572, ssa_548 | |
vec1 32 ssa_581 = iadd ssa_573, ssa_549 | |
vec1 32 ssa_582 = iadd ssa_574, ssa_550 | |
vec1 32 ssa_583 = iadd ssa_575, ssa_551 | |
vec1 32 ssa_584 = iadd ssa_576, ssa_552 | |
vec1 32 ssa_585 = iadd ssa_577, ssa_553 | |
vec1 32 ssa_586 = iadd ssa_578, ssa_554 | |
vec1 32 ssa_587 = iadd ssa_579, ssa_555 | |
vec1 32 ssa_588 = iadd ssa_74, ssa_1 | |
/* succs: block_1 */ | |
} | |
block block_5: | |
/* preds: block_2 */ | |
vec1 32 ssa_589 = iadd ssa_54, ssa_46 | |
vec1 32 ssa_590 = iadd ssa_55, ssa_47 | |
vec1 32 ssa_591 = iadd ssa_56, ssa_48 | |
vec1 32 ssa_592 = iadd ssa_57, ssa_49 | |
vec1 32 ssa_593 = iadd ssa_50, ssa_42 | |
vec1 32 ssa_594 = iadd ssa_51, ssa_43 | |
vec1 32 ssa_595 = iadd ssa_52, ssa_44 | |
vec1 32 ssa_596 = iadd ssa_53, ssa_45 | |
vec1 32 ssa_597 = iadd ssa_589, ssa_593 | |
vec1 32 ssa_598 = iadd ssa_590, ssa_594 | |
vec1 32 ssa_599 = iadd ssa_591, ssa_595 | |
vec1 32 ssa_600 = iadd ssa_592, ssa_596 | |
vec1 32 ssa_601 = iadd ssa_597, ssa_599 | |
vec1 32 ssa_602 = iadd ssa_598, ssa_600 | |
vec1 32 ssa_603 = ishl ssa_22, ssa_2 | |
vec1 32 ssa_604 = iadd ssa_601, ssa_602 | |
vec1 64 ssa_605 = intrinsic load_ssbo_address (ssa_0) () | |
vec1 32 ssa_606 = unpack_64_2x32_split_x ssa_605 | |
vec1 32 ssa_607 = unpack_64_2x32_split_y ssa_605 | |
vec1 32 ssa_608 = iadd ssa_606, ssa_603 | |
vec1 32 ssa_609 = ult32 ssa_608, ssa_606 | |
vec1 32 ssa_610 = b2i32 ssa_609 | |
vec1 32 ssa_611 = iadd ssa_610, ssa_607 | |
vec1 64 ssa_612 = pack_64_2x32_split ssa_608, ssa_611 | |
intrinsic store_global (ssa_604, ssa_612) (wrmask=x /*1*/, access=0, align_mul=4, align_offset=0) | |
/* succs: block_6 */ | |
block block_6: | |
} | |
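The address computation at the tail of the NIR (ssa_603..ssa_612) is worth a note: the store address is base + 4*id (ssa_603 is id shifted left by 2), but the 64-bit add is done in 32-bit halves with an explicit carry (ult32 then b2i32) before repacking. In C terms:
example (C) ----------
#include <stdint.h>

/* ssa_606..ssa_612: 64-bit SSBO address = base + byte_offset, computed
 * in 32-bit halves with an explicit carry out of the low word. */
static uint64_t ssbo_address(uint32_t base_lo, uint32_t base_hi,
                             uint32_t byte_offset /* id << 2 */)
{
    uint32_t lo = base_lo + byte_offset;  /* iadd ssa_608       */
    uint32_t carry = lo < base_lo;        /* ult32 + b2i32      */
    uint32_t hi = base_hi + carry;        /* iadd ssa_611       */
    return ((uint64_t)hi << 32) | lo;     /* pack_64_2x32_split */
}
----------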
block0 { | |
617 = MOV.i32 r62 | |
616 = MOV.i32 r61 | |
615 = MOV.i32 r60 | |
18 = LSHIFT_OR.i32 616, #0x0, #0x8.b0 | |
19 = IADD.s32 615, 18 | |
614 = MKVEC.v2i16 #0x0.h00, 617.h00 | |
22 = IADD.s32 19, 614 | |
24 = IADD.s32 u1, #0x1 | |
25 = IADD.s32 u1, #0x2 | |
26 = IADD.s32 u1, #0x3 | |
27 = IADD.s32 u1, #0x4 | |
28 = IADD.s32 u1, #0x5 | |
29 = IADD.s32 u1, #0x6 | |
30 = IADD.s32 u1, #0x7 | |
31 = IADD.s32 u1, #0x8 | |
32 = IADD.s32 u1, #0x9 | |
33 = IADD.s32 u1, #0xa | |
34 = IADD.s32 u1, #0xb | |
35 = IADD.s32 u1, #0xc | |
36 = IADD.s32 u1, #0xd | |
37 = IADD.s32 u1, #0xe | |
38 = IADD.s32 u1, #0xf | |
39 = U32_TO_F32 22 | |
40 = FMA.f32 39, #0x2edbe6ff, #0x0.neg | |
41 = F32_TO_S32.rtz 40 | |
} -> block1 | |
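block0 shows how the flat id from the GLSL (x + y*256u + z*256u*256u) lowers: the y term becomes a left shift by 8 (LSHIFT_OR.i32) and the z term a 16-bit insert into the upper half-word (MKVEC.v2i16, insert_u16 in the NIR), i.e.:
example (C) ----------
#include <stdint.h>

/* id = x + y*256 + z*65536, as lowered in block0. */
static uint32_t flat_id(uint32_t x, uint32_t y, uint32_t z)
{
    return x + (y << 8)   /* LSHIFT_OR.i32 ..., #0x8      */
             + (z << 16); /* MKVEC.v2i16 #0x0.h00, z.h00  */
}
----------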
block1 { | |
42 = PHI 41, 584 | |
43 = PHI 41, 585 | |
44 = PHI 41, 586 | |
45 = PHI 41, 587 | |
46 = PHI 41, 580 | |
47 = PHI 41, 581 | |
48 = PHI 41, 582 | |
49 = PHI 41, 583 | |
50 = PHI 41, 568 | |
51 = PHI 41, 569 | |
52 = PHI 41, 570 | |
53 = PHI 41, 571 | |
54 = PHI 41, 564 | |
55 = PHI 41, 565 | |
56 = PHI 41, 566 | |
57 = PHI 41, 567 | |
58 = PHI 35, 552 | |
59 = PHI 36, 553 | |
60 = PHI 37, 554 | |
61 = PHI 38, 555 | |
62 = PHI 31, 548 | |
63 = PHI 32, 549 | |
64 = PHI 33, 550 | |
65 = PHI 34, 551 | |
66 = PHI 27, 536 | |
67 = PHI 28, 537 | |
68 = PHI 29, 538 | |
69 = PHI 30, 539 | |
70 = PHI u1, 532 | |
71 = PHI 24, 533 | |
72 = PHI 25, 534 | |
73 = PHI 26, 535 | |
74 = PHI #0x0, 588 | |
75 = ICMP.s32.m1.ge 74, #0x8 | |
BRANCHZ.i16.eq 75.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
76 = IMUL.i32 54, 70 | |
77 = IMUL.i32 55, 71 | |
78 = IMUL.i32 56, 72 | |
79 = IMUL.i32 57, 73 | |
80 = IMUL.i32 50, 66 | |
81 = IMUL.i32 51, 67 | |
82 = IMUL.i32 52, 68 | |
83 = IMUL.i32 53, 69 | |
84 = IADD.s32 76, 54 | |
85 = IADD.s32 77, 55 | |
86 = IADD.s32 78, 56 | |
87 = IADD.s32 79, 57 | |
88 = IADD.s32 80, 50 | |
89 = IADD.s32 81, 51 | |
90 = IADD.s32 82, 52 | |
91 = IADD.s32 83, 53 | |
92 = IMUL.i32 46, 62 | |
93 = IMUL.i32 47, 63 | |
94 = IMUL.i32 48, 64 | |
95 = IMUL.i32 49, 65 | |
96 = IMUL.i32 42, 58 | |
97 = IMUL.i32 43, 59 | |
98 = IMUL.i32 44, 60 | |
99 = IMUL.i32 45, 61 | |
100 = IADD.s32 92, 46 | |
101 = IADD.s32 93, 47 | |
102 = IADD.s32 94, 48 | |
103 = IADD.s32 95, 49 | |
104 = IADD.s32 96, 42 | |
105 = IADD.s32 97, 43 | |
106 = IADD.s32 98, 44 | |
107 = IADD.s32 99, 45 | |
108 = IMUL.i32 84, 54 | |
109 = IMUL.i32 85, 55 | |
110 = IMUL.i32 86, 56 | |
111 = IMUL.i32 87, 57 | |
112 = IMUL.i32 88, 50 | |
113 = IMUL.i32 89, 51 | |
114 = IMUL.i32 90, 52 | |
115 = IMUL.i32 91, 53 | |
116 = IADD.s32 108, 84 | |
117 = IADD.s32 109, 85 | |
118 = IADD.s32 110, 86 | |
119 = IADD.s32 111, 87 | |
120 = IADD.s32 112, 88 | |
121 = IADD.s32 113, 89 | |
122 = IADD.s32 114, 90 | |
123 = IADD.s32 115, 91 | |
124 = IMUL.i32 100, 46 | |
125 = IMUL.i32 101, 47 | |
126 = IMUL.i32 102, 48 | |
127 = IMUL.i32 103, 49 | |
128 = IMUL.i32 104, 42 | |
129 = IMUL.i32 105, 43 | |
130 = IMUL.i32 106, 44 | |
131 = IMUL.i32 107, 45 | |
132 = IADD.s32 124, 100 | |
133 = IADD.s32 125, 101 | |
134 = IADD.s32 126, 102 | |
135 = IADD.s32 127, 103 | |
136 = IADD.s32 128, 104 | |
137 = IADD.s32 129, 105 | |
138 = IADD.s32 130, 106 | |
139 = IADD.s32 131, 107 | |
140 = IMUL.i32 116, 84 | |
141 = IMUL.i32 117, 85 | |
142 = IMUL.i32 118, 86 | |
143 = IMUL.i32 119, 87 | |
144 = IMUL.i32 120, 88 | |
145 = IMUL.i32 121, 89 | |
146 = IMUL.i32 122, 90 | |
147 = IMUL.i32 123, 91 | |
148 = IADD.s32 140, 116 | |
149 = IADD.s32 141, 117 | |
150 = IADD.s32 142, 118 | |
151 = IADD.s32 143, 119 | |
152 = IADD.s32 144, 120 | |
153 = IADD.s32 145, 121 | |
154 = IADD.s32 146, 122 | |
155 = IADD.s32 147, 123 | |
156 = IMUL.i32 132, 100 | |
157 = IMUL.i32 133, 101 | |
158 = IMUL.i32 134, 102 | |
159 = IMUL.i32 135, 103 | |
160 = IMUL.i32 136, 104 | |
161 = IMUL.i32 137, 105 | |
162 = IMUL.i32 138, 106 | |
163 = IMUL.i32 139, 107 | |
164 = IADD.s32 156, 132 | |
165 = IADD.s32 157, 133 | |
166 = IADD.s32 158, 134 | |
167 = IADD.s32 159, 135 | |
168 = IADD.s32 160, 136 | |
169 = IADD.s32 161, 137 | |
170 = IADD.s32 162, 138 | |
171 = IADD.s32 163, 139 | |
172 = IMUL.i32 148, 116 | |
173 = IMUL.i32 149, 117 | |
174 = IMUL.i32 150, 118 | |
175 = IMUL.i32 151, 119 | |
176 = IMUL.i32 152, 120 | |
177 = IMUL.i32 153, 121 | |
178 = IMUL.i32 154, 122 | |
179 = IMUL.i32 155, 123 | |
180 = IADD.s32 172, 148 | |
181 = IADD.s32 173, 149 | |
182 = IADD.s32 174, 150 | |
183 = IADD.s32 175, 151 | |
184 = IADD.s32 176, 152 | |
185 = IADD.s32 177, 153 | |
186 = IADD.s32 178, 154 | |
187 = IADD.s32 179, 155 | |
188 = IMUL.i32 164, 132 | |
189 = IMUL.i32 165, 133 | |
190 = IMUL.i32 166, 134 | |
191 = IMUL.i32 167, 135 | |
192 = IMUL.i32 168, 136 | |
193 = IMUL.i32 169, 137 | |
194 = IMUL.i32 170, 138 | |
195 = IMUL.i32 171, 139 | |
196 = IADD.s32 188, 164 | |
197 = IADD.s32 189, 165 | |
198 = IADD.s32 190, 166 | |
199 = IADD.s32 191, 167 | |
200 = IADD.s32 192, 168 | |
201 = IADD.s32 193, 169 | |
202 = IADD.s32 194, 170 | |
203 = IADD.s32 195, 171 | |
204 = IMUL.i32 180, 148 | |
205 = IMUL.i32 181, 149 | |
206 = IMUL.i32 182, 150 | |
207 = IMUL.i32 183, 151 | |
208 = IMUL.i32 184, 152 | |
209 = IMUL.i32 185, 153 | |
210 = IMUL.i32 186, 154 | |
211 = IMUL.i32 187, 155 | |
212 = IADD.s32 204, 180 | |
213 = IADD.s32 205, 181 | |
214 = IADD.s32 206, 182 | |
215 = IADD.s32 207, 183 | |
216 = IADD.s32 208, 184 | |
217 = IADD.s32 209, 185 | |
218 = IADD.s32 210, 186 | |
219 = IADD.s32 211, 187 | |
220 = IMUL.i32 196, 164 | |
221 = IMUL.i32 197, 165 | |
222 = IMUL.i32 198, 166 | |
223 = IMUL.i32 199, 167 | |
224 = IMUL.i32 200, 168 | |
225 = IMUL.i32 201, 169 | |
226 = IMUL.i32 202, 170 | |
227 = IMUL.i32 203, 171 | |
228 = IADD.s32 220, 196 | |
229 = IADD.s32 221, 197 | |
230 = IADD.s32 222, 198 | |
231 = IADD.s32 223, 199 | |
232 = IADD.s32 224, 200 | |
233 = IADD.s32 225, 201 | |
234 = IADD.s32 226, 202 | |
235 = IADD.s32 227, 203 | |
236 = IMUL.i32 212, 180 | |
237 = IMUL.i32 213, 181 | |
238 = IMUL.i32 214, 182 | |
239 = IMUL.i32 215, 183 | |
240 = IMUL.i32 216, 184 | |
241 = IMUL.i32 217, 185 | |
242 = IMUL.i32 218, 186 | |
243 = IMUL.i32 219, 187 | |
244 = IADD.s32 236, 212 | |
245 = IADD.s32 237, 213 | |
246 = IADD.s32 238, 214 | |
247 = IADD.s32 239, 215 | |
248 = IADD.s32 240, 216 | |
249 = IADD.s32 241, 217 | |
250 = IADD.s32 242, 218 | |
251 = IADD.s32 243, 219 | |
252 = IMUL.i32 228, 196 | |
253 = IMUL.i32 229, 197 | |
254 = IMUL.i32 230, 198 | |
255 = IMUL.i32 231, 199 | |
256 = IMUL.i32 232, 200 | |
257 = IMUL.i32 233, 201 | |
258 = IMUL.i32 234, 202 | |
259 = IMUL.i32 235, 203 | |
260 = IADD.s32 252, 228 | |
261 = IADD.s32 253, 229 | |
262 = IADD.s32 254, 230 | |
263 = IADD.s32 255, 231 | |
264 = IADD.s32 256, 232 | |
265 = IADD.s32 257, 233 | |
266 = IADD.s32 258, 234 | |
267 = IADD.s32 259, 235 | |
268 = IMUL.i32 244, 212 | |
269 = IMUL.i32 245, 213 | |
270 = IMUL.i32 246, 214 | |
271 = IMUL.i32 247, 215 | |
272 = IMUL.i32 248, 216 | |
273 = IMUL.i32 249, 217 | |
274 = IMUL.i32 250, 218 | |
275 = IMUL.i32 251, 219 | |
276 = IADD.s32 268, 244 | |
277 = IADD.s32 269, 245 | |
278 = IADD.s32 270, 246 | |
279 = IADD.s32 271, 247 | |
280 = IADD.s32 272, 248 | |
281 = IADD.s32 273, 249 | |
282 = IADD.s32 274, 250 | |
283 = IADD.s32 275, 251 | |
284 = IMUL.i32 260, 228 | |
285 = IMUL.i32 261, 229 | |
286 = IMUL.i32 262, 230 | |
287 = IMUL.i32 263, 231 | |
288 = IMUL.i32 264, 232 | |
289 = IMUL.i32 265, 233 | |
290 = IMUL.i32 266, 234 | |
291 = IMUL.i32 267, 235 | |
292 = IADD.s32 284, 260 | |
293 = IADD.s32 285, 261 | |
294 = IADD.s32 286, 262 | |
295 = IADD.s32 287, 263 | |
296 = IADD.s32 288, 264 | |
297 = IADD.s32 289, 265 | |
298 = IADD.s32 290, 266 | |
299 = IADD.s32 291, 267 | |
300 = IMUL.i32 276, 244 | |
301 = IMUL.i32 277, 245 | |
302 = IMUL.i32 278, 246 | |
303 = IMUL.i32 279, 247 | |
304 = IMUL.i32 280, 248 | |
305 = IMUL.i32 281, 249 | |
306 = IMUL.i32 282, 250 | |
307 = IMUL.i32 283, 251 | |
308 = IADD.s32 300, 276 | |
309 = IADD.s32 301, 277 | |
310 = IADD.s32 302, 278 | |
311 = IADD.s32 303, 279 | |
312 = IADD.s32 304, 280 | |
313 = IADD.s32 305, 281 | |
314 = IADD.s32 306, 282 | |
315 = IADD.s32 307, 283 | |
316 = IMUL.i32 292, 260 | |
317 = IMUL.i32 293, 261 | |
318 = IMUL.i32 294, 262 | |
319 = IMUL.i32 295, 263 | |
320 = IMUL.i32 296, 264 | |
321 = IMUL.i32 297, 265 | |
322 = IMUL.i32 298, 266 | |
323 = IMUL.i32 299, 267 | |
324 = IADD.s32 316, 292 | |
325 = IADD.s32 317, 293 | |
326 = IADD.s32 318, 294 | |
327 = IADD.s32 319, 295 | |
328 = IADD.s32 320, 296 | |
329 = IADD.s32 321, 297 | |
330 = IADD.s32 322, 298 | |
331 = IADD.s32 323, 299 | |
332 = IMUL.i32 308, 276 | |
333 = IMUL.i32 309, 277 | |
334 = IMUL.i32 310, 278 | |
335 = IMUL.i32 311, 279 | |
336 = IMUL.i32 312, 280 | |
337 = IMUL.i32 313, 281 | |
338 = IMUL.i32 314, 282 | |
339 = IMUL.i32 315, 283 | |
340 = IADD.s32 332, 308 | |
341 = IADD.s32 333, 309 | |
342 = IADD.s32 334, 310 | |
343 = IADD.s32 335, 311 | |
344 = IADD.s32 336, 312 | |
345 = IADD.s32 337, 313 | |
346 = IADD.s32 338, 314 | |
347 = IADD.s32 339, 315 | |
348 = IMUL.i32 324, 292 | |
349 = IMUL.i32 325, 293 | |
350 = IMUL.i32 326, 294 | |
351 = IMUL.i32 327, 295 | |
352 = IMUL.i32 328, 296 | |
353 = IMUL.i32 329, 297 | |
354 = IMUL.i32 330, 298 | |
355 = IMUL.i32 331, 299 | |
356 = IADD.s32 348, 324 | |
357 = IADD.s32 349, 325 | |
358 = IADD.s32 350, 326 | |
359 = IADD.s32 351, 327 | |
360 = IADD.s32 352, 328 | |
361 = IADD.s32 353, 329 | |
362 = IADD.s32 354, 330 | |
363 = IADD.s32 355, 331 | |
364 = IMUL.i32 340, 308 | |
365 = IMUL.i32 341, 309 | |
366 = IMUL.i32 342, 310 | |
367 = IMUL.i32 343, 311 | |
368 = IMUL.i32 344, 312 | |
369 = IMUL.i32 345, 313 | |
370 = IMUL.i32 346, 314 | |
371 = IMUL.i32 347, 315 | |
372 = IADD.s32 364, 340 | |
373 = IADD.s32 365, 341 | |
374 = IADD.s32 366, 342 | |
375 = IADD.s32 367, 343 | |
376 = IADD.s32 368, 344 | |
377 = IADD.s32 369, 345 | |
378 = IADD.s32 370, 346 | |
379 = IADD.s32 371, 347 | |
380 = IMUL.i32 356, 324 | |
381 = IMUL.i32 357, 325 | |
382 = IMUL.i32 358, 326 | |
383 = IMUL.i32 359, 327 | |
384 = IMUL.i32 360, 328 | |
385 = IMUL.i32 361, 329 | |
386 = IMUL.i32 362, 330 | |
387 = IMUL.i32 363, 331 | |
388 = IADD.s32 380, 356 | |
389 = IADD.s32 381, 357 | |
390 = IADD.s32 382, 358 | |
391 = IADD.s32 383, 359 | |
392 = IADD.s32 384, 360 | |
393 = IADD.s32 385, 361 | |
394 = IADD.s32 386, 362 | |
395 = IADD.s32 387, 363 | |
396 = IMUL.i32 372, 340 | |
397 = IMUL.i32 373, 341 | |
398 = IMUL.i32 374, 342 | |
399 = IMUL.i32 375, 343 | |
400 = IMUL.i32 376, 344 | |
401 = IMUL.i32 377, 345 | |
402 = IMUL.i32 378, 346 | |
403 = IMUL.i32 379, 347 | |
404 = IADD.s32 396, 372 | |
405 = IADD.s32 397, 373 | |
406 = IADD.s32 398, 374 | |
407 = IADD.s32 399, 375 | |
408 = IADD.s32 400, 376 | |
409 = IADD.s32 401, 377 | |
410 = IADD.s32 402, 378 | |
411 = IADD.s32 403, 379 | |
412 = IMUL.i32 388, 356 | |
413 = IMUL.i32 389, 357 | |
414 = IMUL.i32 390, 358 | |
415 = IMUL.i32 391, 359 | |
416 = IMUL.i32 392, 360 | |
417 = IMUL.i32 393, 361 | |
418 = IMUL.i32 394, 362 | |
419 = IMUL.i32 395, 363 | |
420 = IADD.s32 412, 388 | |
421 = IADD.s32 413, 389 | |
422 = IADD.s32 414, 390 | |
423 = IADD.s32 415, 391 | |
424 = IADD.s32 416, 392 | |
425 = IADD.s32 417, 393 | |
426 = IADD.s32 418, 394 | |
427 = IADD.s32 419, 395 | |
428 = IMUL.i32 404, 372 | |
429 = IMUL.i32 405, 373 | |
430 = IMUL.i32 406, 374 | |
431 = IMUL.i32 407, 375 | |
432 = IMUL.i32 408, 376 | |
433 = IMUL.i32 409, 377 | |
434 = IMUL.i32 410, 378 | |
435 = IMUL.i32 411, 379 | |
436 = IADD.s32 428, 404 | |
437 = IADD.s32 429, 405 | |
438 = IADD.s32 430, 406 | |
439 = IADD.s32 431, 407 | |
440 = IADD.s32 432, 408 | |
441 = IADD.s32 433, 409 | |
442 = IADD.s32 434, 410 | |
443 = IADD.s32 435, 411 | |
444 = IMUL.i32 420, 388 | |
445 = IMUL.i32 421, 389 | |
446 = IMUL.i32 422, 390 | |
447 = IMUL.i32 423, 391 | |
448 = IMUL.i32 424, 392 | |
449 = IMUL.i32 425, 393 | |
450 = IMUL.i32 426, 394 | |
451 = IMUL.i32 427, 395 | |
452 = IADD.s32 444, 420 | |
453 = IADD.s32 445, 421 | |
454 = IADD.s32 446, 422 | |
455 = IADD.s32 447, 423 | |
456 = IADD.s32 448, 424 | |
457 = IADD.s32 449, 425 | |
458 = IADD.s32 450, 426 | |
459 = IADD.s32 451, 427 | |
460 = IMUL.i32 436, 404 | |
461 = IMUL.i32 437, 405 | |
462 = IMUL.i32 438, 406 | |
463 = IMUL.i32 439, 407 | |
464 = IMUL.i32 440, 408 | |
465 = IMUL.i32 441, 409 | |
466 = IMUL.i32 442, 410 | |
467 = IMUL.i32 443, 411 | |
468 = IADD.s32 460, 436 | |
469 = IADD.s32 461, 437 | |
470 = IADD.s32 462, 438 | |
471 = IADD.s32 463, 439 | |
472 = IADD.s32 464, 440 | |
473 = IADD.s32 465, 441 | |
474 = IADD.s32 466, 442 | |
475 = IADD.s32 467, 443 | |
476 = IMUL.i32 452, 420 | |
477 = IMUL.i32 453, 421 | |
478 = IMUL.i32 454, 422 | |
479 = IMUL.i32 455, 423 | |
480 = IMUL.i32 456, 424 | |
481 = IMUL.i32 457, 425 | |
482 = IMUL.i32 458, 426 | |
483 = IMUL.i32 459, 427 | |
484 = IADD.s32 476, 452 | |
485 = IADD.s32 477, 453 | |
486 = IADD.s32 478, 454 | |
487 = IADD.s32 479, 455 | |
488 = IADD.s32 480, 456 | |
489 = IADD.s32 481, 457 | |
490 = IADD.s32 482, 458 | |
491 = IADD.s32 483, 459 | |
492 = IMUL.i32 468, 436 | |
493 = IMUL.i32 469, 437 | |
494 = IMUL.i32 470, 438 | |
495 = IMUL.i32 471, 439 | |
496 = IMUL.i32 472, 440 | |
497 = IMUL.i32 473, 441 | |
498 = IMUL.i32 474, 442 | |
499 = IMUL.i32 475, 443 | |
500 = IADD.s32 492, 468 | |
501 = IADD.s32 493, 469 | |
502 = IADD.s32 494, 470 | |
503 = IADD.s32 495, 471 | |
504 = IADD.s32 496, 472 | |
505 = IADD.s32 497, 473 | |
506 = IADD.s32 498, 474 | |
507 = IADD.s32 499, 475 | |
508 = IMUL.i32 484, 452 | |
509 = IMUL.i32 485, 453 | |
510 = IMUL.i32 486, 454 | |
511 = IMUL.i32 487, 455 | |
512 = IMUL.i32 488, 456 | |
513 = IMUL.i32 489, 457 | |
514 = IMUL.i32 490, 458 | |
515 = IMUL.i32 491, 459 | |
516 = IADD.s32 508, 484 | |
517 = IADD.s32 509, 485 | |
518 = IADD.s32 510, 486 | |
519 = IADD.s32 511, 487 | |
520 = IADD.s32 512, 488 | |
521 = IADD.s32 513, 489 | |
522 = IADD.s32 514, 490 | |
523 = IADD.s32 515, 491 | |
524 = IMUL.i32 500, 468 | |
525 = IMUL.i32 501, 469 | |
526 = IMUL.i32 502, 470 | |
527 = IMUL.i32 503, 471 | |
528 = IMUL.i32 504, 472 | |
529 = IMUL.i32 505, 473 | |
530 = IMUL.i32 506, 474 | |
531 = IMUL.i32 507, 475 | |
532 = IADD.s32 524, 500 | |
533 = IADD.s32 525, 501 | |
534 = IADD.s32 526, 502 | |
535 = IADD.s32 527, 503 | |
536 = IADD.s32 528, 504 | |
537 = IADD.s32 529, 505 | |
538 = IADD.s32 530, 506 | |
539 = IADD.s32 531, 507 | |
540 = IMUL.i32 516, 484 | |
541 = IMUL.i32 517, 485 | |
542 = IMUL.i32 518, 486 | |
543 = IMUL.i32 519, 487 | |
544 = IMUL.i32 520, 488 | |
545 = IMUL.i32 521, 489 | |
546 = IMUL.i32 522, 490 | |
547 = IMUL.i32 523, 491 | |
548 = IADD.s32 540, 516 | |
549 = IADD.s32 541, 517 | |
550 = IADD.s32 542, 518 | |
551 = IADD.s32 543, 519 | |
552 = IADD.s32 544, 520 | |
553 = IADD.s32 545, 521 | |
554 = IADD.s32 546, 522 | |
555 = IADD.s32 547, 523 | |
556 = IMUL.i32 532, 500 | |
557 = IMUL.i32 533, 501 | |
558 = IMUL.i32 534, 502 | |
559 = IMUL.i32 535, 503 | |
560 = IMUL.i32 536, 504 | |
561 = IMUL.i32 537, 505 | |
562 = IMUL.i32 538, 506 | |
563 = IMUL.i32 539, 507 | |
564 = IADD.s32 556, 532 | |
565 = IADD.s32 557, 533 | |
566 = IADD.s32 558, 534 | |
567 = IADD.s32 559, 535 | |
568 = IADD.s32 560, 536 | |
569 = IADD.s32 561, 537 | |
570 = IADD.s32 562, 538 | |
571 = IADD.s32 563, 539 | |
572 = IMUL.i32 548, 516 | |
573 = IMUL.i32 549, 517 | |
574 = IMUL.i32 550, 518 | |
575 = IMUL.i32 551, 519 | |
576 = IMUL.i32 552, 520 | |
577 = IMUL.i32 553, 521 | |
578 = IMUL.i32 554, 522 | |
579 = IMUL.i32 555, 523 | |
580 = IADD.s32 572, 548 | |
581 = IADD.s32 573, 549 | |
582 = IADD.s32 574, 550 | |
583 = IADD.s32 575, 551 | |
584 = IADD.s32 576, 552 | |
585 = IADD.s32 577, 553 | |
586 = IADD.s32 578, 554 | |
587 = IADD.s32 579, 555 | |
588 = IADD.s32 74, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
589 = IADD.s32 54, 46 | |
590 = IADD.s32 55, 47 | |
591 = IADD.s32 56, 48 | |
592 = IADD.s32 57, 49 | |
593 = IADD.s32 50, 42 | |
594 = IADD.s32 51, 43 | |
595 = IADD.s32 52, 44 | |
596 = IADD.s32 53, 45 | |
597 = IADD.s32 589, 593 | |
598 = IADD.s32 590, 594 | |
599 = IADD.s32 591, 595 | |
600 = IADD.s32 592, 596 | |
601 = IADD.s32 597, 599 | |
602 = IADD.s32 598, 600 | |
603 = LSHIFT_OR.i32 22, #0x0, #0x2.b0 | |
604 = IADD.s32 601, 602 | |
608 = IADD.s32 u0, 603 | |
610 = ICMP.u32.i1.lt 608, u0 | |
611 = IADD.s32 610, u0[1] | |
STORE.i32 604, 608, 611, byte_offset:0 | |
} from block2 | |
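A note for readers skimming the listings: the same kernel appears four times in this dump in progressively lower-level forms — first on numbered virtual values (the listing ending above), then after register allocation (r0–r33, beginning below), then as scheduled dual-issue tuples, and finally as packed hardware clauses. The arithmetic pattern is identical throughout: every IMUL.i32 is followed by an IADD.s32 that adds one of the multiplicands back in, so each pair computes a single integer multiply-add, and each result feeds the next pair. A minimal C sketch of one ping-pong step of that chain (variable names are illustrative, inferred from the IR rather than taken from it):

/* 436 = IADD.s32(IMUL.i32(404, 372), 404)   ->   x = y*x + y
   468 = IADD.s32(IMUL.i32(436, 404), 436)   ->   y = x*y + x   */
static inline void mad_step(int *x, int *y)
{
    *x = *y * *x + *y;   /* one IMUL.i32 plus one IADD.s32 */
    *y = *x * *y + *x;
}

Because every step consumes the previous step's result, the chain cannot be reassociated, folded, or auto-vectorized away; the compiler must emit all 512 multiply/add instructions per loop iteration (plus the counter update), which is exactly what a peak-throughput kernel wants.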
block0 { | |
r0 = LSHIFT_OR.i32 r61, #0x0, #0x8.b0 | |
r0 = IADD.s32 r60, r0 | |
r1 = MKVEC.v2i16 #0x0.h00, r62.h00 | |
r0 = IADD.s32 r0, r1 | |
r1 = MOV.i32 #0x1 | |
r1 = IADD.s32 u1, r1 | |
r2 = MOV.i32 #0x2 | |
r2 = IADD.s32 u1, r2 | |
r3 = MOV.i32 #0x3 | |
r3 = IADD.s32 u1, r3 | |
r4 = MOV.i32 #0x4 | |
r4 = IADD.s32 u1, r4 | |
r5 = MOV.i32 #0x5 | |
r5 = IADD.s32 u1, r5 | |
r6 = MOV.i32 #0x6 | |
r6 = IADD.s32 u1, r6 | |
r7 = MOV.i32 #0x7 | |
r7 = IADD.s32 u1, r7 | |
r8 = MOV.i32 #0x8 | |
r8 = IADD.s32 u1, r8 | |
r9 = MOV.i32 #0x9 | |
r9 = IADD.s32 u1, r9 | |
r10 = MOV.i32 #0xa | |
r10 = IADD.s32 u1, r10 | |
r11 = MOV.i32 #0xb | |
r11 = IADD.s32 u1, r11 | |
r12 = MOV.i32 #0xc | |
r12 = IADD.s32 u1, r12 | |
r13 = MOV.i32 #0xd | |
r13 = IADD.s32 u1, r13 | |
r14 = MOV.i32 #0xe | |
r14 = IADD.s32 u1, r14 | |
r15 = MOV.i32 #0xf | |
r15 = IADD.s32 u1, r15 | |
r16 = U32_TO_F32 r0 | |
r16 = FMA.f32 r16, #0x2edbe6ff, #0x0.neg | |
r16 = F32_TO_S32.rtz r16 | |
r17 = MOV.i32 r16 | |
r18 = MOV.i32 r16 | |
r19 = MOV.i32 r16 | |
r20 = MOV.i32 r16 | |
r21 = MOV.i32 r16 | |
r22 = MOV.i32 r16 | |
r23 = MOV.i32 r16 | |
r24 = MOV.i32 r16 | |
r25 = MOV.i32 r16 | |
r26 = MOV.i32 r16 | |
r27 = MOV.i32 r16 | |
r28 = MOV.i32 r16 | |
r29 = MOV.i32 r16 | |
r30 = MOV.i32 r16 | |
r31 = MOV.i32 r16 | |
r32 = MOV.i32 u1 | |
r33 = MOV.i32 #0x0 | |
} -> block1 | |
block1 { | |
r34 = ICMP.s32.m1.ge r33, #0x8 | |
BRANCHZ.i16.eq r34.h00, #0x0 -> block3 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
JUMP #0x0 -> block5 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
r32 = IMUL.i32 r28, r32 | |
r1 = IMUL.i32 r29, r1 | |
r2 = IMUL.i32 r30, r2 | |
r3 = IMUL.i32 r31, r3 | |
r4 = IMUL.i32 r24, r4 | |
r5 = IMUL.i32 r25, r5 | |
r6 = IMUL.i32 r26, r6 | |
r7 = IMUL.i32 r27, r7 | |
r32 = IADD.s32 r32, r28 | |
r1 = IADD.s32 r1, r29 | |
r2 = IADD.s32 r2, r30 | |
r3 = IADD.s32 r3, r31 | |
r4 = IADD.s32 r4, r24 | |
r5 = IADD.s32 r5, r25 | |
r6 = IADD.s32 r6, r26 | |
r7 = IADD.s32 r7, r27 | |
r8 = IMUL.i32 r20, r8 | |
r9 = IMUL.i32 r21, r9 | |
r10 = IMUL.i32 r22, r10 | |
r11 = IMUL.i32 r23, r11 | |
r12 = IMUL.i32 r16, r12 | |
r13 = IMUL.i32 r17, r13 | |
r14 = IMUL.i32 r18, r14 | |
r15 = IMUL.i32 r19, r15 | |
r8 = IADD.s32 r8, r20 | |
r9 = IADD.s32 r9, r21 | |
r10 = IADD.s32 r10, r22 | |
r11 = IADD.s32 r11, r23 | |
r12 = IADD.s32 r12, r16 | |
r13 = IADD.s32 r13, r17 | |
r14 = IADD.s32 r14, r18 | |
r15 = IADD.s32 r15, r19 | |
r28 = IMUL.i32 r32, r28 | |
r29 = IMUL.i32 r1, r29 | |
r30 = IMUL.i32 r2, r30 | |
r31 = IMUL.i32 r3, r31 | |
r24 = IMUL.i32 r4, r24 | |
r25 = IMUL.i32 r5, r25 | |
r26 = IMUL.i32 r6, r26 | |
r27 = IMUL.i32 r7, r27 | |
r28 = IADD.s32 r28, r32 | |
r29 = IADD.s32 r29, r1 | |
r30 = IADD.s32 r30, r2 | |
r31 = IADD.s32 r31, r3 | |
r24 = IADD.s32 r24, r4 | |
r25 = IADD.s32 r25, r5 | |
r26 = IADD.s32 r26, r6 | |
r27 = IADD.s32 r27, r7 | |
r20 = IMUL.i32 r8, r20 | |
r21 = IMUL.i32 r9, r21 | |
r22 = IMUL.i32 r10, r22 | |
r23 = IMUL.i32 r11, r23 | |
r16 = IMUL.i32 r12, r16 | |
r17 = IMUL.i32 r13, r17 | |
r18 = IMUL.i32 r14, r18 | |
r19 = IMUL.i32 r15, r19 | |
r20 = IADD.s32 r20, r8 | |
r21 = IADD.s32 r21, r9 | |
r22 = IADD.s32 r22, r10 | |
r23 = IADD.s32 r23, r11 | |
r16 = IADD.s32 r16, r12 | |
r17 = IADD.s32 r17, r13 | |
r18 = IADD.s32 r18, r14 | |
r19 = IADD.s32 r19, r15 | |
r32 = IMUL.i32 r28, r32 | |
r1 = IMUL.i32 r29, r1 | |
r2 = IMUL.i32 r30, r2 | |
r3 = IMUL.i32 r31, r3 | |
r4 = IMUL.i32 r24, r4 | |
r5 = IMUL.i32 r25, r5 | |
r6 = IMUL.i32 r26, r6 | |
r7 = IMUL.i32 r27, r7 | |
r32 = IADD.s32 r32, r28 | |
r1 = IADD.s32 r1, r29 | |
r2 = IADD.s32 r2, r30 | |
r3 = IADD.s32 r3, r31 | |
r4 = IADD.s32 r4, r24 | |
r5 = IADD.s32 r5, r25 | |
r6 = IADD.s32 r6, r26 | |
r7 = IADD.s32 r7, r27 | |
r8 = IMUL.i32 r20, r8 | |
r9 = IMUL.i32 r21, r9 | |
r10 = IMUL.i32 r22, r10 | |
r11 = IMUL.i32 r23, r11 | |
r12 = IMUL.i32 r16, r12 | |
r13 = IMUL.i32 r17, r13 | |
r14 = IMUL.i32 r18, r14 | |
r15 = IMUL.i32 r19, r15 | |
r8 = IADD.s32 r8, r20 | |
r9 = IADD.s32 r9, r21 | |
r10 = IADD.s32 r10, r22 | |
r11 = IADD.s32 r11, r23 | |
r12 = IADD.s32 r12, r16 | |
r13 = IADD.s32 r13, r17 | |
r14 = IADD.s32 r14, r18 | |
r15 = IADD.s32 r15, r19 | |
r28 = IMUL.i32 r32, r28 | |
r29 = IMUL.i32 r1, r29 | |
r30 = IMUL.i32 r2, r30 | |
r31 = IMUL.i32 r3, r31 | |
r24 = IMUL.i32 r4, r24 | |
r25 = IMUL.i32 r5, r25 | |
r26 = IMUL.i32 r6, r26 | |
r27 = IMUL.i32 r7, r27 | |
r28 = IADD.s32 r28, r32 | |
r29 = IADD.s32 r29, r1 | |
r30 = IADD.s32 r30, r2 | |
r31 = IADD.s32 r31, r3 | |
r24 = IADD.s32 r24, r4 | |
r25 = IADD.s32 r25, r5 | |
r26 = IADD.s32 r26, r6 | |
r27 = IADD.s32 r27, r7 | |
r20 = IMUL.i32 r8, r20 | |
r21 = IMUL.i32 r9, r21 | |
r22 = IMUL.i32 r10, r22 | |
r23 = IMUL.i32 r11, r23 | |
r16 = IMUL.i32 r12, r16 | |
r17 = IMUL.i32 r13, r17 | |
r18 = IMUL.i32 r14, r18 | |
r19 = IMUL.i32 r15, r19 | |
r20 = IADD.s32 r20, r8 | |
r21 = IADD.s32 r21, r9 | |
r22 = IADD.s32 r22, r10 | |
r23 = IADD.s32 r23, r11 | |
r16 = IADD.s32 r16, r12 | |
r17 = IADD.s32 r17, r13 | |
r18 = IADD.s32 r18, r14 | |
r19 = IADD.s32 r19, r15 | |
r32 = IMUL.i32 r28, r32 | |
r1 = IMUL.i32 r29, r1 | |
r2 = IMUL.i32 r30, r2 | |
r3 = IMUL.i32 r31, r3 | |
r4 = IMUL.i32 r24, r4 | |
r5 = IMUL.i32 r25, r5 | |
r6 = IMUL.i32 r26, r6 | |
r7 = IMUL.i32 r27, r7 | |
r32 = IADD.s32 r32, r28 | |
r1 = IADD.s32 r1, r29 | |
r2 = IADD.s32 r2, r30 | |
r3 = IADD.s32 r3, r31 | |
r4 = IADD.s32 r4, r24 | |
r5 = IADD.s32 r5, r25 | |
r6 = IADD.s32 r6, r26 | |
r7 = IADD.s32 r7, r27 | |
r8 = IMUL.i32 r20, r8 | |
r9 = IMUL.i32 r21, r9 | |
r10 = IMUL.i32 r22, r10 | |
r11 = IMUL.i32 r23, r11 | |
r12 = IMUL.i32 r16, r12 | |
r13 = IMUL.i32 r17, r13 | |
r14 = IMUL.i32 r18, r14 | |
r15 = IMUL.i32 r19, r15 | |
r8 = IADD.s32 r8, r20 | |
r9 = IADD.s32 r9, r21 | |
r10 = IADD.s32 r10, r22 | |
r11 = IADD.s32 r11, r23 | |
r12 = IADD.s32 r12, r16 | |
r13 = IADD.s32 r13, r17 | |
r14 = IADD.s32 r14, r18 | |
r15 = IADD.s32 r15, r19 | |
r28 = IMUL.i32 r32, r28 | |
r29 = IMUL.i32 r1, r29 | |
r30 = IMUL.i32 r2, r30 | |
r31 = IMUL.i32 r3, r31 | |
r24 = IMUL.i32 r4, r24 | |
r25 = IMUL.i32 r5, r25 | |
r26 = IMUL.i32 r6, r26 | |
r27 = IMUL.i32 r7, r27 | |
r28 = IADD.s32 r28, r32 | |
r29 = IADD.s32 r29, r1 | |
r30 = IADD.s32 r30, r2 | |
r31 = IADD.s32 r31, r3 | |
r24 = IADD.s32 r24, r4 | |
r25 = IADD.s32 r25, r5 | |
r26 = IADD.s32 r26, r6 | |
r27 = IADD.s32 r27, r7 | |
r20 = IMUL.i32 r8, r20 | |
r21 = IMUL.i32 r9, r21 | |
r22 = IMUL.i32 r10, r22 | |
r23 = IMUL.i32 r11, r23 | |
r16 = IMUL.i32 r12, r16 | |
r17 = IMUL.i32 r13, r17 | |
r18 = IMUL.i32 r14, r18 | |
r19 = IMUL.i32 r15, r19 | |
r20 = IADD.s32 r20, r8 | |
r21 = IADD.s32 r21, r9 | |
r22 = IADD.s32 r22, r10 | |
r23 = IADD.s32 r23, r11 | |
r16 = IADD.s32 r16, r12 | |
r17 = IADD.s32 r17, r13 | |
r18 = IADD.s32 r18, r14 | |
r19 = IADD.s32 r19, r15 | |
r32 = IMUL.i32 r28, r32 | |
r1 = IMUL.i32 r29, r1 | |
r2 = IMUL.i32 r30, r2 | |
r3 = IMUL.i32 r31, r3 | |
r4 = IMUL.i32 r24, r4 | |
r5 = IMUL.i32 r25, r5 | |
r6 = IMUL.i32 r26, r6 | |
r7 = IMUL.i32 r27, r7 | |
r32 = IADD.s32 r32, r28 | |
r1 = IADD.s32 r1, r29 | |
r2 = IADD.s32 r2, r30 | |
r3 = IADD.s32 r3, r31 | |
r4 = IADD.s32 r4, r24 | |
r5 = IADD.s32 r5, r25 | |
r6 = IADD.s32 r6, r26 | |
r7 = IADD.s32 r7, r27 | |
r8 = IMUL.i32 r20, r8 | |
r9 = IMUL.i32 r21, r9 | |
r10 = IMUL.i32 r22, r10 | |
r11 = IMUL.i32 r23, r11 | |
r12 = IMUL.i32 r16, r12 | |
r13 = IMUL.i32 r17, r13 | |
r14 = IMUL.i32 r18, r14 | |
r15 = IMUL.i32 r19, r15 | |
r8 = IADD.s32 r8, r20 | |
r9 = IADD.s32 r9, r21 | |
r10 = IADD.s32 r10, r22 | |
r11 = IADD.s32 r11, r23 | |
r12 = IADD.s32 r12, r16 | |
r13 = IADD.s32 r13, r17 | |
r14 = IADD.s32 r14, r18 | |
r15 = IADD.s32 r15, r19 | |
r28 = IMUL.i32 r32, r28 | |
r29 = IMUL.i32 r1, r29 | |
r30 = IMUL.i32 r2, r30 | |
r31 = IMUL.i32 r3, r31 | |
r24 = IMUL.i32 r4, r24 | |
r25 = IMUL.i32 r5, r25 | |
r26 = IMUL.i32 r6, r26 | |
r27 = IMUL.i32 r7, r27 | |
r28 = IADD.s32 r28, r32 | |
r29 = IADD.s32 r29, r1 | |
r30 = IADD.s32 r30, r2 | |
r31 = IADD.s32 r31, r3 | |
r24 = IADD.s32 r24, r4 | |
r25 = IADD.s32 r25, r5 | |
r26 = IADD.s32 r26, r6 | |
r27 = IADD.s32 r27, r7 | |
r20 = IMUL.i32 r8, r20 | |
r21 = IMUL.i32 r9, r21 | |
r22 = IMUL.i32 r10, r22 | |
r23 = IMUL.i32 r11, r23 | |
r16 = IMUL.i32 r12, r16 | |
r17 = IMUL.i32 r13, r17 | |
r18 = IMUL.i32 r14, r18 | |
r19 = IMUL.i32 r15, r19 | |
r20 = IADD.s32 r20, r8 | |
r21 = IADD.s32 r21, r9 | |
r22 = IADD.s32 r22, r10 | |
r23 = IADD.s32 r23, r11 | |
r16 = IADD.s32 r16, r12 | |
r17 = IADD.s32 r17, r13 | |
r18 = IADD.s32 r18, r14 | |
r19 = IADD.s32 r19, r15 | |
r32 = IMUL.i32 r28, r32 | |
r1 = IMUL.i32 r29, r1 | |
r2 = IMUL.i32 r30, r2 | |
r3 = IMUL.i32 r31, r3 | |
r4 = IMUL.i32 r24, r4 | |
r5 = IMUL.i32 r25, r5 | |
r6 = IMUL.i32 r26, r6 | |
r7 = IMUL.i32 r27, r7 | |
r32 = IADD.s32 r32, r28 | |
r1 = IADD.s32 r1, r29 | |
r2 = IADD.s32 r2, r30 | |
r3 = IADD.s32 r3, r31 | |
r4 = IADD.s32 r4, r24 | |
r5 = IADD.s32 r5, r25 | |
r6 = IADD.s32 r6, r26 | |
r7 = IADD.s32 r7, r27 | |
r8 = IMUL.i32 r20, r8 | |
r9 = IMUL.i32 r21, r9 | |
r10 = IMUL.i32 r22, r10 | |
r11 = IMUL.i32 r23, r11 | |
r12 = IMUL.i32 r16, r12 | |
r13 = IMUL.i32 r17, r13 | |
r14 = IMUL.i32 r18, r14 | |
r15 = IMUL.i32 r19, r15 | |
r8 = IADD.s32 r8, r20 | |
r9 = IADD.s32 r9, r21 | |
r10 = IADD.s32 r10, r22 | |
r11 = IADD.s32 r11, r23 | |
r12 = IADD.s32 r12, r16 | |
r13 = IADD.s32 r13, r17 | |
r14 = IADD.s32 r14, r18 | |
r15 = IADD.s32 r15, r19 | |
r28 = IMUL.i32 r32, r28 | |
r29 = IMUL.i32 r1, r29 | |
r30 = IMUL.i32 r2, r30 | |
r31 = IMUL.i32 r3, r31 | |
r24 = IMUL.i32 r4, r24 | |
r25 = IMUL.i32 r5, r25 | |
r26 = IMUL.i32 r6, r26 | |
r27 = IMUL.i32 r7, r27 | |
r28 = IADD.s32 r28, r32 | |
r29 = IADD.s32 r29, r1 | |
r30 = IADD.s32 r30, r2 | |
r31 = IADD.s32 r31, r3 | |
r24 = IADD.s32 r24, r4 | |
r25 = IADD.s32 r25, r5 | |
r26 = IADD.s32 r26, r6 | |
r27 = IADD.s32 r27, r7 | |
r20 = IMUL.i32 r8, r20 | |
r21 = IMUL.i32 r9, r21 | |
r22 = IMUL.i32 r10, r22 | |
r23 = IMUL.i32 r11, r23 | |
r16 = IMUL.i32 r12, r16 | |
r17 = IMUL.i32 r13, r17 | |
r18 = IMUL.i32 r14, r18 | |
r19 = IMUL.i32 r15, r19 | |
r20 = IADD.s32 r20, r8 | |
r21 = IADD.s32 r21, r9 | |
r22 = IADD.s32 r22, r10 | |
r23 = IADD.s32 r23, r11 | |
r16 = IADD.s32 r16, r12 | |
r17 = IADD.s32 r17, r13 | |
r18 = IADD.s32 r18, r14 | |
r19 = IADD.s32 r19, r15 | |
r32 = IMUL.i32 r28, r32 | |
r1 = IMUL.i32 r29, r1 | |
r2 = IMUL.i32 r30, r2 | |
r3 = IMUL.i32 r31, r3 | |
r4 = IMUL.i32 r24, r4 | |
r5 = IMUL.i32 r25, r5 | |
r6 = IMUL.i32 r26, r6 | |
r7 = IMUL.i32 r27, r7 | |
r32 = IADD.s32 r32, r28 | |
r1 = IADD.s32 r1, r29 | |
r2 = IADD.s32 r2, r30 | |
r3 = IADD.s32 r3, r31 | |
r4 = IADD.s32 r4, r24 | |
r5 = IADD.s32 r5, r25 | |
r6 = IADD.s32 r6, r26 | |
r7 = IADD.s32 r7, r27 | |
r8 = IMUL.i32 r20, r8 | |
r9 = IMUL.i32 r21, r9 | |
r10 = IMUL.i32 r22, r10 | |
r11 = IMUL.i32 r23, r11 | |
r12 = IMUL.i32 r16, r12 | |
r13 = IMUL.i32 r17, r13 | |
r14 = IMUL.i32 r18, r14 | |
r15 = IMUL.i32 r19, r15 | |
r8 = IADD.s32 r8, r20 | |
r9 = IADD.s32 r9, r21 | |
r10 = IADD.s32 r10, r22 | |
r11 = IADD.s32 r11, r23 | |
r12 = IADD.s32 r12, r16 | |
r13 = IADD.s32 r13, r17 | |
r14 = IADD.s32 r14, r18 | |
r15 = IADD.s32 r15, r19 | |
r28 = IMUL.i32 r32, r28 | |
r29 = IMUL.i32 r1, r29 | |
r30 = IMUL.i32 r2, r30 | |
r31 = IMUL.i32 r3, r31 | |
r24 = IMUL.i32 r4, r24 | |
r25 = IMUL.i32 r5, r25 | |
r26 = IMUL.i32 r6, r26 | |
r27 = IMUL.i32 r7, r27 | |
r28 = IADD.s32 r28, r32 | |
r29 = IADD.s32 r29, r1 | |
r30 = IADD.s32 r30, r2 | |
r31 = IADD.s32 r31, r3 | |
r24 = IADD.s32 r24, r4 | |
r25 = IADD.s32 r25, r5 | |
r26 = IADD.s32 r26, r6 | |
r27 = IADD.s32 r27, r7 | |
r20 = IMUL.i32 r8, r20 | |
r21 = IMUL.i32 r9, r21 | |
r22 = IMUL.i32 r10, r22 | |
r23 = IMUL.i32 r11, r23 | |
r16 = IMUL.i32 r12, r16 | |
r17 = IMUL.i32 r13, r17 | |
r18 = IMUL.i32 r14, r18 | |
r19 = IMUL.i32 r15, r19 | |
r20 = IADD.s32 r20, r8 | |
r21 = IADD.s32 r21, r9 | |
r22 = IADD.s32 r22, r10 | |
r23 = IADD.s32 r23, r11 | |
r16 = IADD.s32 r16, r12 | |
r17 = IADD.s32 r17, r13 | |
r18 = IADD.s32 r18, r14 | |
r19 = IADD.s32 r19, r15 | |
r32 = IMUL.i32 r28, r32 | |
r1 = IMUL.i32 r29, r1 | |
r2 = IMUL.i32 r30, r2 | |
r3 = IMUL.i32 r31, r3 | |
r4 = IMUL.i32 r24, r4 | |
r5 = IMUL.i32 r25, r5 | |
r6 = IMUL.i32 r26, r6 | |
r7 = IMUL.i32 r27, r7 | |
r32 = IADD.s32 r32, r28 | |
r1 = IADD.s32 r1, r29 | |
r2 = IADD.s32 r2, r30 | |
r3 = IADD.s32 r3, r31 | |
r4 = IADD.s32 r4, r24 | |
r5 = IADD.s32 r5, r25 | |
r6 = IADD.s32 r6, r26 | |
r7 = IADD.s32 r7, r27 | |
r8 = IMUL.i32 r20, r8 | |
r9 = IMUL.i32 r21, r9 | |
r10 = IMUL.i32 r22, r10 | |
r11 = IMUL.i32 r23, r11 | |
r12 = IMUL.i32 r16, r12 | |
r13 = IMUL.i32 r17, r13 | |
r14 = IMUL.i32 r18, r14 | |
r15 = IMUL.i32 r19, r15 | |
r8 = IADD.s32 r8, r20 | |
r9 = IADD.s32 r9, r21 | |
r10 = IADD.s32 r10, r22 | |
r11 = IADD.s32 r11, r23 | |
r12 = IADD.s32 r12, r16 | |
r13 = IADD.s32 r13, r17 | |
r14 = IADD.s32 r14, r18 | |
r15 = IADD.s32 r15, r19 | |
r28 = IMUL.i32 r32, r28 | |
r29 = IMUL.i32 r1, r29 | |
r30 = IMUL.i32 r2, r30 | |
r31 = IMUL.i32 r3, r31 | |
r24 = IMUL.i32 r4, r24 | |
r25 = IMUL.i32 r5, r25 | |
r26 = IMUL.i32 r6, r26 | |
r27 = IMUL.i32 r7, r27 | |
r28 = IADD.s32 r28, r32 | |
r29 = IADD.s32 r29, r1 | |
r30 = IADD.s32 r30, r2 | |
r31 = IADD.s32 r31, r3 | |
r24 = IADD.s32 r24, r4 | |
r25 = IADD.s32 r25, r5 | |
r26 = IADD.s32 r26, r6 | |
r27 = IADD.s32 r27, r7 | |
r20 = IMUL.i32 r8, r20 | |
r21 = IMUL.i32 r9, r21 | |
r22 = IMUL.i32 r10, r22 | |
r23 = IMUL.i32 r11, r23 | |
r16 = IMUL.i32 r12, r16 | |
r17 = IMUL.i32 r13, r17 | |
r18 = IMUL.i32 r14, r18 | |
r19 = IMUL.i32 r15, r19 | |
r20 = IADD.s32 r20, r8 | |
r21 = IADD.s32 r21, r9 | |
r22 = IADD.s32 r22, r10 | |
r23 = IADD.s32 r23, r11 | |
r16 = IADD.s32 r16, r12 | |
r17 = IADD.s32 r17, r13 | |
r18 = IADD.s32 r18, r14 | |
r19 = IADD.s32 r19, r15 | |
r32 = IMUL.i32 r28, r32 | |
r1 = IMUL.i32 r29, r1 | |
r2 = IMUL.i32 r30, r2 | |
r3 = IMUL.i32 r31, r3 | |
r4 = IMUL.i32 r24, r4 | |
r5 = IMUL.i32 r25, r5 | |
r6 = IMUL.i32 r26, r6 | |
r7 = IMUL.i32 r27, r7 | |
r32 = IADD.s32 r32, r28 | |
r1 = IADD.s32 r1, r29 | |
r2 = IADD.s32 r2, r30 | |
r3 = IADD.s32 r3, r31 | |
r4 = IADD.s32 r4, r24 | |
r5 = IADD.s32 r5, r25 | |
r6 = IADD.s32 r6, r26 | |
r7 = IADD.s32 r7, r27 | |
r8 = IMUL.i32 r20, r8 | |
r9 = IMUL.i32 r21, r9 | |
r10 = IMUL.i32 r22, r10 | |
r11 = IMUL.i32 r23, r11 | |
r12 = IMUL.i32 r16, r12 | |
r13 = IMUL.i32 r17, r13 | |
r14 = IMUL.i32 r18, r14 | |
r15 = IMUL.i32 r19, r15 | |
r8 = IADD.s32 r8, r20 | |
r9 = IADD.s32 r9, r21 | |
r10 = IADD.s32 r10, r22 | |
r11 = IADD.s32 r11, r23 | |
r12 = IADD.s32 r12, r16 | |
r13 = IADD.s32 r13, r17 | |
r14 = IADD.s32 r14, r18 | |
r15 = IADD.s32 r15, r19 | |
r28 = IMUL.i32 r32, r28 | |
r29 = IMUL.i32 r1, r29 | |
r30 = IMUL.i32 r2, r30 | |
r31 = IMUL.i32 r3, r31 | |
r24 = IMUL.i32 r4, r24 | |
r25 = IMUL.i32 r5, r25 | |
r26 = IMUL.i32 r6, r26 | |
r27 = IMUL.i32 r7, r27 | |
r28 = IADD.s32 r28, r32 | |
r29 = IADD.s32 r29, r1 | |
r30 = IADD.s32 r30, r2 | |
r31 = IADD.s32 r31, r3 | |
r24 = IADD.s32 r24, r4 | |
r25 = IADD.s32 r25, r5 | |
r26 = IADD.s32 r26, r6 | |
r27 = IADD.s32 r27, r7 | |
r20 = IMUL.i32 r8, r20 | |
r21 = IMUL.i32 r9, r21 | |
r22 = IMUL.i32 r10, r22 | |
r23 = IMUL.i32 r11, r23 | |
r16 = IMUL.i32 r12, r16 | |
r17 = IMUL.i32 r13, r17 | |
r18 = IMUL.i32 r14, r18 | |
r19 = IMUL.i32 r15, r19 | |
r20 = IADD.s32 r20, r8 | |
r21 = IADD.s32 r21, r9 | |
r22 = IADD.s32 r22, r10 | |
r23 = IADD.s32 r23, r11 | |
r16 = IADD.s32 r16, r12 | |
r17 = IADD.s32 r17, r13 | |
r18 = IADD.s32 r18, r14 | |
r19 = IADD.s32 r19, r15 | |
r33 = IADD.s32 r33, #0x1 | |
JUMP #0x0 -> block1 | |
} -> block1 from block3 | |
block5 { | |
r1 = IADD.s32 r28, r20 | |
r2 = IADD.s32 r29, r21 | |
r3 = IADD.s32 r30, r22 | |
r4 = IADD.s32 r31, r23 | |
r5 = IADD.s32 r24, r16 | |
r6 = IADD.s32 r25, r17 | |
r7 = IADD.s32 r26, r18 | |
r8 = IADD.s32 r27, r19 | |
r1 = IADD.s32 r1, r5 | |
r2 = IADD.s32 r2, r6 | |
r3 = IADD.s32 r3, r7 | |
r4 = IADD.s32 r4, r8 | |
r1 = IADD.s32 r1, r3 | |
r2 = IADD.s32 r2, r4 | |
r0 = LSHIFT_OR.i32 r0, #0x0, #0x2.b0 | |
r1 = IADD.s32 r1, r2 | |
r0 = IADD.s32 u0, r0 | |
r2 = ICMP.u32.i1.lt r0, u0 | |
r2 = IADD.s32 r2, u0[1] | |
STORE.i32 r1, r0, r2, byte_offset:0 | |
} from block2 | |
block0 { | |
id(0) nbb | |
* _.h00 = LSHIFT_OR.i32 r61, t, fau.y.b0 | |
+ _.h00 = IADD.s32 r60, t | |
* _.h00 = MKVEC.v2i16 t.h00, r62.h00 | |
+ r0 = IADD.s32 t1, t | |
* r1 = MOV.i32 fau.x | |
+ r2 = MOV.i32 fau.y | |
* r3 = MOV.i32 fau.x | |
+ r4 = MOV.i32 fau.y | |
* r5 = MOV.i32 fau.x | |
+ r6 = MOV.i32 fau.y | |
* NOP | |
+ r7 = MOV.i32 fau.x | |
200000001 400000003 600000005 800000007 | |
id(0) nbb | |
* r8 = MOV.i32 fau.x | |
+ r9 = MOV.i32 fau.y | |
* r10 = MOV.i32 fau.x | |
+ r11 = MOV.i32 fau.y | |
* r12 = MOV.i32 fau.x | |
+ r13 = MOV.i32 fau.y | |
* r14 = MOV.i32 fau.x | |
+ _.h00 = U32_TO_F32 r0 | |
* _.h00 = FMA.f32 t1, fau.y, t.neg | |
+ r16 = F32_TO_S32.rtz t | |
* r15 = MOV.i32 fau.y | |
+ r17 = MOV.i32 t1 | |
* NOP | |
+ r18 = MOV.i32 r16 | |
900000008 b0000000a d0000000c 2edbe6ff0000000e f00000000 | |
id(0) nbb | |
* r19 = MOV.i32 r16 | |
+ r20 = MOV.i32 r16 | |
* r21 = MOV.i32 r16 | |
+ r1 = IADD.s32 fau.x, r1 | |
* r22 = MOV.i32 r16 | |
+ r2 = IADD.s32 fau.x, r2 | |
* r23 = MOV.i32 r16 | |
+ r3 = IADD.s32 fau.x, r3 | |
* r24 = MOV.i32 r16 | |
+ r4 = IADD.s32 fau.x, r4 | |
* r25 = MOV.i32 r16 | |
+ r5 = IADD.s32 fau.x, r5 | |
* r26 = MOV.i32 r16 | |
+ r6 = IADD.s32 fau.x, r6 | |
* NOP | |
+ r7 = IADD.s32 fau.x, r7 | |
id(0) nbb r_uncond | |
* r27 = MOV.i32 r16 | |
+ r8 = IADD.s32 fau.x, r8 | |
* r28 = MOV.i32 r16 | |
+ r9 = IADD.s32 fau.x, r9 | |
* r29 = MOV.i32 r16 | |
+ r10 = IADD.s32 fau.x, r10 | |
* r30 = MOV.i32 r16 | |
+ r11 = IADD.s32 fau.x, r11 | |
* r31 = MOV.i32 r16 | |
+ r12 = IADD.s32 fau.x, r12 | |
* r32 = MOV.i32 fau.x | |
+ r13 = IADD.s32 fau.x, r13 | |
* r33 = MOV.i32 t | |
+ r14 = IADD.s32 fau.x, r14 | |
* NOP | |
+ r15 = IADD.s32 fau.x, r15 | |
} -> block1 | |
block1 { | |
id(0) nbb r_uncond pcrel(0) | |
* NOP | |
+ _.h00 = ICMP.s32.m1.ge r33, fau.x | |
* NOP | |
+ BRANCHZ.i16.eq t1.h00, fau.y -> block3 | |
4000000000000008 | |
} -> block3 block2 from block0 block4 | |
block2 { | |
id(0) nbb no_prefetch pcrel(0) | |
* NOP | |
+ JUMP fau.y -> block5 | |
4000000000000000 | |
} -> block5 from block1 | |
block3 { | |
} -> block4 from block1 | |
block4 { | |
id(0) nbb | |
* r1 = IMUL.i32 r29, r1 | |
+ NOP | |
* r8 = IMUL.i32 r20, r8 | |
+ NOP | |
* r12 = IMUL.i32 r16, r12 | |
+ NOP | |
* NOP | |
+ r1 = IADD.s32 r1, r29 | |
id(0) nbb | |
* r29 = IMUL.i32 r1, r29 | |
+ NOP | |
* _.h00 = IMUL.i32 r17, r13 | |
+ r13 = IADD.s32 t, r17 | |
* r17 = IMUL.i32 t1, r17 | |
+ NOP | |
* _.h00 = IMUL.i32 r18, r14 | |
+ r14 = IADD.s32 t, r18 | |
* r18 = IMUL.i32 t1, r18 | |
+ NOP | |
* _.h00 = IMUL.i32 r28, r32 | |
+ r32 = IADD.s32 t, r28 | |
* _.h00 = IMUL.i32 t1, r28 | |
+ r28 = IADD.s32 t, t1 | |
* r32 = IMUL.i32 t1, r32 | |
+ NOP | |
id(0) nbb | |
* _.h00 = IMUL.i32 r27, r7 | |
+ r7 = IADD.s32 t, r27 | |
* _.h00 = IMUL.i32 t1, r27 | |
+ r27 = IADD.s32 t, t1 | |
* r7 = IMUL.i32 t1, r7 | |
+ NOP | |
* _.h00 = IMUL.i32 r31, r3 | |
+ r3 = IADD.s32 t, r31 | |
* _.h00 = IMUL.i32 t1, r31 | |
+ r31 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* r31 = IMUL.i32 t1, r31 | |
+ NOP | |
* NOP | |
+ r18 = IADD.s32 r18, r14 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r18, r14 | |
+ r14 = IADD.s32 t, r18 | |
* r18 = IMUL.i32 t1, r18 | |
+ NOP | |
* _.h00 = IMUL.i32 r30, r2 | |
+ r2 = IADD.s32 t, r30 | |
* _.h00 = IMUL.i32 t1, r30 | |
+ r30 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r30 | |
+ r30 = IADD.s32 t, t1 | |
* r2 = IMUL.i32 t1, r2 | |
+ NOP | |
* NOP | |
+ r7 = IADD.s32 r7, r27 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r7, r27 | |
+ r27 = IADD.s32 t, r7 | |
* r7 = IMUL.i32 t1, r7 | |
+ NOP | |
* _.h00 = IMUL.i32 r21, r9 | |
+ r9 = IADD.s32 t, r21 | |
* _.h00 = IMUL.i32 t1, r21 | |
+ r21 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r9 | |
+ r9 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r21 | |
+ r21 = IADD.s32 t, t1 | |
* r9 = IMUL.i32 t1, r9 | |
+ NOP | |
* NOP | |
+ r18 = IADD.s32 r18, r14 | |
id(0) nbb | |
* r14 = IMUL.i32 r18, r14 | |
+ NOP | |
* _.h00 = IMUL.i32 r25, r5 | |
+ r5 = IADD.s32 t, r25 | |
* _.h00 = IMUL.i32 t1, r25 | |
+ r25 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r25 | |
+ r25 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* r25 = IMUL.i32 t1, r25 | |
+ NOP | |
* NOP | |
+ r9 = IADD.s32 r9, r21 | |
id(0) nbb | |
* r21 = IMUL.i32 r9, r21 | |
+ NOP | |
* _.h00 = IMUL.i32 r23, r11 | |
+ r11 = IADD.s32 t, r23 | |
* _.h00 = IMUL.i32 t1, r23 | |
+ r23 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r11 | |
+ r11 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r23 | |
+ r23 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r11 | |
+ r11 = IADD.s32 t, t1 | |
* r23 = IMUL.i32 t1, r23 | |
+ NOP | |
* NOP | |
+ r25 = IADD.s32 r25, r5 | |
id(0) nbb | |
* r5 = IMUL.i32 r25, r5 | |
+ NOP | |
* _.h00 = IMUL.i32 r26, r6 | |
+ r6 = IADD.s32 t, r26 | |
* _.h00 = IMUL.i32 t1, r26 | |
+ r26 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r6 | |
+ r6 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r26 | |
+ r26 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r6 | |
+ r6 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r26 | |
+ r26 = IADD.s32 t, t1 | |
* r6 = IMUL.i32 t1, r6 | |
+ NOP | |
id(0) nbb | |
* _.h00 = IMUL.i32 r19, r15 | |
+ r15 = IADD.s32 t, r19 | |
* _.h00 = IMUL.i32 t1, r19 | |
+ r19 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r15 | |
+ r15 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r19 | |
+ r19 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r15 | |
+ r15 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r19 | |
+ r19 = IADD.s32 t, t1 | |
* r15 = IMUL.i32 t1, r15 | |
+ NOP | |
* NOP | |
+ r8 = IADD.s32 r8, r20 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r8, r20 | |
+ r20 = IADD.s32 t, r8 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r20 | |
+ r20 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r20 | |
+ r20 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* r20 = IMUL.i32 t1, r20 | |
+ NOP | |
* NOP | |
+ r12 = IADD.s32 r12, r16 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r12, r16 | |
+ r16 = IADD.s32 t, r12 | |
* _.h00 = IMUL.i32 t1, r12 | |
+ r12 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r16 | |
+ r16 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r12 | |
+ r12 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r16 | |
+ r16 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r12 | |
+ r12 = IADD.s32 t, t1 | |
* r16 = IMUL.i32 t1, r16 | |
+ NOP | |
* NOP | |
+ r29 = IADD.s32 r29, r1 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r29, r1 | |
+ r1 = IADD.s32 t, r29 | |
* _.h00 = IMUL.i32 t1, r29 | |
+ r29 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r29 | |
+ r29 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r29 | |
+ r29 = IADD.s32 t, t1 | |
* r1 = IMUL.i32 t1, r1 | |
+ NOP | |
* NOP | |
+ r32 = IADD.s32 r32, r28 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r32, r28 | |
+ r28 = IADD.s32 t, r32 | |
* _.h00 = IMUL.i32 t1, r32 | |
+ r32 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r28 | |
+ r28 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r32 | |
+ r32 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r28 | |
+ r28 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r32 | |
+ r32 = IADD.s32 t, t1 | |
* r28 = IMUL.i32 t1, r28 | |
+ NOP | |
* NOP | |
+ r31 = IADD.s32 r31, r3 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r31, r3 | |
+ r3 = IADD.s32 t, r31 | |
* _.h00 = IMUL.i32 t1, r31 | |
+ r31 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r31 | |
+ r31 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r3 | |
+ r3 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r31 | |
+ r31 = IADD.s32 t, t1 | |
* r3 = IMUL.i32 t1, r3 | |
+ NOP | |
* NOP | |
+ r2 = IADD.s32 r2, r30 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r2, r30 | |
+ r30 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r30 | |
+ r30 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r30 | |
+ r30 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r2 | |
+ r2 = IADD.s32 t, t1 | |
* r30 = IMUL.i32 t1, r30 | |
+ NOP | |
* NOP | |
+ r14 = IADD.s32 r14, r18 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r14, r18 | |
+ r18 = IADD.s32 t, r14 | |
* _.h00 = IMUL.i32 t1, r14 | |
+ r14 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r18 | |
+ r18 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r14 | |
+ r14 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r18 | |
+ r18 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r14 | |
+ r14 = IADD.s32 t, t1 | |
* r18 = IMUL.i32 t1, r18 | |
+ NOP | |
* NOP | |
+ r21 = IADD.s32 r21, r9 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r21, r9 | |
+ r9 = IADD.s32 t, r21 | |
* _.h00 = IMUL.i32 t1, r21 | |
+ r21 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r9 | |
+ r9 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r21 | |
+ r21 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r9 | |
+ r9 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r21 | |
+ r21 = IADD.s32 t, t1 | |
* r9 = IMUL.i32 t1, r9 | |
+ NOP | |
* NOP | |
+ r23 = IADD.s32 r23, r11 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r23, r11 | |
+ r11 = IADD.s32 t, r23 | |
* _.h00 = IMUL.i32 t1, r23 | |
+ r23 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r11 | |
+ r11 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r23 | |
+ r23 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r11 | |
+ r11 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r23 | |
+ r23 = IADD.s32 t, t1 | |
* r11 = IMUL.i32 t1, r11 | |
+ NOP | |
* NOP | |
+ r5 = IADD.s32 r5, r25 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r5, r25 | |
+ r25 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r25 | |
+ r25 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r25 | |
+ r25 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r5 | |
+ r5 = IADD.s32 t, t1 | |
* r25 = IMUL.i32 t1, r25 | |
+ NOP | |
* NOP | |
+ r15 = IADD.s32 r15, r19 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r15, r19 | |
+ r19 = IADD.s32 t, r15 | |
* _.h00 = IMUL.i32 t1, r15 | |
+ r15 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r19 | |
+ r19 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r15 | |
+ r15 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r19 | |
+ r19 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r15 | |
+ r15 = IADD.s32 t, t1 | |
* r19 = IMUL.i32 t1, r19 | |
+ NOP | |
* NOP | |
+ r20 = IADD.s32 r20, r8 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r20, r8 | |
+ r8 = IADD.s32 t, r20 | |
* _.h00 = IMUL.i32 t1, r20 | |
+ r20 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r20 | |
+ r20 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r8 | |
+ r8 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r20 | |
+ r20 = IADD.s32 t, t1 | |
* r8 = IMUL.i32 t1, r8 | |
+ NOP | |
* NOP | |
+ r1 = IADD.s32 r1, r29 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r1, r29 | |
+ r29 = IADD.s32 t, r1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r29 | |
+ r29 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r29 | |
+ r29 = IADD.s32 t, t1 | |
* _.h00 = IMUL.i32 t1, r1 | |
+ r1 = IADD.s32 t, t1 | |
* r29 = IMUL.i32 t1, r29 | |
+ NOP | |
* NOP | |
+ r9 = IADD.s32 r9, r21 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r9, r21 | |
+ r21 = IADD.s32 t, r9 | |
* _.h00 = IMUL.i32 t1, r9 | |
+ r9 = IADD.s32 t, t1 | |
* r21 = IMUL.i32 t1, r21 | |
+ NOP | |
* _.h00 = IMUL.i32 r24, r4 | |
+ r4 = IADD.s32 t, r24 | |
* _.h00 = IMUL.i32 r22, r10 | |
+ r10 = IADD.s32 t, r22 | |
* _.h00 = IMUL.i32 r4, r24 | |
+ r24 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r10, r22 | |
+ r22 = IADD.s32 t, r10 | |
* NOP | |
+ r17 = IADD.s32 r17, r13 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r24, r4 | |
+ r4 = IADD.s32 t, r24 | |
* _.h00 = IMUL.i32 r22, r10 | |
+ r10 = IADD.s32 t, r22 | |
* _.h00 = IMUL.i32 r17, r13 | |
+ r13 = IADD.s32 t, r17 | |
* _.h00 = IMUL.i32 r4, r24 | |
+ r24 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r10, r22 | |
+ r22 = IADD.s32 t, r10 | |
* _.h00 = IMUL.i32 r13, r17 | |
+ r17 = IADD.s32 t, r13 | |
* _.h00 = IMUL.i32 r24, r4 | |
+ r4 = IADD.s32 t, r24 | |
* NOP | |
+ r7 = IADD.s32 r7, r27 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r22, r10 | |
+ r10 = IADD.s32 t, r22 | |
* _.h00 = IMUL.i32 r17, r13 | |
+ r13 = IADD.s32 t, r17 | |
* _.h00 = IMUL.i32 r4, r24 | |
+ r24 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r7, r27 | |
+ r27 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r10, r22 | |
+ r22 = IADD.s32 t, r10 | |
* _.h00 = IMUL.i32 r13, r17 | |
+ r17 = IADD.s32 t, r13 | |
* _.h00 = IMUL.i32 r24, r4 | |
+ r4 = IADD.s32 t, r24 | |
* NOP | |
+ r6 = IADD.s32 r6, r26 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r27, r7 | |
+ r7 = IADD.s32 t, r27 | |
* _.h00 = IMUL.i32 r22, r10 | |
+ r10 = IADD.s32 t, r22 | |
* _.h00 = IMUL.i32 r17, r13 | |
+ r13 = IADD.s32 t, r17 | |
* _.h00 = IMUL.i32 r4, r24 | |
+ r24 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r6, r26 | |
+ r26 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r7, r27 | |
+ r27 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r10, r22 | |
+ r22 = IADD.s32 t, r10 | |
* NOP | |
+ r16 = IADD.s32 r16, r12 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r13, r17 | |
+ r17 = IADD.s32 t, r13 | |
* _.h00 = IMUL.i32 r24, r4 | |
+ r4 = IADD.s32 t, r24 | |
* _.h00 = IMUL.i32 r26, r6 | |
+ r6 = IADD.s32 t, r26 | |
* _.h00 = IMUL.i32 r27, r7 | |
+ r7 = IADD.s32 t, r27 | |
* _.h00 = IMUL.i32 r22, r10 | |
+ r10 = IADD.s32 t, r22 | |
* _.h00 = IMUL.i32 r16, r12 | |
+ r12 = IADD.s32 t, r16 | |
* _.h00 = IMUL.i32 r17, r13 | |
+ r13 = IADD.s32 t, r17 | |
* NOP | |
+ r28 = IADD.s32 r28, r32 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r4, r24 | |
+ r24 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r6, r26 | |
+ r26 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r7, r27 | |
+ r27 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r10, r22 | |
+ r22 = IADD.s32 t, r10 | |
* _.h00 = IMUL.i32 r12, r16 | |
+ r16 = IADD.s32 t, r12 | |
* _.h00 = IMUL.i32 r13, r17 | |
+ r17 = IADD.s32 t, r13 | |
* _.h00 = IMUL.i32 r28, r32 | |
+ r32 = IADD.s32 t, r28 | |
* NOP | |
+ r3 = IADD.s32 r3, r31 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r24, r4 | |
+ r4 = IADD.s32 t, r24 | |
* _.h00 = IMUL.i32 r26, r6 | |
+ r6 = IADD.s32 t, r26 | |
* _.h00 = IMUL.i32 r27, r7 | |
+ r7 = IADD.s32 t, r27 | |
* _.h00 = IMUL.i32 r22, r10 | |
+ r10 = IADD.s32 t, r22 | |
* _.h00 = IMUL.i32 r16, r12 | |
+ r12 = IADD.s32 t, r16 | |
* _.h00 = IMUL.i32 r17, r13 | |
+ r13 = IADD.s32 t, r17 | |
* _.h00 = IMUL.i32 r32, r28 | |
+ r28 = IADD.s32 t, r32 | |
* NOP | |
+ r30 = IADD.s32 r30, r2 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r3, r31 | |
+ r31 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 r4, r24 | |
+ r24 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r6, r26 | |
+ r26 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r7, r27 | |
+ r27 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r10, r22 | |
+ r22 = IADD.s32 t, r10 | |
* _.h00 = IMUL.i32 r12, r16 | |
+ r16 = IADD.s32 t, r12 | |
* _.h00 = IMUL.i32 r13, r17 | |
+ r17 = IADD.s32 t, r13 | |
* NOP | |
+ r18 = IADD.s32 r18, r14 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r28, r32 | |
+ r32 = IADD.s32 t, r28 | |
* _.h00 = IMUL.i32 r30, r2 | |
+ r2 = IADD.s32 t, r30 | |
* _.h00 = IMUL.i32 r31, r3 | |
+ r3 = IADD.s32 t, r31 | |
* _.h00 = IMUL.i32 r24, r4 | |
+ r4 = IADD.s32 t, r24 | |
* _.h00 = IMUL.i32 r26, r6 | |
+ r6 = IADD.s32 t, r26 | |
* _.h00 = IMUL.i32 r27, r7 | |
+ r7 = IADD.s32 t, r27 | |
* _.h00 = IMUL.i32 r22, r10 | |
+ r10 = IADD.s32 t, r22 | |
* NOP | |
+ r11 = IADD.s32 r11, r23 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r16, r12 | |
+ r12 = IADD.s32 t, r16 | |
* _.h00 = IMUL.i32 r17, r13 | |
+ r13 = IADD.s32 t, r17 | |
* _.h00 = IMUL.i32 r18, r14 | |
+ r14 = IADD.s32 t, r18 | |
* _.h00 = IMUL.i32 r32, r28 | |
+ r28 = IADD.s32 t, r32 | |
* _.h00 = IMUL.i32 r2, r30 | |
+ r30 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r3, r31 | |
+ r31 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 r4, r24 | |
+ r24 = IADD.s32 t, r4 | |
* NOP | |
+ r25 = IADD.s32 r25, r5 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r6, r26 | |
+ r26 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r7, r27 | |
+ r27 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r10, r22 | |
+ r22 = IADD.s32 t, r10 | |
* _.h00 = IMUL.i32 r11, r23 | |
+ r23 = IADD.s32 t, r11 | |
* _.h00 = IMUL.i32 r12, r16 | |
+ r16 = IADD.s32 t, r12 | |
* _.h00 = IMUL.i32 r13, r17 | |
+ r17 = IADD.s32 t, r13 | |
* _.h00 = IMUL.i32 r14, r18 | |
+ r18 = IADD.s32 t, r14 | |
* NOP | |
+ r19 = IADD.s32 r19, r15 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r28, r32 | |
+ r32 = IADD.s32 t, r28 | |
* _.h00 = IMUL.i32 r30, r2 | |
+ r2 = IADD.s32 t, r30 | |
* _.h00 = IMUL.i32 r31, r3 | |
+ r3 = IADD.s32 t, r31 | |
* _.h00 = IMUL.i32 r24, r4 | |
+ r4 = IADD.s32 t, r24 | |
* _.h00 = IMUL.i32 r25, r5 | |
+ r5 = IADD.s32 t, r25 | |
* _.h00 = IMUL.i32 r26, r6 | |
+ r6 = IADD.s32 t, r26 | |
* _.h00 = IMUL.i32 r27, r7 | |
+ r7 = IADD.s32 t, r27 | |
* NOP | |
+ r8 = IADD.s32 r8, r20 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r22, r10 | |
+ r10 = IADD.s32 t, r22 | |
* _.h00 = IMUL.i32 r23, r11 | |
+ r11 = IADD.s32 t, r23 | |
* _.h00 = IMUL.i32 r16, r12 | |
+ r12 = IADD.s32 t, r16 | |
* _.h00 = IMUL.i32 r17, r13 | |
+ r13 = IADD.s32 t, r17 | |
* _.h00 = IMUL.i32 r18, r14 | |
+ r14 = IADD.s32 t, r18 | |
* _.h00 = IMUL.i32 r19, r15 | |
+ r15 = IADD.s32 t, r19 | |
* _.h00 = IMUL.i32 r32, r28 | |
+ r28 = IADD.s32 t, r32 | |
* NOP | |
+ r29 = IADD.s32 r29, r1 | |
id(0) nbb | |
* _.h00 = IMUL.i32 r2, r30 | |
+ r30 = IADD.s32 t, r2 | |
* _.h00 = IMUL.i32 r3, r31 | |
+ r31 = IADD.s32 t, r3 | |
* _.h00 = IMUL.i32 r4, r24 | |
+ r24 = IADD.s32 t, r4 | |
* _.h00 = IMUL.i32 r5, r25 | |
+ r25 = IADD.s32 t, r5 | |
* _.h00 = IMUL.i32 r6, r26 | |
+ r26 = IADD.s32 t, r6 | |
* _.h00 = IMUL.i32 r7, r27 | |
+ r27 = IADD.s32 t, r7 | |
* _.h00 = IMUL.i32 r8, r20 | |
+ r20 = IADD.s32 t, r8 | |
* NOP | |
+ r21 = IADD.s32 r21, r9 | |
id(0) nbb r_uncond no_prefetch pcrel(1) | |
* _.h00 = IMUL.i32 r10, r22 | |
+ r22 = IADD.s32 t, r10 | |
* _.h00 = IMUL.i32 r11, r23 | |
+ r23 = IADD.s32 t, r11 | |
* _.h00 = IMUL.i32 r12, r16 | |
+ r16 = IADD.s32 t, r12 | |
* _.h00 = IMUL.i32 r13, r17 | |
+ r17 = IADD.s32 t, r13 | |
* _.h00 = IMUL.i32 r14, r18 | |
+ r18 = IADD.s32 t, r14 | |
* _.h00 = IMUL.i32 r15, r19 | |
+ r19 = IADD.s32 t, r15 | |
* NOP | |
+ r33 = IADD.s32 r33, fau.x | |
* NOP | |
+ JUMP fau.y -> block1 | |
0 4000000000000001 | |
} -> block1 from block3 | |
block5 { | |
id(0) nbb | |
* NOP | |
+ r1 = IADD.s32 r28, r20 | |
* NOP | |
+ r2 = IADD.s32 r29, r21 | |
* NOP | |
+ r3 = IADD.s32 r30, r22 | |
id(0) nbb | |
* NOP | |
+ r4 = IADD.s32 r31, r23 | |
* NOP | |
+ r5 = IADD.s32 r24, r16 | |
* NOP | |
+ r6 = IADD.s32 r25, r17 | |
* NOP | |
+ r7 = IADD.s32 r26, r18 | |
* NOP | |
+ r8 = IADD.s32 r27, r19 | |
* NOP | |
+ r1 = IADD.s32 r1, r5 | |
* NOP | |
+ r2 = IADD.s32 r2, r6 | |
* NOP | |
+ r3 = IADD.s32 r3, r7 | |
id(0) wait(0 ) nbb r_uncond | |
* NOP | |
+ r4 = IADD.s32 r4, r8 | |
* NOP | |
+ r1 = IADD.s32 r1, r3 | |
* NOP | |
+ _.h00 = IADD.s32 r2, r4 | |
* _.h00 = LSHIFT_OR.i32 r0, t, fau.y.b0 | |
+ r1 = IADD.s32 r1, t1 | |
* NOP | |
+ r0 = IADD.s32 fau.x, t0 | |
* NOP | |
+ _.h00 = ICMP.u32.i1.lt t1, fau.x | |
* NOP | |
+ _.h00 = IADD.s32 t1, fau.y | |
* NOP | |
+ STORE.i32 r1, r0, t1, byte_offset:0 | |
200000000 | |
} from block2 | |
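The clause disassembly that follows is the final hardware form of the same code. A hedged reading of the notation (conventions of the Panfrost Bifrost disassembler, not explained in the dump itself): each { ... } clause bundles several dual-issued tuples; within a tuple, the `*` line goes to the FMA pipe and the `+` line to the ADD pipe; `t`, `t0` and `t1` are passthrough temporaries that forward a tuple result to the next instruction without a register-file round trip; `u0`/`u1` (and `fau` in the tuple listing above) read the fast-access-uniform RAM, whose embedded constants appear as bare hex words such as 4000000000000008; `NOP t0` fills an unused slot; flags like `nbb` and `ncph` are scheduling hints, and `eos` marks the end-of-shader clause, which also carries the final STORE.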
slot 0 reads: r1 | |
clause_0: | |
ds(0) nbb ncph | |
{ | |
*LSHIFT_OR.i32 t0, r61, #0, 0x00000008 /* 0.000000 */ | |
+IADD.s32 t1, r60, t | |
*MKVEC.v2i16 t0, #0, r62 | |
+IADD.s32 r0:t1, t1, t | |
*MOV.i32 r1:t0, 0x00000001 /* 0.000000 */ | |
+MOV.i32 r2:t1, 0x00000002 /* 0.000000 */ | |
*MOV.i32 r3:t0, 0x00000003 /* 0.000000 */ | |
+MOV.i32 r4:t1, 0x00000004 /* 0.000000 */ | |
*MOV.i32 r5:t0, 0x00000005 /* 0.000000 */ | |
+MOV.i32 r6:t1, 0x00000006 /* 0.000000 */ | |
*NOP t0 | |
+MOV.i32 r7:t1, 0x00000007 /* 0.000000 */ | |
} | |
clause_7: | |
ds(0) nbb ncph | |
{ | |
*MOV.i32 r8:t0, 0x00000008 /* 0.000000 */ | |
+MOV.i32 r9:t1, 0x00000009 /* 0.000000 */ | |
*MOV.i32 r10:t0, 0x0000000a /* 0.000000 */ | |
+MOV.i32 r11:t1, 0x0000000b /* 0.000000 */ | |
*MOV.i32 r12:t0, 0x0000000c /* 0.000000 */ | |
+MOV.i32 r13:t1, 0x0000000d /* 0.000000 */ | |
*MOV.i32 r14:t0, 0x0000000e /* 0.000000 */ | |
+U32_TO_F32 t1, r0 | |
*FMA.f32 t0, t1, 0x2edbe6ff /* 0.000000 */, #0.neg | |
+F32_TO_S32.rtz r16:t1, t | |
*MOV.i32 r15:t0, 0x0000000f /* 0.000000 */ | |
+MOV.i32 r17:t1, t1 | |
*NOP t0 | |
+MOV.i32 r18:t1, r16 | |
} | |
clause_15: | |
ds(0) nbb ncph | |
{ | |
*MOV.i32 r19:t0, r16 | |
+MOV.i32 r20:t1, r16 | |
*MOV.i32 r21:t0, r16 | |
+IADD.s32 r1:t1, u1.w0, r1 | |
*MOV.i32 r22:t0, r16 | |
+IADD.s32 r2:t1, u1.w0, r2 | |
*MOV.i32 r23:t0, r16 | |
+IADD.s32 r3:t1, u1.w0, r3 | |
*MOV.i32 r24:t0, r16 | |
+IADD.s32 r4:t1, u1.w0, r4 | |
*MOV.i32 r25:t0, r16 | |
+IADD.s32 r5:t1, u1.w0, r5 | |
*MOV.i32 r26:t0, r16 | |
+IADD.s32 r6:t1, u1.w0, r6 | |
*NOP t0 | |
+IADD.s32 r7:t1, u1.w0, r7 | |
} | |
clause_21: | |
ds(0) nbb r_uncond ncph | |
{ | |
*MOV.i32 r27:t0, r16 | |
+IADD.s32 r8:t1, u1.w0, r8 | |
*MOV.i32 r28:t0, r16 | |
+IADD.s32 r9:t1, u1.w0, r9 | |
*MOV.i32 r29:t0, r16 | |
+IADD.s32 r10:t1, u1.w0, r10 | |
*MOV.i32 r30:t0, r16 | |
+IADD.s32 r11:t1, u1.w0, r11 | |
*MOV.i32 r31:t0, r16 | |
+IADD.s32 r12:t1, u1.w0, r12 | |
*MOV.i32 r32:t0, u1.w0 | |
+IADD.s32 r13:t1, u1.w0, r13 | |
*MOV.i32 r33:t0, #0 | |
+IADD.s32 r14:t1, u1.w0, r14 | |
*NOP t0 | |
+IADD.s32 r15:t1, u1.w0, r15 | |
} | |
clause_27: | |
ds(0) nbb r_uncond ncph | |
{ | |
*NOP t0 | |
+ICMP.s32.m1.ge t1, r33, 0x00000008 /* 0.000000 */ | |
*NOP t0 | |
+BRANCHZ.i16.eq t1, t1.h0, clause_32 | |
} | |
clause_30: | |
ds(0) nbb | |
{ | |
*NOP t0 | |
+JUMP t1, clause_252 | |
} | |
clause_32: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 r1:t0, r29, r1 | |
+NOP t1 | |
*IMUL.i32 r8:t0, r20, r8 | |
+NOP t1 | |
*IMUL.i32 r12:t0, r16, r12 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r1:t1, r1, r29 | |
} | |
clause_35: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 r29:t0, r1, r29 | |
+NOP t1 | |
*IMUL.i32 t0, r17, r13 | |
+IADD.s32 r13:t1, t, r17 | |
*IMUL.i32 r17:t0, t1, r17 | |
+NOP t1 | |
*IMUL.i32 t0, r18, r14 | |
+IADD.s32 r14:t1, t, r18 | |
*IMUL.i32 r18:t0, t1, r18 | |
+NOP t1 | |
*IMUL.i32 t0, r28, r32 | |
+IADD.s32 r32:t1, t, r28 | |
*IMUL.i32 t0, t1, r28 | |
+IADD.s32 r28:t1, t, t1 | |
*IMUL.i32 r32:t0, t1, r32 | |
+NOP t1 | |
} | |
clause_41: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r27, r7 | |
+IADD.s32 r7:t1, t, r27 | |
*IMUL.i32 t0, t1, r27 | |
+IADD.s32 r27:t1, t, t1 | |
*IMUL.i32 r7:t0, t1, r7 | |
+NOP t1 | |
*IMUL.i32 t0, r31, r3 | |
+IADD.s32 r3:t1, t, r31 | |
*IMUL.i32 t0, t1, r31 | |
+IADD.s32 r31:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 r31:t0, t1, r31 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r18:t1, r18, r14 | |
} | |
clause_47: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r18, r14 | |
+IADD.s32 r14:t1, t, r18 | |
*IMUL.i32 r18:t0, t1, r18 | |
+NOP t1 | |
*IMUL.i32 t0, r30, r2 | |
+IADD.s32 r2:t1, t, r30 | |
*IMUL.i32 t0, t1, r30 | |
+IADD.s32 r30:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r30 | |
+IADD.s32 r30:t1, t, t1 | |
*IMUL.i32 r2:t0, t1, r2 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r7:t1, r7, r27 | |
} | |
clause_53: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r7, r27 | |
+IADD.s32 r27:t1, t, r7 | |
*IMUL.i32 r7:t0, t1, r7 | |
+NOP t1 | |
*IMUL.i32 t0, r21, r9 | |
+IADD.s32 r9:t1, t, r21 | |
*IMUL.i32 t0, t1, r21 | |
+IADD.s32 r21:t1, t, t1 | |
*IMUL.i32 t0, t1, r9 | |
+IADD.s32 r9:t1, t, t1 | |
*IMUL.i32 t0, t1, r21 | |
+IADD.s32 r21:t1, t, t1 | |
*IMUL.i32 r9:t0, t1, r9 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r18:t1, r18, r14 | |
} | |
clause_59: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 r14:t0, r18, r14 | |
+NOP t1 | |
*IMUL.i32 t0, r25, r5 | |
+IADD.s32 r5:t1, t, r25 | |
*IMUL.i32 t0, t1, r25 | |
+IADD.s32 r25:t1, t, t1 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 t0, t1, r25 | |
+IADD.s32 r25:t1, t, t1 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 r25:t0, t1, r25 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r9:t1, r9, r21 | |
} | |
clause_65: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 r21:t0, r9, r21 | |
+NOP t1 | |
*IMUL.i32 t0, r23, r11 | |
+IADD.s32 r11:t1, t, r23 | |
*IMUL.i32 t0, t1, r23 | |
+IADD.s32 r23:t1, t, t1 | |
*IMUL.i32 t0, t1, r11 | |
+IADD.s32 r11:t1, t, t1 | |
*IMUL.i32 t0, t1, r23 | |
+IADD.s32 r23:t1, t, t1 | |
*IMUL.i32 t0, t1, r11 | |
+IADD.s32 r11:t1, t, t1 | |
*IMUL.i32 r23:t0, t1, r23 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r25:t1, r25, r5 | |
} | |
clause_71: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 r5:t0, r25, r5 | |
+NOP t1 | |
*IMUL.i32 t0, r26, r6 | |
+IADD.s32 r6:t1, t, r26 | |
*IMUL.i32 t0, t1, r26 | |
+IADD.s32 r26:t1, t, t1 | |
*IMUL.i32 t0, t1, r6 | |
+IADD.s32 r6:t1, t, t1 | |
*IMUL.i32 t0, t1, r26 | |
+IADD.s32 r26:t1, t, t1 | |
*IMUL.i32 t0, t1, r6 | |
+IADD.s32 r6:t1, t, t1 | |
*IMUL.i32 t0, t1, r26 | |
+IADD.s32 r26:t1, t, t1 | |
*IMUL.i32 r6:t0, t1, r6 | |
+NOP t1 | |
} | |
clause_77: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r19, r15 | |
+IADD.s32 r15:t1, t, r19 | |
*IMUL.i32 t0, t1, r19 | |
+IADD.s32 r19:t1, t, t1 | |
*IMUL.i32 t0, t1, r15 | |
+IADD.s32 r15:t1, t, t1 | |
*IMUL.i32 t0, t1, r19 | |
+IADD.s32 r19:t1, t, t1 | |
*IMUL.i32 t0, t1, r15 | |
+IADD.s32 r15:t1, t, t1 | |
*IMUL.i32 t0, t1, r19 | |
+IADD.s32 r19:t1, t, t1 | |
*IMUL.i32 r15:t0, t1, r15 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r8:t1, r8, r20 | |
} | |
clause_83: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r8, r20 | |
+IADD.s32 r20:t1, t, r8 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 t0, t1, r20 | |
+IADD.s32 r20:t1, t, t1 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 t0, t1, r20 | |
+IADD.s32 r20:t1, t, t1 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 r20:t0, t1, r20 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r12:t1, r12, r16 | |
} | |
clause_89: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r12, r16 | |
+IADD.s32 r16:t1, t, r12 | |
*IMUL.i32 t0, t1, r12 | |
+IADD.s32 r12:t1, t, t1 | |
*IMUL.i32 t0, t1, r16 | |
+IADD.s32 r16:t1, t, t1 | |
*IMUL.i32 t0, t1, r12 | |
+IADD.s32 r12:t1, t, t1 | |
*IMUL.i32 t0, t1, r16 | |
+IADD.s32 r16:t1, t, t1 | |
*IMUL.i32 t0, t1, r12 | |
+IADD.s32 r12:t1, t, t1 | |
*IMUL.i32 r16:t0, t1, r16 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r29:t1, r29, r1 | |
} | |
clause_95: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r29, r1 | |
+IADD.s32 r1:t1, t, r29 | |
*IMUL.i32 t0, t1, r29 | |
+IADD.s32 r29:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r29 | |
+IADD.s32 r29:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r29 | |
+IADD.s32 r29:t1, t, t1 | |
*IMUL.i32 r1:t0, t1, r1 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r32:t1, r32, r28 | |
} | |
clause_101: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r32, r28 | |
+IADD.s32 r28:t1, t, r32 | |
*IMUL.i32 t0, t1, r32 | |
+IADD.s32 r32:t1, t, t1 | |
*IMUL.i32 t0, t1, r28 | |
+IADD.s32 r28:t1, t, t1 | |
*IMUL.i32 t0, t1, r32 | |
+IADD.s32 r32:t1, t, t1 | |
*IMUL.i32 t0, t1, r28 | |
+IADD.s32 r28:t1, t, t1 | |
*IMUL.i32 t0, t1, r32 | |
+IADD.s32 r32:t1, t, t1 | |
*IMUL.i32 r28:t0, t1, r28 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r31:t1, r31, r3 | |
} | |
clause_107: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r31, r3 | |
+IADD.s32 r3:t1, t, r31 | |
*IMUL.i32 t0, t1, r31 | |
+IADD.s32 r31:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r31 | |
+IADD.s32 r31:t1, t, t1 | |
*IMUL.i32 t0, t1, r3 | |
+IADD.s32 r3:t1, t, t1 | |
*IMUL.i32 t0, t1, r31 | |
+IADD.s32 r31:t1, t, t1 | |
*IMUL.i32 r3:t0, t1, r3 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r2:t1, r2, r30 | |
} | |
clause_113: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r2, r30 | |
+IADD.s32 r30:t1, t, r2 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r30 | |
+IADD.s32 r30:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 t0, t1, r30 | |
+IADD.s32 r30:t1, t, t1 | |
*IMUL.i32 t0, t1, r2 | |
+IADD.s32 r2:t1, t, t1 | |
*IMUL.i32 r30:t0, t1, r30 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r14:t1, r14, r18 | |
} | |
clause_119: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r14, r18 | |
+IADD.s32 r18:t1, t, r14 | |
*IMUL.i32 t0, t1, r14 | |
+IADD.s32 r14:t1, t, t1 | |
*IMUL.i32 t0, t1, r18 | |
+IADD.s32 r18:t1, t, t1 | |
*IMUL.i32 t0, t1, r14 | |
+IADD.s32 r14:t1, t, t1 | |
*IMUL.i32 t0, t1, r18 | |
+IADD.s32 r18:t1, t, t1 | |
*IMUL.i32 t0, t1, r14 | |
+IADD.s32 r14:t1, t, t1 | |
*IMUL.i32 r18:t0, t1, r18 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r21:t1, r21, r9 | |
} | |
clause_125: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r21, r9 | |
+IADD.s32 r9:t1, t, r21 | |
*IMUL.i32 t0, t1, r21 | |
+IADD.s32 r21:t1, t, t1 | |
*IMUL.i32 t0, t1, r9 | |
+IADD.s32 r9:t1, t, t1 | |
*IMUL.i32 t0, t1, r21 | |
+IADD.s32 r21:t1, t, t1 | |
*IMUL.i32 t0, t1, r9 | |
+IADD.s32 r9:t1, t, t1 | |
*IMUL.i32 t0, t1, r21 | |
+IADD.s32 r21:t1, t, t1 | |
*IMUL.i32 r9:t0, t1, r9 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r23:t1, r23, r11 | |
} | |
clause_131: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r23, r11 | |
+IADD.s32 r11:t1, t, r23 | |
*IMUL.i32 t0, t1, r23 | |
+IADD.s32 r23:t1, t, t1 | |
*IMUL.i32 t0, t1, r11 | |
+IADD.s32 r11:t1, t, t1 | |
*IMUL.i32 t0, t1, r23 | |
+IADD.s32 r23:t1, t, t1 | |
*IMUL.i32 t0, t1, r11 | |
+IADD.s32 r11:t1, t, t1 | |
*IMUL.i32 t0, t1, r23 | |
+IADD.s32 r23:t1, t, t1 | |
*IMUL.i32 r11:t0, t1, r11 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r5:t1, r5, r25 | |
} | |
clause_137: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r5, r25 | |
+IADD.s32 r25:t1, t, r5 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 t0, t1, r25 | |
+IADD.s32 r25:t1, t, t1 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 t0, t1, r25 | |
+IADD.s32 r25:t1, t, t1 | |
*IMUL.i32 t0, t1, r5 | |
+IADD.s32 r5:t1, t, t1 | |
*IMUL.i32 r25:t0, t1, r25 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r15:t1, r15, r19 | |
} | |
clause_143: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r15, r19 | |
+IADD.s32 r19:t1, t, r15 | |
*IMUL.i32 t0, t1, r15 | |
+IADD.s32 r15:t1, t, t1 | |
*IMUL.i32 t0, t1, r19 | |
+IADD.s32 r19:t1, t, t1 | |
*IMUL.i32 t0, t1, r15 | |
+IADD.s32 r15:t1, t, t1 | |
*IMUL.i32 t0, t1, r19 | |
+IADD.s32 r19:t1, t, t1 | |
*IMUL.i32 t0, t1, r15 | |
+IADD.s32 r15:t1, t, t1 | |
*IMUL.i32 r19:t0, t1, r19 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r20:t1, r20, r8 | |
} | |
clause_149: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r20, r8 | |
+IADD.s32 r8:t1, t, r20 | |
*IMUL.i32 t0, t1, r20 | |
+IADD.s32 r20:t1, t, t1 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 t0, t1, r20 | |
+IADD.s32 r20:t1, t, t1 | |
*IMUL.i32 t0, t1, r8 | |
+IADD.s32 r8:t1, t, t1 | |
*IMUL.i32 t0, t1, r20 | |
+IADD.s32 r20:t1, t, t1 | |
*IMUL.i32 r8:t0, t1, r8 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r1:t1, r1, r29 | |
} | |
clause_155: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r1, r29 | |
+IADD.s32 r29:t1, t, r1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r29 | |
+IADD.s32 r29:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 t0, t1, r29 | |
+IADD.s32 r29:t1, t, t1 | |
*IMUL.i32 t0, t1, r1 | |
+IADD.s32 r1:t1, t, t1 | |
*IMUL.i32 r29:t0, t1, r29 | |
+NOP t1 | |
*NOP t0 | |
+IADD.s32 r9:t1, r9, r21 | |
} | |
clause_161: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r9, r21 | |
+IADD.s32 r21:t1, t, r9 | |
*IMUL.i32 t0, t1, r9 | |
+IADD.s32 r9:t1, t, t1 | |
*IMUL.i32 r21:t0, t1, r21 | |
+NOP t1 | |
*IMUL.i32 t0, r24, r4 | |
+IADD.s32 r4:t1, t, r24 | |
*IMUL.i32 t0, r22, r10 | |
+IADD.s32 r10:t1, t, r22 | |
*IMUL.i32 t0, r4, r24 | |
+IADD.s32 r24:t1, t, r4 | |
*IMUL.i32 t0, r10, r22 | |
+IADD.s32 r22:t1, t, r10 | |
*NOP t0 | |
+IADD.s32 r17:t1, r17, r13 | |
} | |
clause_167: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r24, r4 | |
+IADD.s32 r4:t1, t, r24 | |
*IMUL.i32 t0, r22, r10 | |
+IADD.s32 r10:t1, t, r22 | |
*IMUL.i32 t0, r17, r13 | |
+IADD.s32 r13:t1, t, r17 | |
*IMUL.i32 t0, r4, r24 | |
+IADD.s32 r24:t1, t, r4 | |
*IMUL.i32 t0, r10, r22 | |
+IADD.s32 r22:t1, t, r10 | |
*IMUL.i32 t0, r13, r17 | |
+IADD.s32 r17:t1, t, r13 | |
*IMUL.i32 t0, r24, r4 | |
+IADD.s32 r4:t1, t, r24 | |
*NOP t0 | |
+IADD.s32 r7:t1, r7, r27 | |
} | |
clause_173: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r22, r10 | |
+IADD.s32 r10:t1, t, r22 | |
*IMUL.i32 t0, r17, r13 | |
+IADD.s32 r13:t1, t, r17 | |
*IMUL.i32 t0, r4, r24 | |
+IADD.s32 r24:t1, t, r4 | |
*IMUL.i32 t0, r7, r27 | |
+IADD.s32 r27:t1, t, r7 | |
*IMUL.i32 t0, r10, r22 | |
+IADD.s32 r22:t1, t, r10 | |
*IMUL.i32 t0, r13, r17 | |
+IADD.s32 r17:t1, t, r13 | |
*IMUL.i32 t0, r24, r4 | |
+IADD.s32 r4:t1, t, r24 | |
*NOP t0 | |
+IADD.s32 r6:t1, r6, r26 | |
} | |
clause_179: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r27, r7 | |
+IADD.s32 r7:t1, t, r27 | |
*IMUL.i32 t0, r22, r10 | |
+IADD.s32 r10:t1, t, r22 | |
*IMUL.i32 t0, r17, r13 | |
+IADD.s32 r13:t1, t, r17 | |
*IMUL.i32 t0, r4, r24 | |
+IADD.s32 r24:t1, t, r4 | |
*IMUL.i32 t0, r6, r26 | |
+IADD.s32 r26:t1, t, r6 | |
*IMUL.i32 t0, r7, r27 | |
+IADD.s32 r27:t1, t, r7 | |
*IMUL.i32 t0, r10, r22 | |
+IADD.s32 r22:t1, t, r10 | |
*NOP t0 | |
+IADD.s32 r16:t1, r16, r12 | |
} | |
clause_185: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r13, r17 | |
+IADD.s32 r17:t1, t, r13 | |
*IMUL.i32 t0, r24, r4 | |
+IADD.s32 r4:t1, t, r24 | |
*IMUL.i32 t0, r26, r6 | |
+IADD.s32 r6:t1, t, r26 | |
*IMUL.i32 t0, r27, r7 | |
+IADD.s32 r7:t1, t, r27 | |
*IMUL.i32 t0, r22, r10 | |
+IADD.s32 r10:t1, t, r22 | |
*IMUL.i32 t0, r16, r12 | |
+IADD.s32 r12:t1, t, r16 | |
*IMUL.i32 t0, r17, r13 | |
+IADD.s32 r13:t1, t, r17 | |
*NOP t0 | |
+IADD.s32 r28:t1, r28, r32 | |
} | |
clause_191: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r4, r24 | |
+IADD.s32 r24:t1, t, r4 | |
*IMUL.i32 t0, r6, r26 | |
+IADD.s32 r26:t1, t, r6 | |
*IMUL.i32 t0, r7, r27 | |
+IADD.s32 r27:t1, t, r7 | |
*IMUL.i32 t0, r10, r22 | |
+IADD.s32 r22:t1, t, r10 | |
*IMUL.i32 t0, r12, r16 | |
+IADD.s32 r16:t1, t, r12 | |
*IMUL.i32 t0, r13, r17 | |
+IADD.s32 r17:t1, t, r13 | |
*IMUL.i32 t0, r28, r32 | |
+IADD.s32 r32:t1, t, r28 | |
*NOP t0 | |
+IADD.s32 r3:t1, r3, r31 | |
} | |
clause_197: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r24, r4 | |
+IADD.s32 r4:t1, t, r24 | |
*IMUL.i32 t0, r26, r6 | |
+IADD.s32 r6:t1, t, r26 | |
*IMUL.i32 t0, r27, r7 | |
+IADD.s32 r7:t1, t, r27 | |
*IMUL.i32 t0, r22, r10 | |
+IADD.s32 r10:t1, t, r22 | |
*IMUL.i32 t0, r16, r12 | |
+IADD.s32 r12:t1, t, r16 | |
*IMUL.i32 t0, r17, r13 | |
+IADD.s32 r13:t1, t, r17 | |
*IMUL.i32 t0, r32, r28 | |
+IADD.s32 r28:t1, t, r32 | |
*NOP t0 | |
+IADD.s32 r30:t1, r30, r2 | |
} | |
clause_203: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r3, r31 | |
+IADD.s32 r31:t1, t, r3 | |
*IMUL.i32 t0, r4, r24 | |
+IADD.s32 r24:t1, t, r4 | |
*IMUL.i32 t0, r6, r26 | |
+IADD.s32 r26:t1, t, r6 | |
*IMUL.i32 t0, r7, r27 | |
+IADD.s32 r27:t1, t, r7 | |
*IMUL.i32 t0, r10, r22 | |
+IADD.s32 r22:t1, t, r10 | |
*IMUL.i32 t0, r12, r16 | |
+IADD.s32 r16:t1, t, r12 | |
*IMUL.i32 t0, r13, r17 | |
+IADD.s32 r17:t1, t, r13 | |
*NOP t0 | |
+IADD.s32 r18:t1, r18, r14 | |
} | |
clause_209: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r28, r32 | |
+IADD.s32 r32:t1, t, r28 | |
*IMUL.i32 t0, r30, r2 | |
+IADD.s32 r2:t1, t, r30 | |
*IMUL.i32 t0, r31, r3 | |
+IADD.s32 r3:t1, t, r31 | |
*IMUL.i32 t0, r24, r4 | |
+IADD.s32 r4:t1, t, r24 | |
*IMUL.i32 t0, r26, r6 | |
+IADD.s32 r6:t1, t, r26 | |
*IMUL.i32 t0, r27, r7 | |
+IADD.s32 r7:t1, t, r27 | |
*IMUL.i32 t0, r22, r10 | |
+IADD.s32 r10:t1, t, r22 | |
*NOP t0 | |
+IADD.s32 r11:t1, r11, r23 | |
} | |
clause_215: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r16, r12 | |
+IADD.s32 r12:t1, t, r16 | |
*IMUL.i32 t0, r17, r13 | |
+IADD.s32 r13:t1, t, r17 | |
*IMUL.i32 t0, r18, r14 | |
+IADD.s32 r14:t1, t, r18 | |
*IMUL.i32 t0, r32, r28 | |
+IADD.s32 r28:t1, t, r32 | |
*IMUL.i32 t0, r2, r30 | |
+IADD.s32 r30:t1, t, r2 | |
*IMUL.i32 t0, r3, r31 | |
+IADD.s32 r31:t1, t, r3 | |
*IMUL.i32 t0, r4, r24 | |
+IADD.s32 r24:t1, t, r4 | |
*NOP t0 | |
+IADD.s32 r25:t1, r25, r5 | |
} | |
clause_221: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r6, r26 | |
+IADD.s32 r26:t1, t, r6 | |
*IMUL.i32 t0, r7, r27 | |
+IADD.s32 r27:t1, t, r7 | |
*IMUL.i32 t0, r10, r22 | |
+IADD.s32 r22:t1, t, r10 | |
*IMUL.i32 t0, r11, r23 | |
+IADD.s32 r23:t1, t, r11 | |
*IMUL.i32 t0, r12, r16 | |
+IADD.s32 r16:t1, t, r12 | |
*IMUL.i32 t0, r13, r17 | |
+IADD.s32 r17:t1, t, r13 | |
*IMUL.i32 t0, r14, r18 | |
+IADD.s32 r18:t1, t, r14 | |
*NOP t0 | |
+IADD.s32 r19:t1, r19, r15 | |
} | |
clause_227: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r28, r32 | |
+IADD.s32 r32:t1, t, r28 | |
*IMUL.i32 t0, r30, r2 | |
+IADD.s32 r2:t1, t, r30 | |
*IMUL.i32 t0, r31, r3 | |
+IADD.s32 r3:t1, t, r31 | |
*IMUL.i32 t0, r24, r4 | |
+IADD.s32 r4:t1, t, r24 | |
*IMUL.i32 t0, r25, r5 | |
+IADD.s32 r5:t1, t, r25 | |
*IMUL.i32 t0, r26, r6 | |
+IADD.s32 r6:t1, t, r26 | |
*IMUL.i32 t0, r27, r7 | |
+IADD.s32 r7:t1, t, r27 | |
*NOP t0 | |
+IADD.s32 r8:t1, r8, r20 | |
} | |
clause_233: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r22, r10 | |
+IADD.s32 r10:t1, t, r22 | |
*IMUL.i32 t0, r23, r11 | |
+IADD.s32 r11:t1, t, r23 | |
*IMUL.i32 t0, r16, r12 | |
+IADD.s32 r12:t1, t, r16 | |
*IMUL.i32 t0, r17, r13 | |
+IADD.s32 r13:t1, t, r17 | |
*IMUL.i32 t0, r18, r14 | |
+IADD.s32 r14:t1, t, r18 | |
*IMUL.i32 t0, r19, r15 | |
+IADD.s32 r15:t1, t, r19 | |
*IMUL.i32 t0, r32, r28 | |
+IADD.s32 r28:t1, t, r32 | |
*NOP t0 | |
+IADD.s32 r29:t1, r29, r1 | |
} | |
clause_239: | |
ds(0) nbb ncph | |
{ | |
*IMUL.i32 t0, r2, r30 | |
+IADD.s32 r30:t1, t, r2 | |
*IMUL.i32 t0, r3, r31 | |
+IADD.s32 r31:t1, t, r3 | |
*IMUL.i32 t0, r4, r24 | |
+IADD.s32 r24:t1, t, r4 | |
*IMUL.i32 t0, r5, r25 | |
+IADD.s32 r25:t1, t, r5 | |
*IMUL.i32 t0, r6, r26 | |
+IADD.s32 r26:t1, t, r6 | |
*IMUL.i32 t0, r7, r27 | |
+IADD.s32 r27:t1, t, r7 | |
*IMUL.i32 t0, r8, r20 | |
+IADD.s32 r20:t1, t, r8 | |
*NOP t0 | |
+IADD.s32 r21:t1, r21, r9 | |
} | |
clause_245: | |
ds(0) nbb r_uncond | |
{ | |
*IMUL.i32 t0, r10, r22 | |
+IADD.s32 r22:t1, t, r10 | |
*IMUL.i32 t0, r11, r23 | |
+IADD.s32 r23:t1, t, r11 | |
*IMUL.i32 t0, r12, r16 | |
+IADD.s32 r16:t1, t, r12 | |
*IMUL.i32 t0, r13, r17 | |
+IADD.s32 r17:t1, t, r13 | |
*IMUL.i32 t0, r14, r18 | |
+IADD.s32 r18:t1, t, r14 | |
*IMUL.i32 t0, r15, r19 | |
+IADD.s32 r19:t1, t, r15 | |
*NOP t0 | |
+IADD.s32 r33:t1, r33, 0x00000001 /* 0.000000 */ | |
*NOP t0 | |
+JUMP t1, clause_27 | |
} | |
clause_252: | |
ds(0) nbb ncph | |
{ | |
*NOP t0 | |
+IADD.s32 r1:t1, r28, r20 | |
*NOP t0 | |
+IADD.s32 r2:t1, r29, r21 | |
*NOP t0 | |
+IADD.s32 r3:t1, r30, r22 | |
} | |
clause_255: | |
ds(0) nbb ncph next_store dwb(0) | |
{ | |
*NOP t0 | |
+IADD.s32 r4:t1, r31, r23 | |
*NOP t0 | |
+IADD.s32 r5:t1, r24, r16 | |
*NOP t0 | |
+IADD.s32 r6:t1, r25, r17 | |
*NOP t0 | |
+IADD.s32 r7:t1, r26, r18 | |
*NOP t0 | |
+IADD.s32 r8:t1, r27, r19 | |
*NOP t0 | |
+IADD.s32 r1:t1, r1, r5 | |
*NOP t0 | |
+IADD.s32 r2:t1, r2, r6 | |
*NOP t0 | |
+IADD.s32 r3:t1, r3, r7 | |
} | |
clause_261: | |
ds(0) eos store | |
{ | |
*NOP t0 | |
+IADD.s32 r4:t1, r4, r8 | |
*NOP t0 | |
+IADD.s32 r1:t1, r1, r3 | |
*NOP t0 | |
+IADD.s32 t1, r2, r4 | |
*LSHIFT_OR.i32 t0, r0, #0, 0x00000002 /* 0.000000 */ | |
+IADD.s32 r1:t1, r1, t1 | |
*NOP t0 | |
+IADD.s32 r0:t1, u0.w0, t0 | |
*NOP t0 | |
+ICMP.u32.gt t1, u0.w0, t1 | |
*NOP t0 | |
+IADD.s32 t1, t1, u0.w1 | |
*NOP t0 | |
+STORE.i32 t1, r0, t1, @r1 | |
} | |
e20eea22 compute_sp_v16_int 16.765 GFLOPs 16.012ms |
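The closing line is the benchmark summary for this kernel. Despite the GFLOPs label, compute_sp_v16_int is the integer variant — the listings above contain only IMUL.i32/IADD.s32 — so the figure counts integer multiply-add operations. The number can be reproduced from counts visible in the dump; a minimal C sanity check (the invocation count is an inference — a 256x256 dispatch would produce it — since the dispatch size itself is not part of this dump):

#include <stdio.h>

int main(void)
{
    const double trips        = 8;      /* loop bound: ICMP.s32.m1.ge r33, #0x8  */
    const double ops_per_trip = 512;    /* 256 IMUL.i32 + 256 IADD.s32 in block4 */
    const double invocations  = 65536;  /* assumed: e.g. a 256x256 global size   */
    const double seconds      = 16.012e-3;                  /* reported runtime  */
    double total_ops = trips * ops_per_trip * invocations;  /* = 2^28            */
    printf("%.3f Gops/s\n", total_ops / seconds / 1e9);     /* ~16.765, matching */
    return 0;
}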