Skip to content

Instantly share code, notes, and snippets.

@Groovounet
Last active February 14, 2018 17:05
Show Gist options
  • Save Groovounet/eee06640917a027cb7e8a041ce919cd2 to your computer and use it in GitHub Desktop.
Save Groovounet/eee06640917a027cb7e8a041ce919cd2 to your computer and use it in GitHub Desktop.
Dependent texture fetches with Polaris 10 ASM
#version 430 core
// Fragment shader that chains FETCH_COUNT texture fetches: the coordinate used
// for each fetch is derived from the .xy result of the previous fetch.
// Number of chained fetches; also the size of the sampler array.
#define FETCH_COUNT 16
// One sampler per fetch in the chain.
uniform sampler2D TextureDiffuse[FETCH_COUNT];
in vec4 gl_FragCoord;
layout(location = 0, index = 0) out vec4 Color;
void main()
{
// Starting coordinate: window position scaled by 1/2048.
vec2 Coord = gl_FragCoord.xy * (1.0 / 2048.0);
// Each iteration samples texture i at the current coordinate and uses the
// returned .xy (plus a small offset) as the coordinate for the next iteration.
for(int i = 0; i < FETCH_COUNT; ++i)
Coord = texture(TextureDiffuse[i], Coord).xy + 0.000001;
// Write the final coordinate (scaled) to .xy, with constant 0.0 and 1.0 in .zw.
Color = vec4(Coord * 1.000001, 0.0, 1.0);
}
1 | 3 | :: : | label_basic_block_1: s_mov_b64 s[36:37], exec
2 | 3 | :: : | s_wqm_b64 exec, exec
3 | 3 | :: : | s_mov_b32 s0, s1
4 | 3 | :: : | s_movk_i32 s1, 0x0000
5 | 3 | :: : | s_movk_i32 s3, 0x0000
6 | 3 | :: : | s_load_dwordx8 s[8:15], s[0:1], 0x00
7 | 3 | :: : | s_load_dwordx8 s[16:23], s[2:3], 0x00
8 | 3 | :: : | s_load_dwordx8 s[24:31], s[0:1], 0x20
9 | 3 | :: : | s_andn2_b32 s5, s5, 0x3fff0000
10 | 4 | ^ :: : | v_mov_b32 v0, 0
11 | 5 | :^:: : | v_mov_b32 v1, 1.0
12 | 5 | :::: : | s_buffer_load_dwordx4 s[4:7], s[4:7], 0x10
13 | 5 | :::: : | s_waitcnt lgkmcnt(0)
14 | 5 | ::x: : | v_add_f32 v2, s4, v2
15 | 6 | ::::^: | v_mov_b32 v4, s5
16 | 6 | :::xv: | v_mad_legacy_f32 v3, v3, s6, v4
17 | 6 | ::x::: | v_mul_f32 v2, 0x3a000000, v2
18 | 6 | :::x:: | v_mul_f32 v3, 0x3a000000, v3
19 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[8:15], s[16:19]
20 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x40
21 | 6 | :::::: | s_load_dwordx8 s[12:19], s[2:3], 0x20
22 | 6 | :::::: | s_waitcnt vmcnt(0)
23 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
24 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
25 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[24:31], s[20:23]
26 | 6 | :::::: | s_load_dwordx8 s[20:27], s[0:1], 0x60
27 | 6 | :::::: | s_waitcnt vmcnt(0)
28 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
29 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
30 | 6 | :::::: | s_waitcnt lgkmcnt(0)
31 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[12:15]
32 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x80
33 | 6 | :::::: | s_load_dwordx8 s[28:35], s[2:3], 0x40
34 | 6 | :::::: | s_waitcnt vmcnt(0)
35 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
36 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
37 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[20:27], s[16:19]
38 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0xa0
39 | 6 | :::::: | s_waitcnt vmcnt(0)
40 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
41 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
42 | 6 | :::::: | s_waitcnt lgkmcnt(0)
43 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[28:31]
44 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0xc0
45 | 6 | :::::: | s_load_dwordx8 s[20:27], s[2:3], 0x60
46 | 6 | :::::: | s_waitcnt vmcnt(0)
47 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
48 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
49 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[32:35]
50 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0xe0
51 | 6 | :::::: | s_waitcnt vmcnt(0)
52 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
53 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
54 | 6 | :::::: | s_waitcnt lgkmcnt(0)
55 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[20:23]
56 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x100
57 | 6 | :::::: | s_load_dwordx8 s[28:35], s[2:3], 0x80
58 | 6 | :::::: | s_waitcnt vmcnt(0)
59 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
60 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
61 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[24:27]
62 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0x120
63 | 6 | :::::: | s_waitcnt vmcnt(0)
64 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
65 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
66 | 6 | :::::: | s_waitcnt lgkmcnt(0)
67 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[28:31]
68 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x140
69 | 6 | :::::: | s_load_dwordx8 s[20:27], s[2:3], 0xa0
70 | 6 | :::::: | s_waitcnt vmcnt(0)
71 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
72 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
73 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[32:35]
74 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0x160
75 | 6 | :::::: | s_waitcnt vmcnt(0)
76 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
77 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
78 | 6 | :::::: | s_waitcnt lgkmcnt(0)
79 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[20:23]
80 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x180
81 | 6 | :::::: | s_load_dwordx8 s[28:35], s[2:3], 0xc0
82 | 6 | :::::: | s_waitcnt vmcnt(0)
83 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
84 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
85 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[24:27]
86 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0x1a0
87 | 6 | :::::: | s_waitcnt vmcnt(0)
88 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
89 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
90 | 6 | :::::: | s_waitcnt lgkmcnt(0)
91 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[28:31]
92 | 6 | :::::: | s_load_dwordx8 s[4:11], s[0:1], 0x1c0
93 | 6 | :::::: | s_load_dwordx8 s[20:27], s[2:3], 0xe0
94 | 6 | :::::: | s_waitcnt vmcnt(0)
95 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
96 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
97 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[32:35]
98 | 6 | :::::: | s_load_dwordx8 s[12:19], s[0:1], 0x1e0
99 | 6 | :::::: | s_waitcnt vmcnt(0)
100 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
101 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
102 | 6 | :::::: | s_waitcnt lgkmcnt(0)
103 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[4:11], s[20:23]
104 | 6 | :::::: | s_waitcnt vmcnt(0)
105 | 6 | ::x::: | v_add_f32 v2, 0x358637bd, v2
106 | 6 | :::x:: | v_add_f32 v3, 0x358637bd, v3
107 | 6 | ::xxvv | image_sample v[2:3], v[2:5], s[12:19], s[24:27]
108 | 4 | :::: | s_waitcnt vmcnt(0)
109 | 4 | ::x: | v_add_f32 v2, 0x358637bd, v2
110 | 4 | :::x | v_add_f32 v3, 0x358637bd, v3
111 | 4 | ::x: | v_mul_f32 v2, 0x3f800008, v2
112 | 4 | :::x | v_mul_f32 v3, 0x3f800008, v3
113 | 4 | :::: | s_mov_b64 exec, s[36:37]
114 | 4 | ::xv | v_cvt_pkrtz_f16_f32 v2, v2, v3
115 | 3 | xv: | v_cvt_pkrtz_f16_f32 v0, v0, v1
116 | 2 | v v | exp mrt0, v2, v2, v0, v0
117 | 0 | | s_endpgm
Maximum # VGPR used 6, # VGPR allocated: 6
shader main
asic(VI)
type(PS)
// Disassembly of a pixel shader containing 16 image_sample instructions and no
// branches. Every sample reads its coordinates from v[2:3], which the previous
// sample wrote, so each sample after the first is preceded by
// s_waitcnt vmcnt(0) and the two v_add_f32 that consume the previous result.
// Operand sources, as visible in the loads below:
//   - image_sample resource operand (8 SGPRs): s_load_dwordx8 from s[0:1],
//     offsets 0x00, 0x20, ... 0x1e0 (one load per fetch).
//   - image_sample sampler operand (4 SGPRs): s_load_dwordx8 from s[2:3],
//     offsets 0x00, 0x20, ... 0xe0; each load supplies two consecutive fetches.
// s_ps_state in s0
// Save exec in s[36:37] and switch to whole-quad mode; exec is restored before the export.
s_mov_b64 s[36:37], exec // 000000000000: BEA4017E
s_wqm_b64 exec, exec // 000000000004: BEFE077E
// Form the 64-bit bases s[0:1] = {s1, 0} and s[2:3] = {s2, 0}.
s_mov_b32 s0, s1 // 000000000008: BE800001
s_movk_i32 s1, 0x0000 // 00000000000C: B0010000
s_movk_i32 s3, 0x0000 // 000000000010: B0030000
// Load the operands for fetches 1 and 2.
s_load_dwordx8 s[8:15], s[0:1], 0x00 // 000000000014: C00E0200 00000000
s_load_dwordx8 s[16:23], s[2:3], 0x00 // 00000000001C: C00E0401 00000000
s_load_dwordx8 s[24:31], s[0:1], 0x20 // 000000000024: C00E0600 00000020
// Clear bits 16-29 of s5, the second dword of the s[4:7] operand used by the buffer load below.
s_andn2_b32 s5, s5, 0x3fff0000 // 00000000002C: 8905FF05 3FFF0000
// Constants 0 and 1.0; packed into the last two export channels at the end.
v_mov_b32 v0, 0 // 000000000034: 7E000280
v_mov_b32 v1, 1.0 // 000000000038: 7E0202F2
// Load four constants; s4, s5 and s6 are applied to the incoming v2/v3.
// NOTE(review): presumably a driver-supplied window-coordinate adjustment - not shown in this listing, confirm.
s_buffer_load_dwordx4 s[4:7], s[4:7], 0x10 // 00000000003C: C02A0102 00000010
s_waitcnt lgkmcnt(0) // 000000000044: BF8C007F
v_add_f32 v2, s4, v2 // 000000000048: 02040404
v_mov_b32 v4, s5 // 00000000004C: 7E080205
v_mad_legacy_f32 v3, v3, s6, v4 // 000000000050: D1C00003 04100D03
// 0x3a000000 is the float 2^-11 = 1/2048.
v_mul_f32 v2, 0x3a000000, v2 // 000000000058: 0A0404FF 3A000000
v_mul_f32 v3, 0x3a000000, v3 // 000000000060: 0A0606FF 3A000000
// Fetch 1 of 16. dmask:0x3 returns two channels, written back into v[2:3].
image_sample v[2:3], v[2:5], s[8:15], s[16:19] dmask:0x3 // 000000000068: F0800300 00820202
// Operand loads for later fetches are issued right after a sample, before the
// wait on its result; s_waitcnt lgkmcnt(0) then precedes the sample that uses them.
s_load_dwordx8 s[4:11], s[0:1], 0x40 // 000000000070: C00E0100 00000040
s_load_dwordx8 s[12:19], s[2:3], 0x20 // 000000000078: C00E0301 00000020
s_waitcnt vmcnt(0) // 000000000080: BF8C0F70
// 0x358637bd is approximately 1e-6 as a float; added to both returned channels.
v_add_f32 v2, 0x358637bd, v2 // 000000000084: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 00000000008C: 020606FF 358637BD
// Fetch 2 of 16.
image_sample v[2:3], v[2:5], s[24:31], s[20:23] dmask:0x3 // 000000000094: F0800300 00A60202
s_load_dwordx8 s[20:27], s[0:1], 0x60 // 00000000009C: C00E0500 00000060
s_waitcnt vmcnt(0) // 0000000000A4: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 0000000000A8: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 0000000000B0: 020606FF 358637BD
s_waitcnt lgkmcnt(0) // 0000000000B8: BF8C007F
// Fetch 3 of 16.
image_sample v[2:3], v[2:5], s[4:11], s[12:15] dmask:0x3 // 0000000000BC: F0800300 00610202
s_load_dwordx8 s[4:11], s[0:1], 0x80 // 0000000000C4: C00E0100 00000080
s_load_dwordx8 s[28:35], s[2:3], 0x40 // 0000000000CC: C00E0701 00000040
s_waitcnt vmcnt(0) // 0000000000D4: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 0000000000D8: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 0000000000E0: 020606FF 358637BD
// Fetch 4 of 16.
image_sample v[2:3], v[2:5], s[20:27], s[16:19] dmask:0x3 // 0000000000E8: F0800300 00850202
s_load_dwordx8 s[12:19], s[0:1], 0xa0 // 0000000000F0: C00E0300 000000A0
s_waitcnt vmcnt(0) // 0000000000F8: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 0000000000FC: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 000000000104: 020606FF 358637BD
s_waitcnt lgkmcnt(0) // 00000000010C: BF8C007F
// Fetch 5 of 16.
image_sample v[2:3], v[2:5], s[4:11], s[28:31] dmask:0x3 // 000000000110: F0800300 00E10202
s_load_dwordx8 s[4:11], s[0:1], 0xc0 // 000000000118: C00E0100 000000C0
s_load_dwordx8 s[20:27], s[2:3], 0x60 // 000000000120: C00E0501 00000060
s_waitcnt vmcnt(0) // 000000000128: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 00000000012C: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 000000000134: 020606FF 358637BD
// Fetch 6 of 16.
image_sample v[2:3], v[2:5], s[12:19], s[32:35] dmask:0x3 // 00000000013C: F0800300 01030202
s_load_dwordx8 s[12:19], s[0:1], 0xe0 // 000000000144: C00E0300 000000E0
s_waitcnt vmcnt(0) // 00000000014C: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 000000000150: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 000000000158: 020606FF 358637BD
s_waitcnt lgkmcnt(0) // 000000000160: BF8C007F
// Fetch 7 of 16.
image_sample v[2:3], v[2:5], s[4:11], s[20:23] dmask:0x3 // 000000000164: F0800300 00A10202
s_load_dwordx8 s[4:11], s[0:1], 0x100 // 00000000016C: C00E0100 00000100
s_load_dwordx8 s[28:35], s[2:3], 0x80 // 000000000174: C00E0701 00000080
s_waitcnt vmcnt(0) // 00000000017C: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 000000000180: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 000000000188: 020606FF 358637BD
// Fetch 8 of 16.
image_sample v[2:3], v[2:5], s[12:19], s[24:27] dmask:0x3 // 000000000190: F0800300 00C30202
s_load_dwordx8 s[12:19], s[0:1], 0x120 // 000000000198: C00E0300 00000120
s_waitcnt vmcnt(0) // 0000000001A0: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 0000000001A4: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 0000000001AC: 020606FF 358637BD
s_waitcnt lgkmcnt(0) // 0000000001B4: BF8C007F
// Fetch 9 of 16.
image_sample v[2:3], v[2:5], s[4:11], s[28:31] dmask:0x3 // 0000000001B8: F0800300 00E10202
s_load_dwordx8 s[4:11], s[0:1], 0x140 // 0000000001C0: C00E0100 00000140
s_load_dwordx8 s[20:27], s[2:3], 0xa0 // 0000000001C8: C00E0501 000000A0
s_waitcnt vmcnt(0) // 0000000001D0: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 0000000001D4: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 0000000001DC: 020606FF 358637BD
// Fetch 10 of 16.
image_sample v[2:3], v[2:5], s[12:19], s[32:35] dmask:0x3 // 0000000001E4: F0800300 01030202
s_load_dwordx8 s[12:19], s[0:1], 0x160 // 0000000001EC: C00E0300 00000160
s_waitcnt vmcnt(0) // 0000000001F4: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 0000000001F8: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 000000000200: 020606FF 358637BD
s_waitcnt lgkmcnt(0) // 000000000208: BF8C007F
// Fetch 11 of 16.
image_sample v[2:3], v[2:5], s[4:11], s[20:23] dmask:0x3 // 00000000020C: F0800300 00A10202
s_load_dwordx8 s[4:11], s[0:1], 0x180 // 000000000214: C00E0100 00000180
s_load_dwordx8 s[28:35], s[2:3], 0xc0 // 00000000021C: C00E0701 000000C0
s_waitcnt vmcnt(0) // 000000000224: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 000000000228: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 000000000230: 020606FF 358637BD
// Fetch 12 of 16.
image_sample v[2:3], v[2:5], s[12:19], s[24:27] dmask:0x3 // 000000000238: F0800300 00C30202
s_load_dwordx8 s[12:19], s[0:1], 0x1a0 // 000000000240: C00E0300 000001A0
s_waitcnt vmcnt(0) // 000000000248: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 00000000024C: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 000000000254: 020606FF 358637BD
s_waitcnt lgkmcnt(0) // 00000000025C: BF8C007F
// Fetch 13 of 16.
image_sample v[2:3], v[2:5], s[4:11], s[28:31] dmask:0x3 // 000000000260: F0800300 00E10202
s_load_dwordx8 s[4:11], s[0:1], 0x1c0 // 000000000268: C00E0100 000001C0
s_load_dwordx8 s[20:27], s[2:3], 0xe0 // 000000000270: C00E0501 000000E0
s_waitcnt vmcnt(0) // 000000000278: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 00000000027C: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 000000000284: 020606FF 358637BD
// Fetch 14 of 16.
image_sample v[2:3], v[2:5], s[12:19], s[32:35] dmask:0x3 // 00000000028C: F0800300 01030202
s_load_dwordx8 s[12:19], s[0:1], 0x1e0 // 000000000294: C00E0300 000001E0
s_waitcnt vmcnt(0) // 00000000029C: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 0000000002A0: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 0000000002A8: 020606FF 358637BD
s_waitcnt lgkmcnt(0) // 0000000002B0: BF8C007F
// Fetch 15 of 16.
image_sample v[2:3], v[2:5], s[4:11], s[20:23] dmask:0x3 // 0000000002B4: F0800300 00A10202
s_waitcnt vmcnt(0) // 0000000002BC: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 0000000002C0: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 0000000002C8: 020606FF 358637BD
// Fetch 16 of 16.
image_sample v[2:3], v[2:5], s[12:19], s[24:27] dmask:0x3 // 0000000002D0: F0800300 00C30202
s_waitcnt vmcnt(0) // 0000000002D8: BF8C0F70
v_add_f32 v2, 0x358637bd, v2 // 0000000002DC: 020404FF 358637BD
v_add_f32 v3, 0x358637bd, v3 // 0000000002E4: 020606FF 358637BD
// 0x3f800008 is the float nearest 1.000001; final scale of both channels.
v_mul_f32 v2, 0x3f800008, v2 // 0000000002EC: 0A0404FF 3F800008
v_mul_f32 v3, 0x3f800008, v3 // 0000000002F4: 0A0606FF 3F800008
// Restore the exec mask saved at entry before exporting.
s_mov_b64 exec, s[36:37] // 0000000002FC: BEFE0124
// Pack (v2, v3) and (0, 1.0) into half-float pairs and export them compressed to mrt0.
v_cvt_pkrtz_f16_f32 v2, v2, v3 // 000000000300: D2960002 00020702
v_cvt_pkrtz_f16_f32 v0, v0, v1 // 000000000308: D2960000 00020300
exp mrt0, v2, v2, v0, v0 done compr vm // 000000000310: C4001C0F 00000002
s_endpgm // 000000000318: BF810000
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment