Created
October 10, 2017 15:17
-
-
Save masahi/ba5b376d2ecd15c72f6ee599a84287a7 to your computer and use it in GitHub Desktop.
The output of: $ /opt/rocm/hcc/compiler/bin/llvm-objdump -disassemble -mcpu=gfx803 rocm_kernel.co
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
rocm_kernel.co: file format ELF64-amdgpu-hsacobj | |
Disassembly of section .text: | |
// ---------------------------------------------------------------------------
// myadd__kernel0 -- per lane: dst[i] = srcA[i] + srcB[i] (f32), behind a
// signed bounds test.
//
// Pseudo-C, derived from the instructions below:
//     base = v0 << 7;
//     i    = s6 + base;
//     if (s6 < n - base)                 // signed 32-bit compare
//         dst[i] = srcA[i] + srcB[i];
//
// Values read through the pointer held in s[4:5] on entry:
//     +0x00  dst   64-bit address, target of the store
//     +0x08  srcB  64-bit address, second load (lands in v3)
//     +0x10  srcA  64-bit address, first load  (lands in v2)
//     +0x18  n     32-bit bound
//
// NOTE(review): s[4:5] is presumably the kernarg segment pointer, s6 the
// workgroup id and v0 the work-item id. That mapping is fixed by the kernel
// descriptor, which is not part of this listing -- confirm before relying
// on it.
// ---------------------------------------------------------------------------
myadd__kernel0: | |
// s0 = n (dword at s[4:5] + 0x18); the shift below overlaps the load latency.
s_load_dword s0, s[4:5], 0x18 // 000000001100: C0020002 00000018 | |
// v0 = v0 << 7, i.e. v0 * 128.
v_lshlrev_b32_e32 v0, 7, v0 // 000000001108: 24000087 | |
// Block until the scalar load of s0 has completed.
s_waitcnt lgkmcnt(0) // 00000000110C: BF8C007F | |
// v1 = n - base; vcc = (s6 < v1), evaluated per lane as signed i32.
v_sub_i32_e32 v1, vcc, s0, v0 // 000000001110: 34020000 | |
v_cmp_lt_i32_e32 vcc, s6, v1 // 000000001114: 7D820206 | |
// s[0:1] = old exec, exec &= vcc. This overwrites s0 (n), which is not
// needed again.
s_and_saveexec_b64 s[0:1], vcc // 000000001118: BE80206A | |
// s[0:1] = lanes that were active but failed the test (exec ^ old exec).
s_xor_b64 s[0:1], exec, s[0:1] // 00000000111C: 8880007E | |
// No lane passed the test: skip the whole load/add/store body.
s_cbranch_execz BB0_2 // 000000001120: BF88001C | |
BB0_1: | |
// Fetch the three buffer addresses: s[2:3] = dst, s[8:9] = srcB.
s_load_dwordx2 s[2:3], s[4:5], 0x0 // 000000001124: C0060082 00000000 | |
s_load_dwordx2 s[8:9], s[4:5], 0x8 // 00000000112C: C0060202 00000008 | |
// v0 = i = s6 + base.
v_add_i32_e32 v0, vcc, s6, v0 // 000000001134: 32000006 | |
// s[4:5] = srcA. This replaces the base pointer itself, so it must be the
// last load issued through s[4:5].
s_load_dwordx2 s[4:5], s[4:5], 0x10 // 000000001138: C0060102 00000010 | |
// Sign-extend i to 64 bits in v[0:1], then << 2 to get a byte offset for
// 4-byte elements.
v_ashrrev_i32_e32 v1, 31, v0 // 000000001140: 2202009F | |
v_lshlrev_b64 v[0:1], 2, v[0:1] // 000000001144: D28F0000 00020082 | |
// All three address loads must have landed before the SGPRs are read.
s_waitcnt lgkmcnt(0) // 00000000114C: BF8C007F | |
// Copy the high address words into VGPRs for the add-with-carry steps below
// (v5 = srcB.hi, v6 = dst.hi).
v_mov_b32_e32 v5, s9 // 000000001150: 7E0A0209 | |
v_mov_b32_e32 v6, s3 // 000000001154: 7E0C0203 | |
// v[2:3] = srcA + offset (64-bit add: low word sets carry in vcc, high word
// consumes it).
v_add_i32_e32 v2, vcc, s4, v0 // 000000001158: 32040004 | |
v_mov_b32_e32 v3, s5 // 00000000115C: 7E060205 | |
v_addc_u32_e32 v3, vcc, v3, v1, vcc // 000000001160: 38060303 | |
// v[4:5] = srcB + offset.
v_add_i32_e32 v4, vcc, s8, v0 // 000000001164: 32080008 | |
v_addc_u32_e32 v5, vcc, v5, v1, vcc // 000000001168: 380A0305 | |
// Issue both element loads; each destination reuses a register of the
// address pair (v2 <- [v[2:3]], v3 <- [v[4:5]]).
flat_load_dword v2, v[2:3] // 00000000116C: DC500000 02000002 | |
flat_load_dword v3, v[4:5] // 000000001174: DC500000 03000004 | |
// v[0:1] = dst + offset, computed while the loads are in flight.
v_add_i32_e32 v0, vcc, s2, v0 // 00000000117C: 32000002 | |
v_addc_u32_e32 v1, vcc, v6, v1, vcc // 000000001180: 38020306 | |
// Both flat loads must have returned before v2/v3 are consumed.
s_waitcnt vmcnt(0) lgkmcnt(0) // 000000001184: BF8C0070 | |
// v2 = srcA[i] + srcB[i]; store the sum to dst[i].
v_add_f32_e32 v2, v2, v3 // 000000001188: 02040702 | |
flat_store_dword v[0:1], v2 // 00000000118C: DC700000 00000200 | |
BB0_2: | |
// Re-enable the lanes that were masked off by the bounds test, then end.
s_or_b64 exec, exec, s[0:1] // 000000001194: 87FE007E | |
s_endpgm // 000000001198: BF810000 | |
// Padding: 25 s_nop words covering 0x119C..0x11FC, after s_endpgm.
// The next symbol in this listing begins at 0x1300, so bytes 0x1200..0x12FF
// are not shown here.
// NOTE(review): presumably per-kernel metadata that objdump does not
// disassemble -- confirm.
s_nop 0 // 00000000119C: BF800000 | |
s_nop 0 // 0000000011A0: BF800000 | |
s_nop 0 // 0000000011A4: BF800000 | |
s_nop 0 // 0000000011A8: BF800000 | |
s_nop 0 // 0000000011AC: BF800000 | |
s_nop 0 // 0000000011B0: BF800000 | |
s_nop 0 // 0000000011B4: BF800000 | |
s_nop 0 // 0000000011B8: BF800000 | |
s_nop 0 // 0000000011BC: BF800000 | |
s_nop 0 // 0000000011C0: BF800000 | |
s_nop 0 // 0000000011C4: BF800000 | |
s_nop 0 // 0000000011C8: BF800000 | |
s_nop 0 // 0000000011CC: BF800000 | |
s_nop 0 // 0000000011D0: BF800000 | |
s_nop 0 // 0000000011D4: BF800000 | |
s_nop 0 // 0000000011D8: BF800000 | |
s_nop 0 // 0000000011DC: BF800000 | |
s_nop 0 // 0000000011E0: BF800000 | |
s_nop 0 // 0000000011E4: BF800000 | |
s_nop 0 // 0000000011E8: BF800000 | |
s_nop 0 // 0000000011EC: BF800000 | |
s_nop 0 // 0000000011F0: BF800000 | |
s_nop 0 // 0000000011F4: BF800000 | |
s_nop 0 // 0000000011F8: BF800000 | |
s_nop 0 // 0000000011FC: BF800000 | |
// ---------------------------------------------------------------------------
// myadd__kernel1 -- per lane: dst[i] = 1.0 + src[i] (f32), behind a signed
// bounds test.
//
// Pseudo-C, derived from the instructions below:
//     base = v0 << 7;
//     i    = s6 + base;
//     if (s6 < n - base)                 // signed 32-bit compare
//         dst[i] = 1.0f + src[i];
//
// Values read through the pointer held in s[4:5] on entry:
//     +0x00  dst   64-bit address, target of the store
//     +0x08  src   64-bit address, source of the load
//     +0x10  n     32-bit bound
//
// NOTE(review): s[4:5] is presumably the kernarg segment pointer, s6 the
// workgroup id and v0 the work-item id. That mapping is fixed by the kernel
// descriptor, which is not part of this listing -- confirm before relying
// on it.
// ---------------------------------------------------------------------------
myadd__kernel1: | |
// s0 = n (dword at s[4:5] + 0x10); the shift below overlaps the load latency.
s_load_dword s0, s[4:5], 0x10 // 000000001300: C0020002 00000010 | |
// v0 = v0 << 7, i.e. v0 * 128.
v_lshlrev_b32_e32 v0, 7, v0 // 000000001308: 24000087 | |
// Block until the scalar load of s0 has completed.
s_waitcnt lgkmcnt(0) // 00000000130C: BF8C007F | |
// v1 = n - base; vcc = (s6 < v1), evaluated per lane as signed i32.
v_sub_i32_e32 v1, vcc, s0, v0 // 000000001310: 34020000 | |
v_cmp_lt_i32_e32 vcc, s6, v1 // 000000001314: 7D820206 | |
// s[0:1] = old exec, exec &= vcc. This overwrites s0 (n), which is not
// needed again.
s_and_saveexec_b64 s[0:1], vcc // 000000001318: BE80206A | |
// s[0:1] = lanes that were active but failed the test (exec ^ old exec).
s_xor_b64 s[0:1], exec, s[0:1] // 00000000131C: 8880007E | |
// No lane passed the test: skip the whole load/add/store body.
s_cbranch_execz BB1_2 // 000000001320: BF880015 | |
BB1_1: | |
// s[2:3] = dst.
s_load_dwordx2 s[2:3], s[4:5], 0x0 // 000000001324: C0060082 00000000 | |
// v0 = i = s6 + base.
v_add_i32_e32 v0, vcc, s6, v0 // 00000000132C: 32000006 | |
// s[4:5] = src. This replaces the base pointer itself, so it must be the
// last load issued through s[4:5].
s_load_dwordx2 s[4:5], s[4:5], 0x8 // 000000001330: C0060102 00000008 | |
// Sign-extend i to 64 bits in v[0:1], then << 2 to get a byte offset for
// 4-byte elements.
v_ashrrev_i32_e32 v1, 31, v0 // 000000001338: 2202009F | |
v_lshlrev_b64 v[0:1], 2, v[0:1] // 00000000133C: D28F0000 00020082 | |
// Both address loads must have landed before the SGPRs are read.
s_waitcnt lgkmcnt(0) // 000000001344: BF8C007F | |
// v4 = dst.hi, staged in a VGPR for the add-with-carry further down.
v_mov_b32_e32 v4, s3 // 000000001348: 7E080203 | |
// v[2:3] = src + offset (64-bit add: low word sets carry in vcc, high word
// consumes it).
v_add_i32_e32 v2, vcc, s4, v0 // 00000000134C: 32040004 | |
v_mov_b32_e32 v3, s5 // 000000001350: 7E060205 | |
v_addc_u32_e32 v3, vcc, v3, v1, vcc // 000000001354: 38060303 | |
// v2 <- src[i]; the destination reuses the low half of the address pair.
flat_load_dword v2, v[2:3] // 000000001358: DC500000 02000002 | |
// v[0:1] = dst + offset, computed while the load is in flight.
v_add_i32_e32 v0, vcc, s2, v0 // 000000001360: 32000002 | |
v_addc_u32_e32 v1, vcc, v4, v1, vcc // 000000001364: 38020304 | |
// The flat load must have returned before v2 is consumed.
s_waitcnt vmcnt(0) lgkmcnt(0) // 000000001368: BF8C0070 | |
// v2 = 1.0 + src[i]; store the result to dst[i].
v_add_f32_e32 v2, 1.0, v2 // 00000000136C: 020404F2 | |
flat_store_dword v[0:1], v2 // 000000001370: DC700000 00000200 | |
BB1_2: | |
// Re-enable the lanes that were masked off by the bounds test, then end.
s_or_b64 exec, exec, s[0:1] // 000000001378: 87FE007E | |
s_endpgm // 00000000137C: BF810000 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment