Skip to content

Instantly share code, notes, and snippets.

@masahi
Created October 10, 2017 15:17
Show Gist options
  • Save masahi/ba5b376d2ecd15c72f6ee599a84287a7 to your computer and use it in GitHub Desktop.
Save masahi/ba5b376d2ecd15c72f6ee599a84287a7 to your computer and use it in GitHub Desktop.
The output of `$ /opt/rocm/hcc/compiler/bin/llvm-objdump -disassemble -mcpu=gfx803 rocm_kernel.co`
rocm_kernel.co: file format ELF64-amdgpu-hsacobj
Disassembly of section .text:
// -----------------------------------------------------------------------
// myadd__kernel0 (llvm-objdump disassembly, -mcpu=gfx803)
//
// Per lane: loads one 32-bit float from each of two source buffers, adds
// them, and stores the sum to a destination buffer, all at the same
// element index.
//
// Values read relative to the base address held in s[4:5]:
//   +0x00  64-bit address of the destination buffer
//   +0x08  64-bit address of one source buffer
//   +0x10  64-bit address of the other source buffer
//   +0x18  32-bit bound used by the guard below
// NOTE(review): s[4:5] is presumably the kernarg segment pointer, and s6 / v0
// presumably carry the work-group and work-item IDs; that assignment is made
// by the kernel descriptor, which is not part of this listing - confirm.
//
// Element index:  i = s6 + (v0 << 7)
// Guard:          a lane runs the body only if s6 < bound - (v0 << 7)
//                 (signed 32-bit compare).
// Registers written: s[0:5], s[8:9], v[0:6], vcc, exec.
// -----------------------------------------------------------------------
myadd__kernel0:
// s0 = bound (32-bit value at offset 0x18).
s_load_dword s0, s[4:5], 0x18 // 000000001100: C0020002 00000018
// v0 = v0 << 7, i.e. v0 * 128; issued while the scalar load is in flight.
v_lshlrev_b32_e32 v0, 7, v0 // 000000001108: 24000087
// s0 is about to be read: wait for the scalar load to return.
s_waitcnt lgkmcnt(0) // 00000000110C: BF8C007F
// v1 = bound - (v0 << 7); the borrow lands in vcc and is overwritten next.
v_sub_i32_e32 v1, vcc, s0, v0 // 000000001110: 34020000
// vcc = per-lane (s6 < v1), signed.
v_cmp_lt_i32_e32 vcc, s6, v1 // 000000001114: 7D820206
// s[0:1] = exec on entry; exec = exec & vcc (only in-range lanes stay active).
s_and_saveexec_b64 s[0:1], vcc // 000000001118: BE80206A
// s[0:1] = lanes that were active on entry but failed the guard.
s_xor_b64 s[0:1], exec, s[0:1] // 00000000111C: 8880007E
// No lane passed the guard: skip the body entirely.
s_cbranch_execz BB0_2 // 000000001120: BF88001C
BB0_1:
// s[2:3] = destination buffer address (offset 0x0).
s_load_dwordx2 s[2:3], s[4:5], 0x0 // 000000001124: C0060082 00000000
// s[8:9] = source buffer address (offset 0x8).
s_load_dwordx2 s[8:9], s[4:5], 0x8 // 00000000112C: C0060202 00000008
// v0 = element index i = s6 + (v0 << 7).
v_add_i32_e32 v0, vcc, s6, v0 // 000000001134: 32000006
// s[4:5] = other source buffer address (offset 0x10). This overwrites the
// base pointer itself, so it is the last load issued through s[4:5].
s_load_dwordx2 s[4:5], s[4:5], 0x10 // 000000001138: C0060102 00000010
// v1 = sign bits of v0, making v[0:1] the sign-extended 64-bit index.
v_ashrrev_i32_e32 v1, 31, v0 // 000000001140: 2202009F
// v[0:1] = index << 2: byte offset for 4-byte elements.
v_lshlrev_b64 v[0:1], 2, v[0:1] // 000000001144: D28F0000 00020082
// All three buffer addresses are needed from here on.
s_waitcnt lgkmcnt(0) // 00000000114C: BF8C007F
// Copy the high address halves into VGPRs; they feed the add-with-carry
// instructions below (v5 for s[8:9], v6 for s[2:3]).
v_mov_b32_e32 v5, s9 // 000000001150: 7E0A0209
v_mov_b32_e32 v6, s3 // 000000001154: 7E0C0203
// v[2:3] = s[4:5] + byte offset (64-bit add: low half, then high half + carry).
v_add_i32_e32 v2, vcc, s4, v0 // 000000001158: 32040004
v_mov_b32_e32 v3, s5 // 00000000115C: 7E060205
v_addc_u32_e32 v3, vcc, v3, v1, vcc // 000000001160: 38060303
// v[4:5] = s[8:9] + byte offset.
v_add_i32_e32 v4, vcc, s8, v0 // 000000001164: 32080008
v_addc_u32_e32 v5, vcc, v5, v1, vcc // 000000001168: 380A0305
// Issue both source loads; each overwrites the low half of its own address
// pair (v2, then v3) with the loaded value.
flat_load_dword v2, v[2:3] // 00000000116C: DC500000 02000002
flat_load_dword v3, v[4:5] // 000000001174: DC500000 03000004
// v[0:1] = s[2:3] + byte offset: destination address, computed while the
// loads are outstanding.
v_add_i32_e32 v0, vcc, s2, v0 // 00000000117C: 32000002
v_addc_u32_e32 v1, vcc, v6, v1, vcc // 000000001180: 38020306
// Wait for both flat loads before consuming v2 / v3.
s_waitcnt vmcnt(0) lgkmcnt(0) // 000000001184: BF8C0070
// v2 = v2 + v3 (32-bit float add).
v_add_f32_e32 v2, v2, v3 // 000000001188: 02040702
// Store the sum to the destination element.
flat_store_dword v[0:1], v2 // 00000000118C: DC700000 00000200
BB0_2:
// Re-enable the lanes that were masked off by the guard.
s_or_b64 exec, exec, s[0:1] // 000000001194: 87FE007E
s_endpgm // 000000001198: BF810000
// s_nop fill from 0x119C through 0x11FC. It sits after s_endpgm and nothing
// above branches into it.
// NOTE(review): presumably alignment padding emitted by the toolchain - confirm.
s_nop 0 // 00000000119C: BF800000
s_nop 0 // 0000000011A0: BF800000
s_nop 0 // 0000000011A4: BF800000
s_nop 0 // 0000000011A8: BF800000
s_nop 0 // 0000000011AC: BF800000
s_nop 0 // 0000000011B0: BF800000
s_nop 0 // 0000000011B4: BF800000
s_nop 0 // 0000000011B8: BF800000
s_nop 0 // 0000000011BC: BF800000
s_nop 0 // 0000000011C0: BF800000
s_nop 0 // 0000000011C4: BF800000
s_nop 0 // 0000000011C8: BF800000
s_nop 0 // 0000000011CC: BF800000
s_nop 0 // 0000000011D0: BF800000
s_nop 0 // 0000000011D4: BF800000
s_nop 0 // 0000000011D8: BF800000
s_nop 0 // 0000000011DC: BF800000
s_nop 0 // 0000000011E0: BF800000
s_nop 0 // 0000000011E4: BF800000
s_nop 0 // 0000000011E8: BF800000
s_nop 0 // 0000000011EC: BF800000
s_nop 0 // 0000000011F0: BF800000
s_nop 0 // 0000000011F4: BF800000
s_nop 0 // 0000000011F8: BF800000
s_nop 0 // 0000000011FC: BF800000
// -----------------------------------------------------------------------
// myadd__kernel1 (llvm-objdump disassembly, -mcpu=gfx803)
//
// Per lane: loads one 32-bit float from a source buffer, adds the constant
// 1.0, and stores the result to a destination buffer at the same element
// index.
//
// Values read relative to the base address held in s[4:5]:
//   +0x00  64-bit address of the destination buffer
//   +0x08  64-bit address of the source buffer
//   +0x10  32-bit bound used by the guard below
// NOTE(review): s[4:5] is presumably the kernarg segment pointer, and s6 / v0
// presumably carry the work-group and work-item IDs; that assignment is made
// by the kernel descriptor, which is not part of this listing - confirm.
//
// Element index:  i = s6 + (v0 << 7)
// Guard:          a lane runs the body only if s6 < bound - (v0 << 7)
//                 (signed 32-bit compare).
// Registers written: s[0:5], v[0:4], vcc, exec.
// -----------------------------------------------------------------------
myadd__kernel1:
// s0 = bound (32-bit value at offset 0x10).
s_load_dword s0, s[4:5], 0x10 // 000000001300: C0020002 00000010
// v0 = v0 << 7, i.e. v0 * 128; issued while the scalar load is in flight.
v_lshlrev_b32_e32 v0, 7, v0 // 000000001308: 24000087
// s0 is about to be read: wait for the scalar load to return.
s_waitcnt lgkmcnt(0) // 00000000130C: BF8C007F
// v1 = bound - (v0 << 7); the borrow lands in vcc and is overwritten next.
v_sub_i32_e32 v1, vcc, s0, v0 // 000000001310: 34020000
// vcc = per-lane (s6 < v1), signed.
v_cmp_lt_i32_e32 vcc, s6, v1 // 000000001314: 7D820206
// s[0:1] = exec on entry; exec = exec & vcc (only in-range lanes stay active).
s_and_saveexec_b64 s[0:1], vcc // 000000001318: BE80206A
// s[0:1] = lanes that were active on entry but failed the guard.
s_xor_b64 s[0:1], exec, s[0:1] // 00000000131C: 8880007E
// No lane passed the guard: skip the body entirely.
s_cbranch_execz BB1_2 // 000000001320: BF880015
BB1_1:
// s[2:3] = destination buffer address (offset 0x0).
s_load_dwordx2 s[2:3], s[4:5], 0x0 // 000000001324: C0060082 00000000
// v0 = element index i = s6 + (v0 << 7).
v_add_i32_e32 v0, vcc, s6, v0 // 00000000132C: 32000006
// s[4:5] = source buffer address (offset 0x8). This overwrites the base
// pointer itself, so it is the last load issued through s[4:5].
s_load_dwordx2 s[4:5], s[4:5], 0x8 // 000000001330: C0060102 00000008
// v1 = sign bits of v0, making v[0:1] the sign-extended 64-bit index.
v_ashrrev_i32_e32 v1, 31, v0 // 000000001338: 2202009F
// v[0:1] = index << 2: byte offset for 4-byte elements.
v_lshlrev_b64 v[0:1], 2, v[0:1] // 00000000133C: D28F0000 00020082
// Both buffer addresses are needed from here on.
s_waitcnt lgkmcnt(0) // 000000001344: BF8C007F
// Copy the destination's high address half into a VGPR; it feeds the
// add-with-carry that forms the store address below.
v_mov_b32_e32 v4, s3 // 000000001348: 7E080203
// v[2:3] = s[4:5] + byte offset (64-bit add: low half, then high half + carry).
v_add_i32_e32 v2, vcc, s4, v0 // 00000000134C: 32040004
v_mov_b32_e32 v3, s5 // 000000001350: 7E060205
v_addc_u32_e32 v3, vcc, v3, v1, vcc // 000000001354: 38060303
// Load the source element; v2 (low half of the address pair) receives the value.
flat_load_dword v2, v[2:3] // 000000001358: DC500000 02000002
// v[0:1] = s[2:3] + byte offset: destination address, computed while the
// load is outstanding.
v_add_i32_e32 v0, vcc, s2, v0 // 000000001360: 32000002
v_addc_u32_e32 v1, vcc, v4, v1, vcc // 000000001364: 38020304
// Wait for the flat load before consuming v2.
s_waitcnt vmcnt(0) lgkmcnt(0) // 000000001368: BF8C0070
// v2 = 1.0 + v2 (32-bit float add with an inline constant).
v_add_f32_e32 v2, 1.0, v2 // 00000000136C: 020404F2
// Store the result to the destination element.
flat_store_dword v[0:1], v2 // 000000001370: DC700000 00000200
BB1_2:
// Re-enable the lanes that were masked off by the guard.
s_or_b64 exec, exec, s[0:1] // 000000001378: 87FE007E
s_endpgm // 00000000137C: BF810000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment