Created
October 10, 2017 15:22
-
-
Save masahi/6c7f270240891bc7e1e82dd221e05903 to your computer and use it in GitHub Desktop.
Output of running `llc-5.0 -march=amdgcn -mcpu=gfx803 myadd_kernel.ll` (AMDGPU assembly generated by LLVM 5.0 for the gfx803 target).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; File preamble: switch to the code section and declare the HSA code object
; version (2.1) and the target ISA (8.0.3, vendor "AMD", arch "AMDGPU").
; The trailing " | |" page-scrape artifacts were removed so the directives
; assemble.
        .text
        .hsa_code_object_version 2,1
        .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
; -----------------------------------------------------------------------------
; myadd__kernel0 -- guarded element-wise f32 add (HSA kernel, ISA 8.0.3).
;
; Kernarg layout (offsets are the ones used by the s_load instructions below;
; sizes/types are those listed for this kernel in the code object metadata):
;   0x00  arg0  8-byte global pointer, F32             -- store destination
;   0x08  arg1  8-byte global pointer, F32, ReadOnly   -- load source
;   0x10  arg2  8-byte global pointer, F32, ReadOnly   -- load source
;   0x18  arg3  4-byte I32 passed by value             -- bound for the guard
;
; Register roles on entry, as implied by the enable_* bits in the header
; (user_sgpr_count = 6, followed by the workgroup id):
;   s[0:3]  private segment buffer (never read; s[0:1] is reused as a mask)
;   s[4:5]  kernarg segment pointer
;   s6      workgroup id X
;   v0      workitem id X
;
; Pseudo-C of the instruction stream:
;   t = wi_id_x << 7;
;   if (wg_id_x < arg3 - t) {
;       i = wg_id_x + t;                 // signed 32-bit, sign-extended to 64
;       arg0[i] = arg2[i] + arg1[i];
;   }
;
; NOTE(review): neighbouring work-items address elements 128 apart while the
; workgroup id supplies the fine-grained index. This looks like the thread and
; block roles may be swapped in the generating IR -- confirm against
; myadd_kernel.ll before relying on this kernel's memory access pattern.
;
; Fix applied in this block: the trailing " | |" page-scrape artifact was
; stripped from every line and conventional indentation restored; directive
; values, instruction order and operands are unchanged.
; -----------------------------------------------------------------------------
        .globl  myadd__kernel0          ; -- Begin function myadd__kernel0
        .p2align        8               ; 2^8 = 256-byte alignment for the header
        .type   myadd__kernel0,@function
        .amdgpu_hsa_kernel myadd__kernel0
myadd__kernel0:                         ; @myadd__kernel0
        .amd_kernel_code_t
                ; --- code object / target identification ---
                amd_code_version_major = 1
                amd_code_version_minor = 1
                amd_machine_kind = 1
                amd_machine_version_major = 8
                amd_machine_version_minor = 0
                amd_machine_version_stepping = 3
                ; First instruction sits 256 bytes after the start of this header.
                kernel_code_entry_byte_offset = 256
                kernel_code_prefetch_byte_size = 0
                max_scratch_backing_memory_byte_size = 0
                ; --- COMPUTE_PGM_RSRC1 ---
                ; Encoded register block counts (see SGPRBlocks / VGPRBlocks in
                ; the kernel info comments that follow the function).
                granulated_workitem_vgpr_count = 1
                granulated_wavefront_sgpr_count = 1
                priority = 0
                float_mode = 192
                priv = 0
                enable_dx10_clamp = 1
                debug_mode = 0
                enable_ieee_mode = 1
                ; --- COMPUTE_PGM_RSRC2 ---
                enable_sgpr_private_segment_wave_byte_offset = 0
                ; 6 user SGPRs: 4 for the private segment buffer + 2 for the
                ; kernarg segment pointer (the two enable_sgpr_* bits set below).
                user_sgpr_count = 6
                enable_trap_handler = 1
                enable_sgpr_workgroup_id_x = 1
                enable_sgpr_workgroup_id_y = 0
                enable_sgpr_workgroup_id_z = 0
                enable_sgpr_workgroup_info = 0
                ; 0 = only the X workitem id is provided (in v0).
                enable_vgpr_workitem_id = 0
                enable_exception_msb = 0
                granulated_lds_size = 0
                enable_exception = 0
                ; --- kernel code properties: which user SGPRs are preloaded ---
                enable_sgpr_private_segment_buffer = 1
                enable_sgpr_dispatch_ptr = 0
                enable_sgpr_queue_ptr = 0
                enable_sgpr_kernarg_segment_ptr = 1
                enable_sgpr_dispatch_id = 0
                enable_sgpr_flat_scratch_init = 0
                enable_sgpr_private_segment_size = 0
                enable_sgpr_grid_workgroup_count_x = 0
                enable_sgpr_grid_workgroup_count_y = 0
                enable_sgpr_grid_workgroup_count_z = 0
                enable_ordered_append_gds = 0
                private_element_size = 1
                is_ptr64 = 1
                is_dynamic_callstack = 0
                is_debug_enabled = 0
                is_xnack_enabled = 0
                ; --- segment sizes and register usage ---
                workitem_private_segment_byte_size = 0
                workgroup_group_segment_byte_size = 0
                gds_segment_byte_size = 0
                ; 3 pointers (8 bytes each) + 1 i32 = 28 bytes.
                kernarg_segment_byte_size = 28
                workgroup_fbarrier_count = 0
                wavefront_sgpr_count = 12
                workitem_vgpr_count = 7
                reserved_vgpr_first = 0
                reserved_vgpr_count = 0
                reserved_sgpr_first = 0
                reserved_sgpr_count = 0
                debug_wavefront_private_segment_offset_sgpr = 0
                debug_private_segment_buffer_sgpr = 0
                ; Alignment and wavefront size fields are log2-encoded
                ; (4 -> 16 bytes, 6 -> 64 lanes).
                kernarg_segment_alignment = 4
                group_segment_alignment = 4
                private_segment_alignment = 4
                wavefront_size = 6
                call_convention = -1
                runtime_loader_kernel_symbol = 0
        .end_amd_kernel_code_t
; BB#0:                                 ; %entry
        s_load_dword s0, s[4:5], 0x18           ; s0 = arg3 (i32 at kernarg+0x18)
        v_lshlrev_b32_e32 v0, 7, v0             ; v0 = wi_id_x << 7
        s_waitcnt lgkmcnt(0)                    ; wait for the scalar load of s0
        v_sub_i32_e32 v1, vcc, s0, v0           ; v1 = arg3 - (wi_id_x << 7)
        v_cmp_lt_i32_e32 vcc, s6, v1            ; vcc = wg_id_x < v1 (signed)
        s_and_saveexec_b64 s[0:1], vcc          ; s[0:1] = old exec; exec &= vcc
        s_xor_b64 s[0:1], exec, s[0:1]          ; s[0:1] = lanes that failed the guard
        ; mask branch BB0_2
        s_cbranch_execz BB0_2                   ; no lane passed: skip the body
BB0_1:                                  ; %if_then
        s_load_dwordx2 s[2:3], s[4:5], 0x0      ; s[2:3] = arg0 (destination)
        s_load_dwordx2 s[8:9], s[4:5], 0x8      ; s[8:9] = arg1
        v_add_i32_e32 v0, vcc, s6, v0           ; v0 = i = wg_id_x + (wi_id_x << 7)
        s_load_dwordx2 s[4:5], s[4:5], 0x10     ; s[4:5] = arg2 (kernarg ptr is dead after this)
        v_ashrrev_i32_e32 v1, 31, v0            ; v1 = sign bits of i -> v[0:1] = (i64)i
        v_lshlrev_b64 v[0:1], 2, v[0:1]         ; v[0:1] = byte offset = i * 4
        s_waitcnt lgkmcnt(0)                    ; all three pointers are now loaded
        v_mov_b32_e32 v5, s9                    ; high half of arg1 into a VGPR
        v_mov_b32_e32 v6, s3                    ; high half of arg0 into a VGPR
        v_add_i32_e32 v2, vcc, s4, v0           ; v[2:3] = arg2 + offset (low, carry in vcc)
        v_mov_b32_e32 v3, s5
        v_addc_u32_e32 v3, vcc, v3, v1, vcc     ;   ... high half with carry
        v_add_i32_e32 v4, vcc, s8, v0           ; v[4:5] = arg1 + offset (low)
        v_addc_u32_e32 v5, vcc, v5, v1, vcc     ;   ... high half with carry
        flat_load_dword v2, v[2:3]              ; v2 = arg2[i]
        flat_load_dword v3, v[4:5]              ; v3 = arg1[i]
        v_add_i32_e32 v0, vcc, s2, v0           ; v[0:1] = arg0 + offset (low)
        v_addc_u32_e32 v1, vcc, v6, v1, vcc     ;   ... high half with carry
        s_waitcnt vmcnt(0) lgkmcnt(0)           ; wait for both flat loads
        v_add_f32_e32 v2, v2, v3                ; v2 = arg2[i] + arg1[i]
        flat_store_dword v[0:1], v2             ; arg0[i] = v2
BB0_2:                                  ; %if_end
        s_or_b64 exec, exec, s[0:1]             ; re-enable the lanes masked off by the guard
        s_endpgm
.Lfunc_end0:
        .size   myadd__kernel0, .Lfunc_end0-myadd__kernel0
                                        ; -- End function
; Compiler-reported statistics for myadd__kernel0. Everything after the
; .section directive is a comment and emits no bytes. The trailing " | |"
; page-scrape artifacts were removed.
        .section        .AMDGPU.csdata
; Kernel info:
; codeLenInByte = 156
; NumSgprs: 12
; NumVgprs: 7
; ScratchSize: 0
; FloatMode: 192
; IeeeMode: 1
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
; VGPRBlocks: 1
; NumSGPRsForWavesPerEU: 12
; NumVGPRsForWavesPerEU: 7
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 6
; COMPUTE_PGM_RSRC2:TRAP_HANDLER: 1
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
; -----------------------------------------------------------------------------
; myadd__kernel1 -- guarded element-wise "add 1.0" (HSA kernel, ISA 8.0.3).
;
; Kernarg layout (offsets are the ones used by the s_load instructions below;
; sizes/types are those listed for this kernel in the code object metadata):
;   0x00  arg0  8-byte global pointer                  -- store destination
;   0x08  arg1  8-byte global pointer, F32, ReadOnly   -- load source
;   0x10  arg2  4-byte I32 passed by value             -- bound for the guard
;
; NOTE(review): the metadata declares arg0 with ValueType I8, yet this kernel
; stores a 32-bit float through it with a 4-byte element stride. Presumably the
; IR parameter is an untyped byte pointer -- confirm against myadd_kernel.ll.
;
; Register roles on entry, as implied by the enable_* bits in the header
; (user_sgpr_count = 6, followed by the workgroup id):
;   s[0:3]  private segment buffer (never read; s[0:1] is reused as a mask)
;   s[4:5]  kernarg segment pointer
;   s6      workgroup id X
;   v0      workitem id X
;
; Pseudo-C of the instruction stream:
;   t = wi_id_x << 7;
;   if (wg_id_x < arg2 - t) {
;       i = wg_id_x + t;                 // signed 32-bit, sign-extended to 64
;       ((float *)arg0)[i] = 1.0f + arg1[i];
;   }
;
; Fix applied in this block: the trailing " | |" page-scrape artifact was
; stripped from every line and conventional indentation restored; directive
; values, instruction order and operands are unchanged.
; -----------------------------------------------------------------------------
        .text
        .globl  myadd__kernel1          ; -- Begin function myadd__kernel1
        .p2align        8               ; 2^8 = 256-byte alignment for the header
        .type   myadd__kernel1,@function
        .amdgpu_hsa_kernel myadd__kernel1
myadd__kernel1:                         ; @myadd__kernel1
        .amd_kernel_code_t
                ; --- code object / target identification ---
                amd_code_version_major = 1
                amd_code_version_minor = 1
                amd_machine_kind = 1
                amd_machine_version_major = 8
                amd_machine_version_minor = 0
                amd_machine_version_stepping = 3
                ; First instruction sits 256 bytes after the start of this header.
                kernel_code_entry_byte_offset = 256
                kernel_code_prefetch_byte_size = 0
                max_scratch_backing_memory_byte_size = 0
                ; --- COMPUTE_PGM_RSRC1 ---
                ; Encoded register block counts (see SGPRBlocks / VGPRBlocks in
                ; the kernel info comments that follow the function).
                granulated_workitem_vgpr_count = 1
                granulated_wavefront_sgpr_count = 1
                priority = 0
                float_mode = 192
                priv = 0
                enable_dx10_clamp = 1
                debug_mode = 0
                enable_ieee_mode = 1
                ; --- COMPUTE_PGM_RSRC2 ---
                enable_sgpr_private_segment_wave_byte_offset = 0
                ; 6 user SGPRs: 4 for the private segment buffer + 2 for the
                ; kernarg segment pointer (the two enable_sgpr_* bits set below).
                user_sgpr_count = 6
                enable_trap_handler = 1
                enable_sgpr_workgroup_id_x = 1
                enable_sgpr_workgroup_id_y = 0
                enable_sgpr_workgroup_id_z = 0
                enable_sgpr_workgroup_info = 0
                ; 0 = only the X workitem id is provided (in v0).
                enable_vgpr_workitem_id = 0
                enable_exception_msb = 0
                granulated_lds_size = 0
                enable_exception = 0
                ; --- kernel code properties: which user SGPRs are preloaded ---
                enable_sgpr_private_segment_buffer = 1
                enable_sgpr_dispatch_ptr = 0
                enable_sgpr_queue_ptr = 0
                enable_sgpr_kernarg_segment_ptr = 1
                enable_sgpr_dispatch_id = 0
                enable_sgpr_flat_scratch_init = 0
                enable_sgpr_private_segment_size = 0
                enable_sgpr_grid_workgroup_count_x = 0
                enable_sgpr_grid_workgroup_count_y = 0
                enable_sgpr_grid_workgroup_count_z = 0
                enable_ordered_append_gds = 0
                private_element_size = 1
                is_ptr64 = 1
                is_dynamic_callstack = 0
                is_debug_enabled = 0
                is_xnack_enabled = 0
                ; --- segment sizes and register usage ---
                workitem_private_segment_byte_size = 0
                workgroup_group_segment_byte_size = 0
                gds_segment_byte_size = 0
                ; 2 pointers (8 bytes each) + 1 i32 = 20 bytes.
                kernarg_segment_byte_size = 20
                workgroup_fbarrier_count = 0
                wavefront_sgpr_count = 9
                workitem_vgpr_count = 5
                reserved_vgpr_first = 0
                reserved_vgpr_count = 0
                reserved_sgpr_first = 0
                reserved_sgpr_count = 0
                debug_wavefront_private_segment_offset_sgpr = 0
                debug_private_segment_buffer_sgpr = 0
                ; Alignment and wavefront size fields are log2-encoded
                ; (4 -> 16 bytes, 6 -> 64 lanes).
                kernarg_segment_alignment = 4
                group_segment_alignment = 4
                private_segment_alignment = 4
                wavefront_size = 6
                call_convention = -1
                runtime_loader_kernel_symbol = 0
        .end_amd_kernel_code_t
; BB#0:                                 ; %entry
        s_load_dword s0, s[4:5], 0x10           ; s0 = arg2 (i32 at kernarg+0x10)
        v_lshlrev_b32_e32 v0, 7, v0             ; v0 = wi_id_x << 7
        s_waitcnt lgkmcnt(0)                    ; wait for the scalar load of s0
        v_sub_i32_e32 v1, vcc, s0, v0           ; v1 = arg2 - (wi_id_x << 7)
        v_cmp_lt_i32_e32 vcc, s6, v1            ; vcc = wg_id_x < v1 (signed)
        s_and_saveexec_b64 s[0:1], vcc          ; s[0:1] = old exec; exec &= vcc
        s_xor_b64 s[0:1], exec, s[0:1]          ; s[0:1] = lanes that failed the guard
        ; mask branch BB1_2
        s_cbranch_execz BB1_2                   ; no lane passed: skip the body
BB1_1:                                  ; %if_then
        s_load_dwordx2 s[2:3], s[4:5], 0x0      ; s[2:3] = arg0 (destination)
        v_add_i32_e32 v0, vcc, s6, v0           ; v0 = i = wg_id_x + (wi_id_x << 7)
        s_load_dwordx2 s[4:5], s[4:5], 0x8      ; s[4:5] = arg1 (kernarg ptr is dead after this)
        v_ashrrev_i32_e32 v1, 31, v0            ; v1 = sign bits of i -> v[0:1] = (i64)i
        v_lshlrev_b64 v[0:1], 2, v[0:1]         ; v[0:1] = byte offset = i * 4
        s_waitcnt lgkmcnt(0)                    ; both pointers are now loaded
        v_mov_b32_e32 v4, s3                    ; high half of arg0 into a VGPR
        v_add_i32_e32 v2, vcc, s4, v0           ; v[2:3] = arg1 + offset (low, carry in vcc)
        v_mov_b32_e32 v3, s5
        v_addc_u32_e32 v3, vcc, v3, v1, vcc     ;   ... high half with carry
        flat_load_dword v2, v[2:3]              ; v2 = arg1[i]
        v_add_i32_e32 v0, vcc, s2, v0           ; v[0:1] = arg0 + offset (low)
        v_addc_u32_e32 v1, vcc, v4, v1, vcc     ;   ... high half with carry
        s_waitcnt vmcnt(0) lgkmcnt(0)           ; wait for the flat load
        v_add_f32_e32 v2, 1.0, v2               ; v2 = 1.0 + arg1[i]
        flat_store_dword v[0:1], v2             ; store v2 at arg0 + i*4
BB1_2:                                  ; %if_end
        s_or_b64 exec, exec, s[0:1]             ; re-enable the lanes masked off by the guard
        s_endpgm
.Lfunc_end1:
        .size   myadd__kernel1, .Lfunc_end1-myadd__kernel1
                                        ; -- End function
; Compiler-reported statistics for myadd__kernel1. Everything after the
; .section directive is a comment and emits no bytes. The trailing " | |"
; page-scrape artifacts were removed.
        .section        .AMDGPU.csdata
; Kernel info:
; codeLenInByte = 128
; NumSgprs: 9
; NumVgprs: 5
; ScratchSize: 0
; FloatMode: 192
; IeeeMode: 1
; LDSByteSize: 0 bytes/workgroup (compile time only)
; SGPRBlocks: 1
; VGPRBlocks: 1
; NumSGPRsForWavesPerEU: 9
; NumVGPRsForWavesPerEU: 5
; ReservedVGPRFirst: 0
; ReservedVGPRCount: 0
; COMPUTE_PGM_RSRC2:USER_SGPR: 6
; COMPUTE_PGM_RSRC2:TRAP_HANDLER: 1
; COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
; Code object metadata (YAML, format version 1.0) describing each kernel's
; arguments and code properties. Args are listed in kernarg order; the
; CodeProps values repeat the matching fields of each kernel's
; amd_kernel_code_t header (WavefrontSize and the *Align fields are
; log2-encoded there: 6 -> 64 lanes, 4 -> 16 bytes).
;
; Fix applied in this block: the trailing " | |" page-scrape artifact was
; stripped and the YAML nesting (Kernels -> Args / CodeProps) restored; the
; flattened text could not be parsed as a mapping/sequence hierarchy. Keys and
; values are unchanged.
        .section        ".note.GNU-stack"
        .amdgpu_code_object_metadata
---
Version:         [ 1, 0 ]
Kernels:
  - Name:            myadd__kernel0
    Args:
      - Size:            8
        Align:           8
        ValueKind:       GlobalBuffer
        ValueType:       F32
        AddrSpaceQual:   Global
      - Size:            8
        Align:           8
        ValueKind:       GlobalBuffer
        ValueType:       F32
        AccQual:         ReadOnly
        AddrSpaceQual:   Global
      - Size:            8
        Align:           8
        ValueKind:       GlobalBuffer
        ValueType:       F32
        AccQual:         ReadOnly
        AddrSpaceQual:   Global
      - Size:            4
        Align:           4
        ValueKind:       ByValue
        ValueType:       I32
    CodeProps:
      KernargSegmentSize: 28
      WavefrontNumSGPRs: 12
      WorkitemNumVGPRs: 7
      KernargSegmentAlign: 4
      GroupSegmentAlign: 4
      PrivateSegmentAlign: 4
      WavefrontSize:   6
  - Name:            myadd__kernel1
    Args:
      - Size:            8
        Align:           8
        ValueKind:       GlobalBuffer
        ValueType:       I8
        AddrSpaceQual:   Global
      - Size:            8
        Align:           8
        ValueKind:       GlobalBuffer
        ValueType:       F32
        AccQual:         ReadOnly
        AddrSpaceQual:   Global
      - Size:            4
        Align:           4
        ValueKind:       ByValue
        ValueType:       I32
    CodeProps:
      KernargSegmentSize: 20
      WavefrontNumSGPRs: 9
      WorkitemNumVGPRs: 5
      KernargSegmentAlign: 4
      GroupSegmentAlign: 4
      PrivateSegmentAlign: 4
      WavefrontSize:   6
...
        .end_amdgpu_code_object_metadata
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment