Skip to content

Instantly share code, notes, and snippets.

@DiamondLovesYou
Last active October 23, 2018 10:12
Show Gist options
  • Save DiamondLovesYou/ed624b20fe4777766a8629347484d3e7 to your computer and use it in GitHub Desktop.
Save DiamondLovesYou/ed624b20fe4777766a8629347484d3e7 to your computer and use it in GitHub Desktop.
LLVM module for a JIT-ed Rust function (with addrspacecast optimizations)
; ModuleID = 'jit-methods.7rcbfp3g-cgu.0'
source_filename = "jit-methods.7rcbfp3g-cgu.0"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
target triple = "amdgcn-amd-amdhsa-amdgiz"
%"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>" = type { [0 x i8], %"ndarray::ViewRepr<&mut f32>", [0 x i8], float*, [0 x i64], %"ndarray::dimension::dim::Dim<[usize; 1]>", [0 x i64], %"ndarray::dimension::dim::Dim<[usize; 1]>", [0 x i64] }
%"ndarray::ViewRepr<&mut f32>" = type { [0 x i8], %"core::marker::PhantomData<&mut f32>", [0 x i8] }
%"core::marker::PhantomData<&mut f32>" = type {}
%"ndarray::dimension::dim::Dim<[usize; 1]>" = type { [0 x i64], [1 x i64], [0 x i64] }
%"hsa_rt_sys::hsa_kernel_dispatch_packet_s" = type { [0 x i16], i16, [0 x i16], i16, [0 x i16], i16, [0 x i16], i16, [0 x i16], i16, [0 x i16], i16, [0 x i16], i32, [0 x i32], i32, [0 x i32], i32, [0 x i32], i32, [0 x i32], i32, [0 x i32], i64, [0 x i64], i8*, [0 x i64], i64, [0 x i64], %"hsa_rt_sys::hsa_signal_s", [0 x i64] }
%"hsa_rt_sys::hsa_signal_s" = type { [0 x i64], i64, [0 x i64] }
%"unwind::libunwind::_Unwind_Exception" = type { [0 x i64], i64, [0 x i64], void (i32, %"unwind::libunwind::_Unwind_Exception"*)*, [0 x i64], [6 x i64], [0 x i64] }
%"unwind::libunwind::_Unwind_Context" = type { [0 x i8] }
; ndarray::vector_foreach
; Function Attrs: nounwind nonlazybind
define amdgpu_kernel void @_ZN7ndarray14vector_foreach17h800258d818eaa10aE(%"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>", float %value) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
%1 = tail call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #4
%2 = icmp eq i8 addrspace(4)* %1, null
br i1 %2, label %bb2.i.i, label %_ZN14legionella_std15dispatch_packet17ha1cc34938f13f9f1E.exit
bb2.i.i: ; preds = %start
tail call void @llvm.trap() #4
unreachable
_ZN14legionella_std15dispatch_packet17ha1cc34938f13f9f1E.exit: ; preds = %start
%.fca.3.extract = extractvalue %"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>" %0, 3
%.fca.7.1.0.extract = extractvalue %"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>" %0, 7, 1, 0
%.fca.5.1.0.extract = extractvalue %"ndarray::ArrayBase<ndarray::ViewRepr<&mut f32>, ndarray::dimension::dim::Dim<[usize; 1]>>" %0, 5, 1, 0
%3 = ptrtoint i8 addrspace(4)* %1 to i64
%4 = tail call i32 @llvm.amdgcn.workitem.id.x() #4
%5 = zext i32 %4 to i64
%6 = tail call i32 @llvm.amdgcn.workgroup.id.x() #4
%7 = zext i32 %6 to i64
%8 = inttoptr i64 %3 to %"hsa_rt_sys::hsa_kernel_dispatch_packet_s"*
%9 = getelementptr inbounds %"hsa_rt_sys::hsa_kernel_dispatch_packet_s", %"hsa_rt_sys::hsa_kernel_dispatch_packet_s"* %8, i64 0, i32 5
%10 = load i16, i16* %9, align 4, !noalias !2
%11 = zext i16 %10 to i64
%12 = mul nuw nsw i64 %11, %7
%13 = add nuw nsw i64 %12, %5
%14 = icmp ule i64 %.fca.5.1.0.extract, %13
%15 = mul i64 %.fca.7.1.0.extract, %13
%16 = getelementptr inbounds float, float* %.fca.3.extract, i64 %15
%17 = icmp eq float* %16, null
%18 = or i1 %14, %17
br i1 %18, label %bb5, label %bb11
bb5: ; preds = %_ZN14legionella_std15dispatch_packet17ha1cc34938f13f9f1E.exit, %bb11
ret void
bb11: ; preds = %_ZN14legionella_std15dispatch_packet17ha1cc34938f13f9f1E.exit
%19 = load float, float* %16, align 4
%20 = fadd float %19, 1.000000e+00
%21 = fmul float %20, %value
%22 = fadd float %21, 1.000000e+00
%23 = fmul float %22, %value
%24 = fadd float %23, 1.000000e+00
%25 = fmul float %24, %value
%26 = fadd float %25, 1.000000e+00
%27 = fmul float %26, %value
%28 = fadd float %27, 1.000000e+00
%29 = fmul float %28, %value
%30 = fadd float %29, 1.000000e+00
%31 = fmul float %30, %value
%32 = fadd float %31, 1.000000e+00
%33 = fmul float %32, %value
%34 = fadd float %33, 1.000000e+00
%35 = fmul float %34, %value
%36 = fadd float %35, 1.000000e+00
%37 = fmul float %36, %value
%38 = fadd float %37, 1.000000e+00
%39 = fmul float %38, %value
%40 = fadd float %39, 1.000000e+00
%41 = fmul float %40, %value
%42 = fadd float %41, 1.000000e+00
%43 = fmul float %42, %value
%44 = fadd float %43, 1.000000e+00
%45 = fmul float %44, %value
%46 = fadd float %45, 1.000000e+00
%47 = fmul float %46, %value
%48 = fadd float %47, 1.000000e+00
%49 = fmul float %48, %value
%50 = fadd float %49, 1.000000e+00
%51 = fmul float %50, %value
store float %51, float* %16, align 4
br label %bb5
}
; Function Attrs: nounwind nonlazybind
declare i32 @rust_eh_personality(i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*) unnamed_addr #1
; Function Attrs: nounwind readnone speculatable
declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() unnamed_addr #2
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workitem.id.x() unnamed_addr #2
; Function Attrs: nounwind readnone speculatable
declare i32 @llvm.amdgcn.workgroup.id.x() unnamed_addr #2
; Function Attrs: noreturn nounwind
declare void @llvm.trap() #3
attributes #0 = { nounwind nonlazybind "probe-stack"="__rust_probestack" "target-features"="+dpp,+s-memrealtime,+trap-handler,+16-bit-insts" }
attributes #1 = { nounwind nonlazybind "probe-stack"="__rust_probestack" "target-cpu"="gfx803" "target-features"="+dpp,+s-memrealtime,+trap-handler,+16-bit-insts" }
attributes #2 = { nounwind readnone speculatable }
attributes #3 = { noreturn nounwind }
attributes #4 = { nounwind }
!llvm.module.flags = !{!0, !1}
!0 = !{i32 7, !"PIE Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{!3, !5, !7}
!3 = distinct !{!3, !4, !"_ZN94_$LT$legionella_std..workitem..AxisDimX$u20$as$u20$legionella_std..workitem..WorkGroupAxis$GT$14workgroup_size17h2eda80386d22ece3E: %p"}
!4 = distinct !{!4, !"_ZN94_$LT$legionella_std..workitem..AxisDimX$u20$as$u20$legionella_std..workitem..WorkGroupAxis$GT$14workgroup_size17h2eda80386d22ece3E"}
!5 = distinct !{!5, !6, !"_ZN14legionella_std8workitem48_$LT$impl$u20$legionella_std..DispatchPacket$GT$9global_id17hab4fbc456f370bafE: %self"}
!6 = distinct !{!6, !"_ZN14legionella_std8workitem48_$LT$impl$u20$legionella_std..DispatchPacket$GT$9global_id17hab4fbc456f370bafE"}
!7 = distinct !{!7, !8, !"_ZN14legionella_std8workitem48_$LT$impl$u20$legionella_std..DispatchPacket$GT$11global_id_x17h0d11ea891cad49ecE: %self"}
!8 = distinct !{!8, !"_ZN14legionella_std8workitem48_$LT$impl$u20$legionella_std..DispatchPacket$GT$11global_id_x17h0d11ea891cad49ecE"}
.text
.hsa_code_object_version 2,1
.hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
.section .text._ZN7ndarray14vector_foreach17h800258d818eaa10aE,#alloc,#execinstr
.globl _ZN7ndarray14vector_foreach17h800258d818eaa10aE
.p2align 8
.type _ZN7ndarray14vector_foreach17h800258d818eaa10aE,@function
.amdgpu_hsa_kernel _ZN7ndarray14vector_foreach17h800258d818eaa10aE
_ZN7ndarray14vector_foreach17h800258d818eaa10aE:
.amd_kernel_code_t
amd_code_version_major = 1
amd_code_version_minor = 2
amd_machine_kind = 1
amd_machine_version_major = 8
amd_machine_version_minor = 0
amd_machine_version_stepping = 3
kernel_code_entry_byte_offset = 256
kernel_code_prefetch_byte_size = 0
granulated_workitem_vgpr_count = 1
granulated_wavefront_sgpr_count = 1
priority = 0
float_mode = 192
priv = 0
enable_dx10_clamp = 1
debug_mode = 0
enable_ieee_mode = 1
enable_sgpr_private_segment_wave_byte_offset = 0
user_sgpr_count = 10
enable_trap_handler = 0
enable_sgpr_workgroup_id_x = 1
enable_sgpr_workgroup_id_y = 0
enable_sgpr_workgroup_id_z = 0
enable_sgpr_workgroup_info = 0
enable_vgpr_workitem_id = 0
enable_exception_msb = 0
granulated_lds_size = 0
enable_exception = 0
enable_sgpr_private_segment_buffer = 1
enable_sgpr_dispatch_ptr = 1
enable_sgpr_queue_ptr = 1
enable_sgpr_kernarg_segment_ptr = 1
enable_sgpr_dispatch_id = 0
enable_sgpr_flat_scratch_init = 0
enable_sgpr_private_segment_size = 0
enable_sgpr_grid_workgroup_count_x = 0
enable_sgpr_grid_workgroup_count_y = 0
enable_sgpr_grid_workgroup_count_z = 0
enable_ordered_append_gds = 0
private_element_size = 1
is_ptr64 = 1
is_dynamic_callstack = 0
is_debug_enabled = 0
is_xnack_enabled = 0
workitem_private_segment_byte_size = 0
workgroup_group_segment_byte_size = 0
gds_segment_byte_size = 0
kernarg_segment_byte_size = 28
workgroup_fbarrier_count = 0
wavefront_sgpr_count = 13
workitem_vgpr_count = 5
reserved_vgpr_first = 0
reserved_vgpr_count = 0
reserved_sgpr_first = 0
reserved_sgpr_count = 0
debug_wavefront_private_segment_offset_sgpr = 0
debug_private_segment_buffer_sgpr = 0
kernarg_segment_alignment = 4
group_segment_alignment = 4
private_segment_alignment = 4
wavefront_size = 6
call_convention = -1
runtime_loader_kernel_symbol = 0
.end_amd_kernel_code_t
s_cmp_lg_u64 s[4:5], 0
s_cbranch_scc0 BB0_4
s_add_u32 s4, s4, 4
s_addc_u32 s5, s5, 0
v_mov_b32_e32 v1, s4
v_mov_b32_e32 v2, s5
flat_load_ushort v2, v[1:2]
v_mov_b32_e32 v1, 0
s_load_dwordx2 s[2:3], s[8:9], 0x0
s_load_dwordx2 s[0:1], s[8:9], 0x8
s_load_dwordx2 s[6:7], s[8:9], 0x10
s_waitcnt lgkmcnt(0)
v_mov_b32_e32 v3, s3
s_waitcnt vmcnt(0)
v_mad_u64_u32 v[0:1], s[4:5], v2, s10, v[0:1]
v_cmp_gt_u64_e64 s[0:1], s[0:1], v[0:1]
v_mul_lo_i32 v1, s6, v1
v_mul_hi_u32 v2, s6, v0
v_mul_lo_i32 v4, s7, v0
v_mul_lo_i32 v0, s6, v0
v_add_u32_e32 v1, vcc, v2, v1
v_add_u32_e32 v1, vcc, v4, v1
v_lshlrev_b64 v[0:1], 2, v[0:1]
v_add_u32_e32 v0, vcc, s2, v0
v_addc_u32_e32 v1, vcc, v3, v1, vcc
v_cmp_ne_u64_e32 vcc, 0, v[0:1]
s_and_b64 s[0:1], s[0:1], vcc
s_and_saveexec_b64 s[2:3], s[0:1]
s_cbranch_execz BB0_3
BB0_2:
flat_load_dword v2, v[0:1]
s_load_dword s0, s[8:9], 0x18
s_waitcnt vmcnt(0) lgkmcnt(0)
v_add_f32_e32 v2, 1.0, v2
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mad_f32 v2, v2, s0, 1.0
v_mul_f32_e32 v2, s0, v2
flat_store_dword v[0:1], v2
BB0_3:
s_endpgm
BB0_4:
s_mov_b64 s[0:1], s[6:7]
s_trap 2
s_trap 2
.Lfunc_end0:
.size _ZN7ndarray14vector_foreach17h800258d818eaa10aE, .Lfunc_end0-_ZN7ndarray14vector_foreach17h800258d818eaa10aE
.section ".note.GNU-stack"
.amd_amdgpu_isa "amdgcn-amd-amdhsa-amdgiz-gfx803"
.amd_amdgpu_hsa_metadata
---
Version: [ 1, 0 ]
Kernels:
- Name: _ZN7ndarray14vector_foreach17h800258d818eaa10aE
SymbolName: '_ZN7ndarray14vector_foreach17h800258d818eaa10aE@kd'
Args:
- Size: 24
Align: 8
ValueKind: ByValue
ValueType: Struct
- Name: value
Size: 4
Align: 4
ValueKind: ByValue
ValueType: F32
CodeProps:
KernargSegmentSize: 28
GroupSegmentFixedSize: 0
PrivateSegmentFixedSize: 0
KernargSegmentAlign: 8
WavefrontSize: 64
NumSGPRs: 13
NumVGPRs: 5
MaxFlatWorkGroupSize: 256
...
.end_amd_amdgpu_hsa_metadata
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment