Skip to content

Instantly share code, notes, and snippets.

@maxwindiff
Last active January 1, 2023 07:43
Show Gist options
  • Save maxwindiff/11d141f9896254fa2bd1c8d860b3ec14 to your computer and use it in GitHub Desktop.
Save maxwindiff/11d141f9896254fa2bd1c8d860b3ec14 to your computer and use it in GitHub Desktop.
julia> Metal.code_llvm(load_store, Tuple{MtlDeviceArray{Float32}, MtlDeviceArray{Float32}})
; @ REPL[4]:1 within `load_store`
; IR for `load_store` as produced by the `Metal.code_llvm` call above.
; Both arguments arrive as opaque pointers ({}*), the field access is a runtime
; call to @jl_f_getfield, and every path through the function ends in
; @llvm.trap.
; NOTE(review): the opaque arguments and dynamic getfield are presumably a
; consequence of the requested signature `MtlDeviceArray{Float32}` leaving type
; parameters unspecified -- confirm against the kernel dumps further down,
; which use the fully parameterized array type.
define void @julia_load_store_1697({}* nonnull %0, {}* nonnull %1) local_unnamed_addr #0 {
top:
; Two-slot pointer array used as the argument list of the getfield call below.
%2 = alloca {}*, i32 2, align 8
; @ REPL[4]:2 within `load_store`
; ┌ @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 within `#simdgroup_load` @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9
; │┌ @ Base.jl:38 within `getproperty`
; Slot 0 <- the first function argument (%0).
%3 = getelementptr inbounds {}*, {}** %2, i32 0
store {}* %0, {}** %3, align 8
; Slot 1 <- a constant pointer baked into the IR
; (presumably the field-name symbol object -- TODO confirm).
%4 = getelementptr inbounds {}*, {}** %2, i32 1
store {}* inttoptr (i64 4380189320 to {}*), {}** %4, align 8
; Dynamic field lookup: jl_f_getfield(null, args = %2, nargs = 2).
%5 = call nonnull {}* @jl_f_getfield({}* null, {}** %2, i32 2)
; │└
; Read the 64-bit word located immediately before the returned object, clear
; its low four bits, and compare the result against a constant pointer.
; NOTE(review): this looks like a check of the object's type tag against an
; expected type -- confirm.
%6 = bitcast {}* %5 to i64*
%7 = getelementptr inbounds i64, i64* %6, i64 -1
%8 = load atomic i64, i64* %7 unordered, align 8
%9 = and i64 %8, -16
%10 = inttoptr i64 %9 to {}*
%11 = icmp eq {}* %10, inttoptr (i64 4560188640 to {}*)
br i1 %11, label %pass, label %fail
; Comparison failed: report the exception, signal it, then trap.
fail: ; preds = %top
call fastcc void @gpu_report_exception() #2
call fastcc void @gpu_signal_exception() #2
call void @llvm.trap()
unreachable
; Comparison succeeded, yet the emitted code is identical to `fail`: this path
; also reports an exception and traps, so the function never returns normally.
pass: ; preds = %top
call fastcc void @gpu_report_exception() #2
call fastcc void @gpu_signal_exception() #2
call void @llvm.trap()
unreachable
; └
}
julia> @device_code_llvm @metal threads=(8, 8) load_store(a, b)
; CompilerJob of kernel #load_store(MtlDeviceMatrix{Float32, 1}, MtlDeviceMatrix{Float32, 1}) for GPUCompiler.MetalCompilerTarget
; @ REPL[4]:1 within `load_store`
; Kernel entry point emitted for the `@metal threads=(8, 8)` launch above.
; Each argument is a pointer in address space 1 to a struct of
; { i8 addrspace(1)*, [2 x i64] }.
; The body has a single basic block that never touches either argument: it
; reports an exception, signals it, and traps. No load or store was emitted.
define cc103 void @_Z21julia_load_store_350614MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE({ i8 addrspace(1)*, [2 x i64] } addrspace(1)* %0, { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %1) local_unnamed_addr #1 {
conversion:
; @ REPL[4]:2 within `load_store`
; ┌ @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 within `#simdgroup_load` @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9
; Unconditional failure path, attributed by the debug scope above to the
; `#simdgroup_load` call at simd.jl:9.
call fastcc void @gpu_report_exception() #2
call fastcc void @gpu_signal_exception() #2
call void @llvm.trap()
unreachable
; └
}
julia> @device_code_metal @metal threads=(8, 8) load_store(a, b)
// CompilerJob of kernel #load_store(MtlDeviceMatrix{Float32, 1}, MtlDeviceMatrix{Float32, 1}) for GPUCompiler.MetalCompilerTarget
[header]
container version: 1.8.0
unknown version: 2.0.0
unknown: 5
zero?: 0
length: 3056
programs_offset: 88
programs_length: 192
reflection_offset: 288
reflection_length: 8
debug_offset: 296
debug_length: 8
bitcode_offset: 304
bitcode_length: 2752
program_count: 1
################################################################################
[program]
name: _Z21julia_load_store_403814MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE
type: kernel
version: 2.4.0
language: 2.4.0
rel offsets (refl, dbg, bc): 0, 0, 0
bitcode size: 2752
hash: FBDB066CBAE4E373C091744476C92BB6AC2487AF3A0856B7052786DA012CEEED
tess info: 0
soffset: 0
offset: 304
size: 2752, 2752
; ModuleID = 'bc_module'
; Module header of the bitcode embedded in the metallib container dumped above.
source_filename = "text"
; Little-endian layout with 64-bit pointers (e-p:64:64:64).
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32"
target triple = "air64-apple-macosx13.1.0"
; Function Attrs: cold noreturn nounwind
declare void @llvm.trap() #0
; The two exception helpers are only declared in this module; no definition
; for either appears in this dump.
declare void @gpu_report_exception() unnamed_addr
declare void @gpu_signal_exception() unnamed_addr
; Kernel as it appears in the final metallib bitcode. Same shape as the
; `@device_code_llvm` dump: two address-space-1 struct pointers come in,
; neither is used, and the single basic block reports an exception, signals
; it, and traps. The calls are marked `tail` here.
define void @_Z21julia_load_store_403814MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE({ i8 addrspace(1)*, [2 x i64] } addrspace(1)* %0, { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %1) local_unnamed_addr {
conversion:
tail call void @gpu_report_exception()
tail call void @gpu_signal_exception()
tail call void @llvm.trap()
unreachable
}
; Attribute group #0 is the one attached to the @llvm.trap declaration.
attributes #0 = { cold noreturn nounwind }
; Named metadata: module flags, kernel registration, and toolchain/version info.
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
!julia.kernel = !{!9}
!air.kernel = !{!10}
!llvm.ident = !{!15}
!air.version = !{!16}
!air.language_version = !{!17}
; Module flags: debug-info versions, the air.max_* resource limits, and the SDK version.
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 7, !"air.max_device_buffers", i32 31}
!3 = !{i32 7, !"air.max_constant_buffers", i32 31}
!4 = !{i32 7, !"air.max_threadgroup_buffers", i32 31}
!5 = !{i32 7, !"air.max_textures", i32 128}
!6 = !{i32 7, !"air.max_read_write_textures", i32 8}
!7 = !{i32 7, !"air.max_samplers", i32 16}
!8 = !{i32 2, !"SDK Version", [2 x i32] [i32 13, i32 1]}
; !9 and !10 both point at the kernel function; !10 additionally carries an
; empty node (!11) and the argument list (!12).
!9 = !{void ({ i8 addrspace(1)*, [2 x i64] } addrspace(1)*, { i8 addrspace(1)*, [2 x i64] } addrspace(1)*)* @_Z21julia_load_store_403814MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE}
!10 = !{void ({ i8 addrspace(1)*, [2 x i64] } addrspace(1)*, { i8 addrspace(1)*, [2 x i64] } addrspace(1)*)* @_Z21julia_load_store_403814MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE, !11, !12}
!11 = !{}
!12 = !{!13, !14}
; One descriptor per kernel argument: argument index, "air.buffer", location
; index (0 and 1), read_write access, address space 1, and a 24-byte,
; 8-byte-aligned argument type. The 24 bytes are consistent with
; { i8 addrspace(1)*, [2 x i64] } under 64-bit pointers.
!13 = !{i32 0, !"air.buffer", !"air.location_index", i32 0, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 24, !"air.arg_type_align_size", i32 8}
!14 = !{i32 1, !"air.buffer", !"air.location_index", i32 1, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 24, !"air.arg_type_align_size", i32 8}
!15 = !{!"Apple metal version 31001.322 (metalfe-31001.322.1)"}
; AIR version 2.4.0 and Metal language version 2.4.0, matching the
; `version` / `language` fields in the [program] section of the container dump.
!16 = !{i32 2, i32 4, i32 0}
!17 = !{!"Metal", i32 2, i32 4, i32 0}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment