Last active
January 1, 2023 07:43
-
-
Save maxwindiff/11d141f9896254fa2bd1c8d860b3ec14 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
julia> Metal.code_llvm(load_store, Tuple{MtlDeviceArray{Float32}, MtlDeviceArray{Float32}}) | |
; @ REPL[4]:1 within `load_store` | |
define void @julia_load_store_1697({}* nonnull %0, {}* nonnull %1) local_unnamed_addr #0 { | |
top: | |
%2 = alloca {}*, i32 2, align 8 | |
; @ REPL[4]:2 within `load_store` | |
; ┌ @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 within `#simdgroup_load` @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 | |
; │┌ @ Base.jl:38 within `getproperty` | |
%3 = getelementptr inbounds {}*, {}** %2, i32 0 | |
store {}* %0, {}** %3, align 8 | |
%4 = getelementptr inbounds {}*, {}** %2, i32 1 | |
store {}* inttoptr (i64 4380189320 to {}*), {}** %4, align 8 | |
%5 = call nonnull {}* @jl_f_getfield({}* null, {}** %2, i32 2) | |
; │└ | |
%6 = bitcast {}* %5 to i64* | |
%7 = getelementptr inbounds i64, i64* %6, i64 -1 | |
%8 = load atomic i64, i64* %7 unordered, align 8 | |
%9 = and i64 %8, -16 | |
%10 = inttoptr i64 %9 to {}* | |
%11 = icmp eq {}* %10, inttoptr (i64 4560188640 to {}*) | |
br i1 %11, label %pass, label %fail | |
fail: ; preds = %top | |
call fastcc void @gpu_report_exception() #2 | |
call fastcc void @gpu_signal_exception() #2 | |
call void @llvm.trap() | |
unreachable | |
pass: ; preds = %top | |
call fastcc void @gpu_report_exception() #2 | |
call fastcc void @gpu_signal_exception() #2 | |
call void @llvm.trap() | |
unreachable | |
; └ | |
} | |
julia> @device_code_llvm @metal threads=(8, 8) load_store(a, b) | |
; CompilerJob of kernel #load_store(MtlDeviceMatrix{Float32, 1}, MtlDeviceMatrix{Float32, 1}) for GPUCompiler.MetalCompilerTarget | |
; @ REPL[4]:1 within `load_store` | |
define cc103 void @_Z21julia_load_store_350614MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE({ i8 addrspace(1)*, [2 x i64] } addrspace(1)* %0, { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %1) local_unnamed_addr #1 { | |
conversion: | |
; @ REPL[4]:2 within `load_store` | |
; ┌ @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 within `#simdgroup_load` @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 @ /Users/kichi/.julia/dev/Metal/src/device/intrinsics/simd.jl:9 | |
call fastcc void @gpu_report_exception() #2 | |
call fastcc void @gpu_signal_exception() #2 | |
call void @llvm.trap() | |
unreachable | |
; └ | |
} | |
julia> @device_code_metal @metal threads=(8, 8) load_store(a, b) | |
// CompilerJob of kernel #load_store(MtlDeviceMatrix{Float32, 1}, MtlDeviceMatrix{Float32, 1}) for GPUCompiler.MetalCompilerTarget | |
[header] | |
container version: 1.8.0 | |
unknown version: 2.0.0 | |
unknown: 5 | |
zero?: 0 | |
length: 3056 | |
programs_offset: 88 | |
programs_length: 192 | |
reflection_offset: 288 | |
reflection_length: 8 | |
debug_offset: 296 | |
debug_length: 8 | |
bitcode_offset: 304 | |
bitcode_length: 2752 | |
program_count: 1 | |
################################################################################ | |
[program] | |
name: _Z21julia_load_store_403814MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE | |
type: kernel | |
version: 2.4.0 | |
language: 2.4.0 | |
rel offsets (refl, dbg, bc): 0, 0, 0 | |
bitcode size: 2752 | |
hash: FBDB066CBAE4E373C091744476C92BB6AC2487AF3A0856B7052786DA012CEEED | |
tess info: 0 | |
soffset: 0 | |
offset: 304 | |
size: 2752, 2752 | |
; ModuleID = 'bc_module' | |
source_filename = "text" | |
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-n8:16:32" | |
target triple = "air64-apple-macosx13.1.0" | |
; Function Attrs: cold noreturn nounwind | |
declare void @llvm.trap() #0 | |
declare void @gpu_report_exception() unnamed_addr | |
declare void @gpu_signal_exception() unnamed_addr | |
define void @_Z21julia_load_store_403814MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE({ i8 addrspace(1)*, [2 x i64] } addrspace(1)* %0, { i8 addrspace(1)*, [2 x i64] } addrspace(1)* %1) local_unnamed_addr { | |
conversion: | |
tail call void @gpu_report_exception() | |
tail call void @gpu_signal_exception() | |
tail call void @llvm.trap() | |
unreachable | |
} | |
attributes #0 = { cold noreturn nounwind } | |
!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6, !7, !8} | |
!julia.kernel = !{!9} | |
!air.kernel = !{!10} | |
!llvm.ident = !{!15} | |
!air.version = !{!16} | |
!air.language_version = !{!17} | |
!0 = !{i32 2, !"Dwarf Version", i32 4} | |
!1 = !{i32 2, !"Debug Info Version", i32 3} | |
!2 = !{i32 7, !"air.max_device_buffers", i32 31} | |
!3 = !{i32 7, !"air.max_constant_buffers", i32 31} | |
!4 = !{i32 7, !"air.max_threadgroup_buffers", i32 31} | |
!5 = !{i32 7, !"air.max_textures", i32 128} | |
!6 = !{i32 7, !"air.max_read_write_textures", i32 8} | |
!7 = !{i32 7, !"air.max_samplers", i32 16} | |
!8 = !{i32 2, !"SDK Version", [2 x i32] [i32 13, i32 1]} | |
!9 = !{void ({ i8 addrspace(1)*, [2 x i64] } addrspace(1)*, { i8 addrspace(1)*, [2 x i64] } addrspace(1)*)* @_Z21julia_load_store_403814MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE} | |
!10 = !{void ({ i8 addrspace(1)*, [2 x i64] } addrspace(1)*, { i8 addrspace(1)*, [2 x i64] } addrspace(1)*)* @_Z21julia_load_store_403814MtlDeviceArrayI7Float32Li2ELi1EES_IS0_Li2ELi1EE, !11, !12} | |
!11 = !{} | |
!12 = !{!13, !14} | |
!13 = !{i32 0, !"air.buffer", !"air.location_index", i32 0, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 24, !"air.arg_type_align_size", i32 8} | |
!14 = !{i32 1, !"air.buffer", !"air.location_index", i32 1, i32 1, !"air.read_write", !"air.address_space", i32 1, !"air.arg_type_size", i32 24, !"air.arg_type_align_size", i32 8} | |
!15 = !{!"Apple metal version 31001.322 (metalfe-31001.322.1)"} | |
!16 = !{i32 2, i32 4, i32 0} | |
!17 = !{!"Metal", i32 2, i32 4, i32 0} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment