Skip to content

Instantly share code, notes, and snippets.

CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
@lcw
lcw / forward!_1.lowered.jl
Created October 24, 2019 21:59
forward subs not spilling
CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
@lcw
lcw / forward!_1.opt.ll
Created October 24, 2019 21:58
forward subs spilling
; Excerpt of the LLVM IR module 'overdub': module header followed by two
; NVVM intrinsic declarations. The target triple selects the 64-bit NVPTX
; (CUDA) target.
; ModuleID = 'overdub'
source_filename = "overdub"
target triple = "nvptx64-nvidia-cuda"
; NVVM intrinsic returning the PTX special register %ctaid.x as an i32.
; Function Attrs: nounwind readnone
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
; NVVM intrinsic returning the PTX special register %tid.x as an i32.
; Function Attrs: nounwind readnone
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
@lcw
lcw / forward!_1.opt.ll
Created October 24, 2019 21:56
forward subs spilling
; Excerpt of the LLVM IR module 'overdub': module header followed by two
; NVVM intrinsic declarations. The target triple selects the 64-bit NVPTX
; (CUDA) target.
; ModuleID = 'overdub'
source_filename = "overdub"
target triple = "nvptx64-nvidia-cuda"
; NVVM intrinsic returning the PTX special register %ctaid.x as an i32.
; Function Attrs: nounwind readnone
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
; NVVM intrinsic returning the PTX special register %tid.x as an i32.
; Function Attrs: nounwind readnone
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
@lcw
lcw / forward!_1.lowered.jl
Created October 24, 2019 21:55
forward subs spilling
CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
@lcw
lcw / try.jl
Created August 23, 2019 20:51
broken?
using GPUifyLoops, Cthulhu, CuArrays, CUDAnative
# kernel!(A, B)
#
# Copy `B` into `A` element by element: `A[i] = B[i]` for every index `i` the
# loop visits. `A` is mutated in place and the function returns `nothing`.
#
# NOTE(review): `@inbounds` removes bounds checks on `A[i]` and `B[i]`, so `B`
# must be indexable wherever `A` is along the first dimension; confirm at call
# sites.
function kernel!(A, B)
# `@loop` receives two index forms separated by `;`: the range `1:size(A,1)`
# and a per-thread index built from blockIdx/blockDim/threadIdx (the `-1`
# converts the 1-based block index to a 0-based offset).
# NOTE(review): presumably GPUifyLoops uses the range on the CPU and the
# thread index on the GPU; confirm against the GPUifyLoops documentation,
# including whether out-of-range thread indices are masked off.
@inbounds @loop for i in (1:size(A,1);
(blockIdx().x-1)*blockDim().x + threadIdx().x)
A[i] = B[i]
end
# Explicit `nothing` so the kernel has no return value.
nothing
end
─────────────────────────────────────────────────────────────────────────────==26324== Profiling application: julia --project=env/gpu test/DGmethods/compressible_Navier_Stokes/dycoms3d.jl
==26324== Profiling result:
==26324== Metric result:
Invocations Metric Name Metric Description Min Max Avg
Device "Tesla V100-SXM2-16GB (0)"
Kernel: ptxcall_knl_dof_iteration__6
55 inst_per_warp Instructions per warp 5.4765e+03 5.6132e+03 5.4946e+03
55 branch_efficiency Branch Efficiency 99.36% 99.41% 99.40%
55 warp_execution_efficiency Warp Execution Efficiency 81.70% 83.28% 83.09%
55 warp_nonpred_execution_efficiency Warp Non-Predicated Execution Efficiency 78.49% 80.00%