Skip to content

Instantly share code, notes, and snippets.

Lucas C Wilcox lcw

Block or report user

Report or block lcw

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View 1d_lu_benchmarks.md

Benchmarking Direct Columnwise Solvers

Here we are benchmarking the direct columnwise solvers using CLIMA version 4b8f986deb4f8c5d9db3db16083a0518bc971bba.

Global Simulations

The goal here is to measure the cost of the solvers per time step. We leave it to other studies to determine whether the expected time step size can be obtained.

View forward!_1.lowered.jl
CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
View forward!_1.lowered.jl
CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
@lcw
lcw / forward!_1.lowered.jl
Created Oct 24, 2019
forward subs not spilling
View forward!_1.lowered.jl
CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
@lcw
lcw / forward!_1.opt.ll
Created Oct 24, 2019
forward subs spilling
View forward!_1.opt.ll
; Start of the LLVM IR module named 'overdub'. Only the module header and two
; intrinsic declarations are visible in this excerpt.
; ModuleID = 'overdub'
source_filename = "overdub"
; Target is 64-bit NVPTX (NVIDIA CUDA).
target triple = "nvptx64-nvidia-cuda"
; Function Attrs: nounwind readnone
; NVVM intrinsic; going by its name it reads the PTX special register ctaid.x.
; Attribute group #0 is defined outside this excerpt.
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
; Function Attrs: nounwind readnone
; NVVM intrinsic; going by its name it reads the PTX special register tid.x.
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
@lcw
lcw / forward!_1.opt.ll
Created Oct 24, 2019
forward subs spilling
View forward!_1.opt.ll
; Start of the LLVM IR module named 'overdub'. Only the module header and two
; intrinsic declarations are visible in this excerpt.
; ModuleID = 'overdub'
source_filename = "overdub"
; Target is 64-bit NVPTX (NVIDIA CUDA).
target triple = "nvptx64-nvidia-cuda"
; Function Attrs: nounwind readnone
; NVVM intrinsic; going by its name it reads the PTX special register ctaid.x.
; Attribute group #0 is defined outside this excerpt.
declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #0
; Function Attrs: nounwind readnone
; NVVM intrinsic; going by its name it reads the PTX special register tid.x.
declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #0
@lcw
lcw / forward!_1.lowered.jl
Created Oct 24, 2019
forward subs spilling
View forward!_1.lowered.jl
CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
View forward!_1.lowered.jl
CodeInfo(
@ /home/lucas/research/code/Heptapus.jl/examples/banded/try.jl:17 within `forward!'
1 ── #self# = Core.getfield(##overdub_arguments#372, 1)
│ b = Core.getfield(##overdub_arguments#372, 2)
│ L = Core.getfield(##overdub_arguments#372, 3)
│ #unused#@_7 = Core.getfield(##overdub_arguments#372, 4)
│ #unused#@_8 = Core.getfield(##overdub_arguments#372, 5)
│ #unused#@_9 = Core.getfield(##overdub_arguments#372, 6)
│ #unused#@_10 = Core.getfield(##overdub_arguments#372, 7)
│ Core.NewvarNode(:(val))
View try.jl
using GPUifyLoops, Cthulhu, CuArrays, CUDAnative
# Copy `B[i]` into `A[i]` for every index `i` produced by the `@loop` construct.
#
# Arguments:
# - `A`: destination; written through the linear index `A[i]`.
# - `B`: source; read through the linear index `B[i]`.
#
# Returns `nothing`.
#
# NOTE(review): `@inbounds` removes bounds checks and nothing here verifies that
# `B` has at least `size(A,1)` elements -- callers must guarantee it.
function kernel!(A, B)
# The parenthesised pair hands `@loop` two index expressions: the range
# `1:size(A,1)` and an index computed from the block and thread ids.
# Presumably (GPUifyLoops convention) the range drives the loop on the CPU and
# the computed index selects the element on the GPU -- TODO confirm.
# Keep the `(range; index)` shape intact: the macro consumes this exact syntax.
@inbounds @loop for i in (1:size(A,1);
(blockIdx().x-1)*blockDim().x + threadIdx().x)
A[i] = B[i]
end
nothing
end
You can’t perform that action at this time.