Skip to content

Instantly share code, notes, and snippets.

@DhairyaLGandhi
Last active March 12, 2021 19:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DhairyaLGandhi/12d06b9389e13130cf4c03c67885be91 to your computer and use it in GitHub Desktop.
CUDNN Test
using BenchmarkTools, TimerOutputs
using Flux, Metalhead
using Flux.CUDA
# Forward pass of model `m` on input `ip`; `CUDA.@sync` blocks until all
# queued GPU kernels finish so the full computation is actually timed.
fw(m, ip) = CUDA.@sync m(ip)
"""
    benchmark_cu(batchsize = 64)

Benchmark the forward pass of a `ResNet` on the GPU for a batch of
`batchsize` random 224×224×3 `Float32` images, then display the results.

The model and input are moved to the GPU in the benchmark `setup` phase so
host→device transfer is excluded from the measured time; `teardown` runs
`GC.gc()` and `CUDA.reclaim()` to release GPU memory between samples.
"""
function benchmark_cu(batchsize = 64)
    resnet = ResNet()
    ip = rand(Float32, 224, 224, 3, batchsize)
    b = @benchmarkable(
        fw(gresnet, gip),
        # `$` interpolation captures the values so BenchmarkTools does not
        # benchmark global-variable access.
        setup = (gresnet = $resnet |> gpu;
                 gip = gpu($ip)),
        teardown = (GC.gc(); CUDA.reclaim()))
    display(run(b))
    println()
    return nothing
end
# Backward pass: gradients of `sum(m(x))` with respect to both the model and
# the input, synchronized so the whole GPU computation is timed.
bw(m, ip) = CUDA.@sync gradient((model, x) -> sum(model(x)), m, ip)
"""
    benchmark_bw_cu(batchsize = 64)

Benchmark the backward pass (gradient of `sum(model(input))`) of a `ResNet`
on the GPU for a batch of `batchsize` random 224×224×3 `Float32` images,
then display the results.

The model and input are moved to the GPU in the benchmark `setup` phase so
host→device transfer is excluded from the measured time; `teardown` runs
`GC.gc()` and `CUDA.reclaim()` to release GPU memory between samples.
"""
function benchmark_bw_cu(batchsize = 64)
    resnet = ResNet()
    ip = rand(Float32, 224, 224, 3, batchsize)
    b = @benchmarkable(
        bw(gresnet, gip),
        # `$` interpolation captures the values so BenchmarkTools does not
        # benchmark global-variable access.
        setup = (gresnet = $resnet |> gpu;
                 gip = gpu($ip)),
        teardown = (GC.gc(); CUDA.reclaim()))
    display(run(b))
    println()
    return nothing
end
# Run the backward-pass benchmark over a range of batch sizes.
# NOTE(review): the unused `to = TimerOutput()` created each iteration has
# been removed — it was never passed to any timed call.
for n in (5, 15, 32, 64)
    benchmark_bw_cu(n)
end
# Project.toml dependency section for this gist (not part of the script above)
[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
GPUCompiler = "61eb1bfa-7361-4325-ad38-22787b887f55"
LLVM = "929cbde3-209d-540e-8aea-75f648917ca0"
Metalhead = "dbeba491-748d-5e0e-a39e-b530a07fa0cc"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment