@mfalt
Created March 3, 2022 12:09
ERROR: LoadError: TaskFailedException
nested task error: Out of GPU memory trying to allocate 897.217 MiB
Effective GPU memory usage: 52.47% (5.726 GiB/10.913 GiB)
Memory pool usage: 3.117 GiB (3.656 GiB reserved)
Stacktrace:
[1] macro expansion
@ ~/.julia/packages/CUDA/0IDh2/src/pool.jl:219 [inlined]
[2] macro expansion
@ ./timing.jl:299 [inlined]
[3] #_alloc#180
@ ~/.julia/packages/CUDA/0IDh2/src/pool.jl:187 [inlined]
[4] #alloc#179
@ ~/.julia/packages/CUDA/0IDh2/src/pool.jl:173 [inlined]
[5] alloc
@ ~/.julia/packages/CUDA/0IDh2/src/pool.jl:169 [inlined]
[6] CuArray
@ ~/.julia/packages/CUDA/0IDh2/src/array.jl:44 [inlined]
[7] CuArray
@ ~/.julia/packages/CUDA/0IDh2/src/array.jl:125 [inlined]
[8] CuArray
@ ~/.julia/packages/CUDA/0IDh2/src/array.jl:132 [inlined]
[9] with_workspace(f::CUDA.CUDNN.var"#1150#1153"{Vector{CUDA.CUDNN.cudnnConvolutionFwdAlgoPerfStruct}, Vector{Int32}, Int64, CUDA.CUDNN.cudnnTensorDescriptor, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CUDNN.cudnnFilterDescriptor, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CUDNN.cudnnConvolutionDescriptor, CUDA.CUDNN.cudnnTensorDescriptor, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}}, eltyp::Type{UInt8}, size::CUDA.CUDNN.var"#workspaceSize#1152"{CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}}, fallback::Nothing; keep::Bool)
@ CUDA.APIUtils ~/.julia/packages/CUDA/0IDh2/lib/utils/call.jl:65
[10] with_workspace
@ ~/.julia/packages/CUDA/0IDh2/lib/utils/call.jl:58 [inlined]
[11] #with_workspace#1
@ ~/.julia/packages/CUDA/0IDh2/lib/utils/call.jl:53 [inlined]
[12] with_workspace (repeats 2 times)
@ ~/.julia/packages/CUDA/0IDh2/lib/utils/call.jl:53 [inlined]
[13] #1149
@ ~/.julia/packages/CUDA/0IDh2/lib/cudnn/convolution.jl:179 [inlined]
[14] get!(default::CUDA.CUDNN.var"#1149#1151"{CUDA.CUDNN.cudnnTensorDescriptor, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CUDNN.cudnnFilterDescriptor, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CUDNN.cudnnConvolutionDescriptor, CUDA.CUDNN.cudnnTensorDescriptor, CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}}, h::Dict{Tuple, CUDA.CUDNN.cudnnConvolutionFwdAlgoPerfStruct}, key::Tuple{CUDA.CUDNN.cudnnTensorDescriptor, CUDA.CUDNN.cudnnFilterDescriptor, CUDA.CUDNN.cudnnConvolutionDescriptor, Nothing, CUDA.CUDNN.cudnnActivationMode_t})
@ Base ./dict.jl:464
[15] cudnnConvolutionFwdAlgoPerf(xDesc::CUDA.CUDNN.cudnnTensorDescriptor, x::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, wDesc::CUDA.CUDNN.cudnnFilterDescriptor, w::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, convDesc::CUDA.CUDNN.cudnnConvolutionDescriptor, yDesc::CUDA.CUDNN.cudnnTensorDescriptor, y::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, biasDesc::Nothing, activation::CUDA.CUDNN.cudnnActivationMode_t)
@ CUDA.CUDNN ~/.julia/packages/CUDA/0IDh2/lib/cudnn/convolution.jl:174
[16] cudnnConvolutionForwardAD(w::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, x::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, bias::Nothing, z::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}; y::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, activation::CUDA.CUDNN.cudnnActivationMode_t, convDesc::CUDA.CUDNN.cudnnConvolutionDescriptor, wDesc::CUDA.CUDNN.cudnnFilterDescriptor, xDesc::CUDA.CUDNN.cudnnTensorDescriptor, yDesc::CUDA.CUDNN.cudnnTensorDescriptor, zDesc::CUDA.CUDNN.cudnnTensorDescriptor, biasDesc::Nothing, alpha::Base.RefValue{Float32}, beta::Base.RefValue{Float32}, dw::Base.RefValue{Any}, dx::Base.RefValue{Any}, dz::Base.RefValue{Any}, dbias::Base.RefValue{Any}, dready::Base.RefValue{Bool})
@ CUDA.CUDNN ~/.julia/packages/CUDA/0IDh2/lib/cudnn/convolution.jl:102
[17] #cudnnConvolutionForwardWithDefaults#1143
@ ~/.julia/packages/CUDA/0IDh2/lib/cudnn/convolution.jl:96 [inlined]
[18] #cudnnConvolutionForward!#1142
@ ~/.julia/packages/CUDA/0IDh2/lib/cudnn/convolution.jl:53 [inlined]
[19] conv!(y::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, x::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, w::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, cdims::DenseConvDims{2, (5, 5), 3, 6, 1, (1, 1), (0, 0, 0, 0), (1, 1), false}; alpha::Int64, beta::Int64, algo::Int64)
@ NNlibCUDA ~/.julia/packages/NNlibCUDA/IeeBk/src/cudnn/conv.jl:34
[20] conv!
@ ~/.julia/packages/NNlibCUDA/IeeBk/src/cudnn/conv.jl:27 [inlined]
[21] conv(x::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, w::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, cdims::DenseConvDims{2, (5, 5), 3, 6, 1, (1, 1), (0, 0, 0, 0), (1, 1), false}; kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ NNlib ~/.julia/packages/NNlib/tvMmZ/src/conv.jl:91
[22] conv(x::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, w::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, cdims::DenseConvDims{2, (5, 5), 3, 6, 1, (1, 1), (0, 0, 0, 0), (1, 1), false})
@ NNlib ~/.julia/packages/NNlib/tvMmZ/src/conv.jl:89
[23] (::Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}})(x::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
@ Flux ~/.julia/packages/Flux/BPPNj/src/layers/conv.jl:166
[24] applychain(fs::Tuple{Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, MaxPool{2, 4}, Conv{2, 4, typeof(relu), CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, MaxPool{2, 4}, typeof(flatten), Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(relu), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}, Dense{typeof(identity), CUDA.CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, CUDA.CuArray{Float32, 1, CUDA.Mem.DeviceBuffer}}}, x::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
@ Flux ~/.julia/packages/Flux/BPPNj/src/layers/basic.jl:47
[25] Chain
@ ~/.julia/packages/Flux/BPPNj/src/layers/basic.jl:49 [inlined]
[26] inference(imgs::CUDA.CuArray{Float32, 4, CUDA.Mem.DeviceBuffer})
@ Main ~/knightvision/piece_recognition/KnightVisionServer/test/test_cuda.jl:15
[27] (::var"#5#6")()
@ Main ./threadingconstructs.jl:178
...and 11 more exceptions.
Stacktrace:
[1] sync_end(c::Channel{Any})
@ Base ./task.jl:381
[2] top-level scope
@ task.jl:400
in expression starting at /home/mattias/knightvision/piece_recognition/KnightVisionServer/test/test_cuda.jl:25
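For context: the original test_cuda.jl is not included in the gist, so the following is only a minimal sketch of the kind of script that could produce the trace above, reconstructed from the frames it shows. The Chain layout, Conv((5, 5), 3 => 6, relu), the `inference` function, and the @sync / Threads.@spawn pattern come from frames [19], [24], [26], [27] and the outer sync_end frames; the layer widths, image size, batch size, and task count are assumptions.

# Hypothetical reconstruction of test_cuda.jl -- NOT the original script.
# Layer widths, image size, and batch size are guesses; the Chain layout,
# Conv((5, 5), 3 => 6, relu), `inference`, and the @sync/Threads.@spawn
# pattern are inferred from the stack trace.
using Flux, CUDA

model = Chain(
    Conv((5, 5), 3 => 6, relu),   # frame [19]: DenseConvDims{2, (5, 5), 3, 6, ...}
    MaxPool((2, 2)),
    Conv((5, 5), 6 => 16, relu),  # second Conv/MaxPool pair in frame [24]
    MaxPool((2, 2)),
    Flux.flatten,
    Dense(400, 120, relu),        # widths assumed (LeNet-style); frame [24] shows three Dense layers
    Dense(120, 84, relu),
    Dense(84, 10),
) |> gpu

inference(imgs) = model(imgs)     # frame [26]: inference(imgs::CuArray{Float32, 4, ...})

imgs = CUDA.rand(Float32, 32, 32, 3, 512)   # image size and batch size are assumptions

# Frames [27] and the outer sync_end/task.jl frames point at tasks spawned inside @sync.
# Each task runs the forward convolution concurrently, and each cuDNN algorithm search
# tries to allocate its own ~897 MiB workspace, which together exhausts the ~11 GiB card.
@sync for _ in 1:12               # "...and 11 more exceptions." suggests ~12 failing tasks
    Threads.@spawn inference(imgs)
end

Under these assumptions, the failure is not a single oversized allocation but the sum of per-task cuDNN workspaces requested at the same time, which is why every spawned task reports the same out-of-memory error.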