Skip to content

Instantly share code, notes, and snippets.

@appleparan
Created May 20, 2019 17:17
Show Gist options
  • Save appleparan/767520b35cb58c86efaf7494fd6c9c34 to your computer and use it in GitHub Desktop.
Save appleparan/767520b35cb58c86efaf7494fd6c9c34 to your computer and use it in GitHub Desktop.
┌ Debug: Initializing CUDA after call to cuMemAlloc
└ @ CUDAnative ~/.julia/packages/CUDAnative/wU0tS/src/init.jl:32
┌ Debug: (Re)compiling function
│ job = CUDAnative.CompilerJob(getfield(GPUArrays, Symbol("##23#24"))(), Tuple{CuArrays.CuKernelState,CuDeviceArray{Float32,1,CUDAnative.AS.Global},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(CUDAnative.abs),Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}}}}}, v"6.1.0", true, nothing, nothing, nothing, nothing)
└ @ CUDAnative ~/.julia/packages/CUDAnative/wU0tS/src/compiler/driver.jl:36
┌ Debug: Compiled getfield(GPUArrays, Symbol("##23#24"))() to PTX 6.1.0 for SM 6.1.0 using 11 registers.
│ Memory usage: 0 bytes local, 0 bytes shared, 256 bytes constant
└ @ CUDAnative ~/.julia/packages/CUDAnative/wU0tS/src/execution.jl:380
┌ Debug: (Re)compiling function
│ job = CUDAnative.CompilerJob(getfield(GPUArrays, Symbol("##23#24"))(), Tuple{CuArrays.CuKernelState,CuDeviceArray{Float32,1,CUDAnative.AS.Global},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(float),Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}}}}}, v"6.1.0", true, nothing, nothing, nothing, nothing)
└ @ CUDAnative ~/.julia/packages/CUDAnative/wU0tS/src/compiler/driver.jl:36
┌ Debug: Compiled getfield(GPUArrays, Symbol("##23#24"))() to PTX 6.1.0 for SM 6.1.0 using 11 registers.
│ Memory usage: 0 bytes local, 0 bytes shared, 256 bytes constant
└ @ CUDAnative ~/.julia/packages/CUDAnative/wU0tS/src/execution.jl:380
┌ Debug: (Re)compiling function
│ job = CUDAnative.CompilerJob(getfield(GPUArrays, Symbol("##23#24"))(), Tuple{CuArrays.CuKernelState,CuDeviceArray{Float32,1,CUDAnative.AS.Global},Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(+),Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}},Float32}}}, v"6.1.0", true, nothing, nothing, nothing, nothing)
└ @ CUDAnative ~/.julia/packages/CUDAnative/wU0tS/src/compiler/driver.jl:36
┌ Debug: Compiled getfield(GPUArrays, Symbol("##23#24"))() to PTX 6.1.0 for SM 6.1.0 using 11 registers.
│ Memory usage: 0 bytes local, 0 bytes shared, 256 bytes constant
└ @ CUDAnative ~/.julia/packages/CUDAnative/wU0tS/src/execution.jl:380
ERROR: LoadError: LLVM error: Program used external function '__nv_fabsf' which could not be resolved!
Stacktrace:
[1] handle_error(::Cstring) at /home/appleparan/.julia/packages/LLVM/tg8MX/src/core/context.jl:103
[2] broadcast(::typeof(Tracker.partial), ::Base.RefValue{typeof(CUDAnative.abs)}, ::CuArray{Float32,1}, ::Int64, ::Vararg{Any,N} where N) at ./broadcast.jl:707
[3] ∇broadcast at /home/appleparan/.julia/packages/Tracker/RRYy6/src/lib/array.jl:484 [inlined]
[4] copy(::Base.Broadcast.Broadcasted{Tracker.TrackedStyle,Tuple{Base.OneTo{Int64}},typeof(Tracker.partial),Tuple{Base.RefValue{typeof(CUDAnative.abs)},CuArray{Float32,1},Int64,TrackedArray{…,CuArray{Float32,1}}}}) at /home/appleparan/.julia/packages/Tracker/RRYy6/src/lib/array.jl:515
[5] materialize at ./broadcast.jl:753 [inlined]
[6] #547 at /home/appleparan/.julia/packages/Tracker/RRYy6/src/lib/array.jl:488 [inlined]
[7] ntuple at ./tuple.jl:159 [inlined]
[8] (::getfield(Tracker, Symbol("#back#548")){1,typeof(CUDAnative.abs),Tuple{TrackedArray{…,CuArray{Float32,1}}}})(::CuArray{Float32,1}) at /home/appleparan/.julia/packages/Tracker/RRYy6/src/lib/array.jl:488
[9] back_(::Tracker.Call{getfield(Tracker, Symbol("#back#548")){1,typeof(CUDAnative.abs),Tuple{TrackedArray{…,CuArray{Float32,1}}}},Tuple{Tracker.Tracked{CuArray{Float32,1}}}}, ::CuArray{Float32,1}, ::Bool) at /home/appleparan/.julia/packages/Tracker/RRYy6/src/back.jl:35
[10] back(::Tracker.Tracked{CuArray{Float32,1}}, ::CuArray{Float32,1}, ::Bool) at /home/appleparan/.julia/packages/Tracker/RRYy6/src/back.jl:58
[11] #13 at /home/appleparan/.julia/packages/Tracker/RRYy6/src/back.jl:38 [inlined]
[12] foreach at ./abstractarray.jl:1867 [inlined]
[13] back_(::Tracker.Call{getfield(Tracker, Symbol("##482#483")){TrackedArray{…,CuArray{Float32,1}}},Tuple{Tracker.Tracked{CuArray{Float32,1}}}}, ::Float32, ::Bool) at /home/appleparan/.julia/packages/Tracker/RRYy6/src/back.jl:38
[14] back(::Tracker.Tracked{Float32}, ::Int64, ::Bool) at /home/appleparan/.julia/packages/Tracker/RRYy6/src/back.jl:58
[15] #back!#15 at /home/appleparan/.julia/packages/Tracker/RRYy6/src/back.jl:77 [inlined]
[16] #back! at ./none:0 [inlined]
[17] #back!#32 at /home/appleparan/.julia/packages/Tracker/RRYy6/src/lib/real.jl:16 [inlined]
[18] (::getfield(Tracker, Symbol("#kw##back!")))(::NamedTuple{(:once,),Tuple{Bool}}, ::typeof(back!), ::Tracker.TrackedReal{Float32}) at ./none:0
[19] top-level scope at /home/appleparan/.julia/packages/CUDAnative/wU0tS/src/reflection.jl:164
[20] include at ./boot.jl:326 [inlined]
[21] include_relative(::Module, ::String) at ./loading.jl:1038
[22] include(::Module, ::String) at ./sysimg.jl:29
[23] exec_options(::Base.JLOptions) at ./client.jl:267
[24] _start() at ./client.jl:436
in expression starting at /home/appleparan/src/Test_ML/test.jl:28
Body::Nothing
1 ── %1 = Base.llvmcall::Core.IntrinsicFunction
│ %2 = (%1)(Ptr{Nothing} @0x00000000024ba588, UInt32, Tuple{})::UInt32
│ %3 = (Core.zext_int)(Core.Int64, %2)::Int64
│ %4 = (Base.add_int)(%3, 1)::Int64
│ %5 = (Base.sub_int)(%4, 1)::Int64
│ %6 = Base.llvmcall::Core.IntrinsicFunction
│ %7 = (%6)(Ptr{Nothing} @0x00000000036eabf8, UInt32, Tuple{})::UInt32
│ %8 = (Core.zext_int)(Core.Int64, %7)::Int64
│ %9 = (Base.mul_int)(%5, %8)::Int64
│ %10 = Base.llvmcall::Core.IntrinsicFunction
│ %11 = (%10)(Ptr{Nothing} @0x00000000035d02d8, UInt32, Tuple{})::UInt32
│ %12 = (Core.zext_int)(Core.Int64, %11)::Int64
│ %13 = (Base.add_int)(%12, 1)::Int64
│ %14 = (Base.add_int)(%9, %13)::Int64
│ %15 = (Base.getfield)(dest, :shape)::Tuple{Int64}
│ %16 = (getfield)(%15, 1)::Int64
│ %17 = (Base.slt_int)(%16, %14)::Bool
└─── goto #3 if not %17
2 ── return
3 ── %20 = (Base.sub_int)(%14, 1)::Int64
│ %21 = (Base.add_int)(%20, 1)::Int64
│ %22 = (Core.tuple)(%21)::Tuple{Int64}
│ %23 = %new(CartesianIndex{1}, %22)::CartesianIndex{1}
└─── goto #8 if not false
4 ── %25 = (Base.getfield)(bc′, :axes)::Tuple{Base.OneTo{Int64}}
│ %26 = (Base.getfield)(%25, 1, true)::Base.OneTo{Int64}
│ %27 = (Base.sle_int)(1, %21)::Bool
│ %28 = (Base.getfield)(%26, :stop)::Int64
│ %29 = (Base.sle_int)(%21, %28)::Bool
│ %30 = (Base.and_int)(%27, %29)::Bool
│ %31 = (Base.and_int)(%30, true)::Bool
└─── goto #6 if not %31
5 ── goto #7
6 ── %34 = Base.throw_boundserror::typeof(Base.throw_boundserror)
│ %35 = (Core.tuple)(%23)::Tuple{CartesianIndex{1}}
│ invoke %34(_4::Base.Broadcast.Broadcasted{Nothing,Tuple{Base.OneTo{Int64}},typeof(+),Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}},Float32}}, %35::Tuple{CartesianIndex{1}})
└─── $(Expr(:unreachable))
7 ┄─ nothing
8 ┄─ %39 = (Base.getfield)(bc′, :args)::Tuple{Base.Broadcast.Extruded{CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}},Float32}
│ %40 = (Base.getfield)(%39, 1, false)::Base.Broadcast.Extruded{CuDeviceArray{Float32,1,CUDAnative.AS.Global},Tuple{Bool},Tuple{Int64}}
│ %41 = (Base.getfield)(%40, :x)::CuDeviceArray{Float32,1,CUDAnative.AS.Global}
│ %42 = (Base.getfield)(%40, :keeps)::Tuple{Bool}
│ %43 = (Base.getfield)(%40, :defaults)::Tuple{Int64}
│ %44 = (Base.getfield)(%42, 1, true)::Bool
│ %45 = (Base.getfield)(%43, 1, true)::Int64
│ %46 = (Base.Broadcast.ifelse)(%44, %21, %45)::Int64
└─── goto #13 if not false
9 ── %48 = (Core.tuple)(%46)::Tuple{Int64}
│ %49 = (Base.getfield)(%41, :shape)::Tuple{Int64}
│ %50 = (Base.getfield)(%49, 1, true)::Int64
│ %51 = (Base.slt_int)(%50, 0)::Bool
│ %52 = (Base.ifelse)(%51, 0, %50)::Int64
│ %53 = (Base.sle_int)(1, %46)::Bool
│ %54 = (Base.sle_int)(%46, %52)::Bool
│ %55 = (Base.and_int)(%53, %54)::Bool
└─── goto #11 if not %55
10 ─ goto #12
11 ─ invoke Base.throw_boundserror(%41::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, %48::Tuple{Int64})
└─── $(Expr(:unreachable))
12 ┄ nothing
13 ┄ %61 = (Base.getfield)(%41, :ptr)::CUDAnative.DevicePtr{Float32,CUDAnative.AS.Global}
│ %62 = Base.llvmcall::Core.IntrinsicFunction
│ %63 = (Base.sub_int)(%46, 1)::Int64
│ %64 = (%62)(Ptr{Nothing} @0x000000000375b7d8, Float32, Tuple{CUDAnative.DevicePtr{Float32,CUDAnative.AS.Global},Int64}, %61, %63)::Float32
└─── goto #14
14 ─ goto #15
15 ─ goto #16
16 ─ goto #17
17 ─ %69 = (getfield)(%39, 2)::Float32
└─── goto #18
18 ─ %71 = (Base.add_float)(%64, %69)::Float32
└─── goto #19
19 ─ goto #20
20 ─ goto #25 if not false
21 ─ %75 = (Core.tuple)(%21)::Tuple{Int64}
│ %76 = (Base.getfield)(dest, :shape)::Tuple{Int64}
│ %77 = (Base.getfield)(%76, 1, true)::Int64
│ %78 = (Base.slt_int)(%77, 0)::Bool
│ %79 = (Base.ifelse)(%78, 0, %77)::Int64
│ %80 = (Base.sle_int)(1, %21)::Bool
│ %81 = (Base.sle_int)(%21, %79)::Bool
│ %82 = (Base.and_int)(%80, %81)::Bool
└─── goto #23 if not %82
22 ─ goto #24
23 ─ invoke Base.throw_boundserror(_3::CuDeviceArray{Float32,1,CUDAnative.AS.Global}, %75::Tuple{Int64})
└─── $(Expr(:unreachable))
24 ┄ nothing
25 ┄ %88 = (Base.getfield)(dest, :ptr)::CUDAnative.DevicePtr{Float32,CUDAnative.AS.Global}
│ %89 = Base.llvmcall::Core.IntrinsicFunction
│ %90 = (Base.sub_int)(%21, 1)::Int64
│ (%89)(Ptr{Nothing} @0x0000000003c5d8a8, Nothing, Tuple{CUDAnative.DevicePtr{Float32,CUDAnative.AS.Global},Float32,Int64}, %88, %71, %90)
└─── goto #26
26 ─ goto #27
27 ─ goto #28
28 ─ return
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment