-
-
Notifications
You must be signed in to change notification settings - Fork 212
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
nested AD on CUDA array #1450
Comments
The same issue appears with Flux: using CUDA, Flux,Zygote, Optimisers
function mweFlux(dev)
D = Dense(5,1, relu) |> dev
ps, r = Optimisers.destructure(D)
x = rand(Float32,5, 2) |> dev
g(ps) = sum(abs2,only(gradient(x -> sum(r(ps)(x)),x)))
gradient(x->g(x),ps)
end julia> mweFlux(cpu)
┌ Warning: second derivatives of Restructure may not work yet, sorry!
└ @ Optimisers ~/.julia/packages/Optimisers/TxzMn/src/destructure.jl:166
(Float32[1.233114, -0.5352969, -1.307327, 0.31797743, 0.41771483, 0.0],)
julia> mweFlux(gpu)
ERROR: try/catch is not supported.
Refer to the Zygote documentation for fixes.
https://fluxml.ai/Zygote.jl/latest/limitations
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] instrument(ir::IRTools.Inner.IR)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:128
[3] #Primal#31
@ ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:227 [inlined]
[4] Primal
@ ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:226 [inlined]
[5] Zygote.Adjoint(ir::IRTools.Inner.IR; varargs::Nothing, normalise::Bool)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:352
[6] _generate_pullback_via_decomposition(T::Type, world::Nothing)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/emit.jl:101
[7] _generate_pullback(::Type, ::Nothing, ::Type, ::Type, ::Vararg{Type})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:27
[8] #s86#1607
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:102 [inlined]
[9] var"#s86#1607"(::Any, ctx::Any, f::Any, args::Any)
@ Zygote ./none:0
[10] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any})
@ Core ./boot.jl:602
[11] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/compiler/execution.jl:310 [inlined]
[12] _pullback(::Zygote.Context{false}, ::typeof(cufunction), ::GPUArrays.var"#broadcast_kernel#26", ::Type{Tuple{CUDA.CuKernelContext, CuDeviceMatrix{Float32, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}}, Int64}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[13] macro expansion
@ ~/.julia/packages/CUDA/35NC6/src/compiler/execution.jl:104 [inlined]
[14] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/gpuarrays.jl:17 [inlined]
[15] _pullback(::Zygote.Context{false}, ::CUDA.var"##launch_heuristic#1080", ::Int64, ::Int64, ::typeof(GPUArrays.launch_heuristic), ::CUDA.CuArrayBackend, ::GPUArrays.var"#broadcast_kernel#26", ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}}, ::Int64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[16] _apply(::Function, ::Vararg{Any})
@ Core ./boot.jl:838
[17] adjoint
@ ~/.julia/packages/Zygote/4rucm/src/lib/lib.jl:203 [inlined]
[18] adjoint(::Zygote.Context{false}, ::typeof(Core._apply_iterate), ::typeof(iterate), ::Function, ::Tuple{Int64, Int64, typeof(GPUArrays.launch_heuristic), CUDA.CuArrayBackend, GPUArrays.var"#broadcast_kernel#26"}, ::Tuple{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}}, Int64})
@ Zygote ./none:0
[19] _pullback
@ ~/.julia/packages/ZygoteRules/OgCVT/src/adjoint.jl:66 [inlined]
[20] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/gpuarrays.jl:15 [inlined]
[21] _pullback(::Zygote.Context{false}, ::typeof(Core.kwcall), ::NamedTuple{(:elements, :elements_per_thread), Tuple{Int64, Int64}}, ::typeof(GPUArrays.launch_heuristic), ::CUDA.CuArrayBackend, ::GPUArrays.var"#broadcast_kernel#26", ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}}, ::Int64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[22] _pullback
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:65 [inlined]
[23] _pullback(::Zygote.Context{false}, ::typeof(GPUArrays._copyto!), ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{2}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[24] _pullback
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:41 [inlined]
[25] _pullback
@ ./broadcast.jl:881 [inlined]
[26] _pullback(::Zygote.Context{false}, ::typeof(Base.Broadcast.materialize!), ::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}, typeof(identity), Tuple{Float32}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[27] _pullback
@ ./broadcast.jl:877 [inlined]
[28] _pullback
@ ~/.julia/packages/Zygote/4rucm/src/lib/broadcast.jl:369 [inlined]
[29] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#1453#1456"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}, args::Float32)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[30] _pullback
@ ~/.julia/packages/ZygoteRules/OgCVT/src/adjoint.jl:71 [inlined]
[31] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#4229#back#1457"{Zygote.var"#1453#1456"{CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}}}, args::Float32)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[32] _pullback
@ ./REPL[8]:5 [inlined]
... |
As the labels suggest, this is a problem with Zygote hitting rules, while differentiating GPU code, which are not themselves differentiable. What's really needed is an MWE which doesn't use any libraries aside from Zygote and CUDA. |
Here is an MWE: using Zygote,CUDA
function MWE(D,x)
g(D) = sum(only(gradient(x -> sum(D*x),x)))
gradient(x->g(x),D)
end julia> D = randn(Float64, 1, 5);
julia> x = randn(Float64, 5);
julia> MWE(D,x)
([1.0 1.0 … 1.0 1.0],)
julia> D = CUDA.randn(Float64, 1, 5);
julia> x = CUDA.randn(Float64, 5);
julia> MWE(D,x)
ERROR: try/catch is not supported.
Refer to the Zygote documentation for fixes.
https://fluxml.ai/Zygote.jl/latest/limitations
Stacktrace:
[1] error(s::String)
@ Base ./error.jl:35
[2] instrument(ir::IRTools.Inner.IR)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:128
[3] #Primal#31
@ ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:227 [inlined]
[4] Primal
@ ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:226 [inlined]
[5] Zygote.Adjoint(ir::IRTools.Inner.IR; varargs::Nothing, normalise::Bool)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/reverse.jl:352
[6] _generate_pullback_via_decomposition(T::Type, world::Nothing)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/emit.jl:101
[7] _generate_pullback(::Type, ::Nothing, ::Type, ::Type, ::Vararg{Type})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:27
[8] #s86#1607
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:102 [inlined]
[9] var"#s86#1607"(::Any, ctx::Any, f::Any, args::Any)
@ Zygote ./none:0
[10] (::Core.GeneratedFunctionStub)(::Any, ::Vararg{Any})
@ Core ./boot.jl:602
[11] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/compiler/execution.jl:310 [inlined]
[12] _pullback(::Zygote.Context{false}, ::typeof(cufunction), ::GPUArrays.var"#broadcast_kernel#26", ::Type{Tuple{CUDA.CuKernelContext, CuDeviceVector{Float64, 1}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}}, Int64}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[13] macro expansion
@ ~/.julia/packages/CUDA/35NC6/src/compiler/execution.jl:104 [inlined]
[14] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/gpuarrays.jl:17 [inlined]
[15] _pullback(::Zygote.Context{false}, ::CUDA.var"##launch_heuristic#1080", ::Int64, ::Int64, ::typeof(GPUArrays.launch_heuristic), ::CUDA.CuArrayBackend, ::GPUArrays.var"#broadcast_kernel#26", ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}}, ::Int64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[16] _apply(::Function, ::Vararg{Any})
@ Core ./boot.jl:838
[17] adjoint
@ ~/.julia/packages/Zygote/4rucm/src/lib/lib.jl:203 [inlined]
[18] adjoint(::Zygote.Context{false}, ::typeof(Core._apply_iterate), ::typeof(iterate), ::Function, ::Tuple{Int64, Int64, typeof(GPUArrays.launch_heuristic), CUDA.CuArrayBackend, GPUArrays.var"#broadcast_kernel#26"}, ::Tuple{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}}, Int64})
@ Zygote ./none:0
[19] _pullback
@ ~/.julia/packages/ZygoteRules/OgCVT/src/adjoint.jl:66 [inlined]
[20] _pullback
@ ~/.julia/packages/CUDA/35NC6/src/gpuarrays.jl:15 [inlined]
[21] _pullback(::Zygote.Context{false}, ::typeof(Core.kwcall), ::NamedTuple{(:elements, :elements_per_thread), Tuple{Int64, Int64}}, ::typeof(GPUArrays.launch_heuristic), ::CUDA.CuArrayBackend, ::GPUArrays.var"#broadcast_kernel#26", ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}}, ::Int64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[22] _pullback
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:65 [inlined]
[23] _pullback(::Zygote.Context{false}, ::typeof(GPUArrays._copyto!), ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{CUDA.CuArrayStyle{1}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[24] _pullback
@ ~/.julia/packages/GPUArrays/5XhED/src/host/broadcast.jl:41 [inlined]
[25] _pullback
@ ./broadcast.jl:881 [inlined]
[26] _pullback(::Zygote.Context{false}, ::typeof(Base.Broadcast.materialize!), ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}, ::Base.Broadcast.Broadcasted{Base.Broadcast.DefaultArrayStyle{0}, Tuple{Base.OneTo{Int64}}, typeof(identity), Tuple{Float64}})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[27] _pullback
@ ./broadcast.jl:877 [inlined]
[28] _pullback
@ ~/.julia/packages/Zygote/4rucm/src/lib/broadcast.jl:369 [inlined]
[29] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#1453#1456"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, args::Float64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[30] _pullback
@ ~/.julia/packages/ZygoteRules/OgCVT/src/adjoint.jl:71 [inlined]
[31] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#4229#back#1457"{Zygote.var"#1453#1456"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, args::Float64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[32] _pullback
@ ./REPL[17]:2 [inlined]
[33] _pullback(ctx::Zygote.Context{false}, f::Zygote.Pullback{Tuple{var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, Tuple{Zygote.var"#2184#back#303"{Zygote.var"#back#302"{:D, Zygote.Context{false}, var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.ZBack{ChainRules.var"#times_pullback#1481"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Zygote.var"#4229#back#1457"{Zygote.var"#1453#1456"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}}, args::Float64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[34] _pullback
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:45 [inlined]
[35] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#75#76"{Zygote.Pullback{Tuple{var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, Tuple{Zygote.var"#2184#back#303"{Zygote.var"#back#302"{:D, Zygote.Context{false}, var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}}, Zygote.ZBack{ChainRules.var"#times_pullback#1481"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, Zygote.var"#4229#back#1457"{Zygote.var"#1453#1456"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}}}}, args::Float64)
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[36] _pullback
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:97 [inlined]
[37] _pullback(::Zygote.Context{false}, ::typeof(gradient), ::var"#17#20"{CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}}, ::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[38] _pullback
@ ./REPL[17]:2 [inlined]
[39] _pullback(ctx::Zygote.Context{false}, f::var"#g#19"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}, args::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[40] _pullback
@ ./REPL[17]:3 [inlined]
[41] _pullback(ctx::Zygote.Context{false}, f::var"#18#21"{var"#g#19"{CuArray{Float64, 1, CUDA.Mem.DeviceBuffer}}}, args::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface2.jl:0
[42] pullback(f::Function, cx::Zygote.Context{false}, args::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:44
[43] pullback
@ ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:42 [inlined]
[44] gradient(f::Function, args::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer})
@ Zygote ~/.julia/packages/Zygote/4rucm/src/compiler/interface.jl:96
[45] MWE(D::CuArray{Float64, 2, CUDA.Mem.DeviceBuffer}, x::CuArray{Float64, 1, CUDA.Mem.DeviceBuffer})
@ Main ./REPL[17]:3
[46] top-level scope
@ REPL[23]:1
[47] top-level scope
@ ~/.julia/packages/CUDA/35NC6/src/initialization.jl:190 |
As Zygote has evolved since I opened this issue, I gave the proposed MWE another try to see if things have changed, and I now get a new error message: julia> using Zygote,CUDA
julia> function MWE(D,x)
g(D) = sum(only(gradient(x -> sum(D*x),x)))
gradient(x->g(x),D)
end
MWE (generic function with 1 method)
julia> x = randn(Float32, 5);
julia> D = randn(Float32, 1, 5);
julia> MWE(D,x)
(Float32[1.0 1.0 … 1.0 1.0],)
julia> x = CUDA.randn(Float32, 5);
julia> D = CUDA.randn(Float32, 1, 5);
julia> MWE(D,x)
ERROR: `llvmcall` requires the compiler
Stacktrace:
[1] macro expansion
@ ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0 [inlined]
[2] _pullback(::Zygote.Context{…}, ::Core.IntrinsicFunction, ::Tuple{…}, ::Type{…}, ::Type{…}, ::Bool)
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:91
[3] assume
@ ~/.julia/packages/LLVM/b3kFs/src/interop/intrinsics.jl:16 [inlined]
[4] driver_version
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/version.jl:20 [inlined]
[5] isvalid
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/context.jl:71 [inlined]
[6] _pullback(ctx::Zygote.Context{false}, f::typeof(CUDA.isvalid), args::CuContext)
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[7] validate_task_local_state
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/state.jl:61 [inlined]
[8] _pullback(ctx::Zygote.Context{false}, f::typeof(CUDA.validate_task_local_state), args::CUDA.TaskLocalState)
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[9] task_local_state!
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/state.jl:72 [inlined]
[10] _pullback(::Zygote.Context{false}, ::typeof(CUDA.task_local_state!))
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[11] active_state
@ ~/.julia/packages/CUDA/1kIOw/lib/cudadrv/state.jl:110 [inlined]
[12] #cufunction#1171
@ ~/.julia/packages/CUDA/1kIOw/src/compiler/execution.jl:373 [inlined]
[13] _pullback(::Zygote.Context{…}, ::CUDA.var"##cufunction#1171", ::@Kwargs{…}, ::typeof(cufunction), ::GPUArrays.var"#gpu_broadcast_kernel_linear#38", ::Type{…})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[14] cufunction
@ ~/.julia/packages/CUDA/1kIOw/src/compiler/execution.jl:372 [inlined]
[15] _pullback(::Zygote.Context{…}, ::typeof(Core.kwcall), ::@NamedTuple{…}, ::typeof(cufunction), ::GPUArrays.var"#gpu_broadcast_kernel_linear#38", ::Type{…})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[16] #_#4
@ ~/.julia/packages/CUDA/1kIOw/src/compiler/execution.jl:112 [inlined]
[17] _pullback(::Zygote.Context{…}, ::CUDA.CUDAKernels.var"##_#4", ::Tuple{…}, ::Nothing, ::KernelAbstractions.Kernel{…}, ::CuArray{…}, ::Base.Broadcast.Broadcasted{…})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[18] _apply(::Function, ::Vararg{Any})
@ Core ./boot.jl:946
[19] adjoint
@ ~/.julia/packages/Zygote/TWpme/src/lib/lib.jl:202 [inlined]
[20] adjoint(::Zygote.Context{…}, ::typeof(Core._apply_iterate), ::typeof(iterate), ::Function, ::Tuple{…}, ::Tuple{…})
@ Zygote ./none:0
[21] _pullback
@ ~/.julia/packages/ZygoteRules/CkVIK/src/adjoint.jl:67 [inlined]
[22] Kernel
@ ~/.julia/packages/CUDA/1kIOw/src/CUDAKernels.jl:89 [inlined]
[23] _pullback(::Zygote.Context{…}, ::typeof(Core.kwcall), ::@NamedTuple{…}, ::KernelAbstractions.Kernel{…}, ::CuArray{…}, ::Base.Broadcast.Broadcasted{…})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[24] _copyto!
@ ~/.julia/packages/GPUArrays/Mot2g/src/host/broadcast.jl:71 [inlined]
[25] _pullback(::Zygote.Context{…}, ::typeof(GPUArrays._copyto!), ::CuArray{…}, ::Base.Broadcast.Broadcasted{…})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[26] materialize!
@ ~/.julia/packages/GPUArrays/Mot2g/src/host/broadcast.jl:38 [inlined]
[27] materialize!
@ ./broadcast.jl:880 [inlined]
[28] _pullback(::Zygote.Context{…}, ::typeof(Base.Broadcast.materialize!), ::CuArray{…}, ::Base.Broadcast.Broadcasted{…})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[29] materialize!
@ ./broadcast.jl:876 [inlined]
[30] #1460
@ ~/.julia/packages/Zygote/TWpme/src/lib/broadcast.jl:369 [inlined]
[31] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#1460#1463"{CuArray{Float32, 1, CUDA.DeviceMemory}}, args::Float32)
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[32] #4231#back
@ ~/.julia/packages/ZygoteRules/CkVIK/src/adjoint.jl:72 [inlined]
[33] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#4231#back#1464"{Zygote.var"#1460#1463"{CuArray{…}}}, args::Float32)
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[34] Pullback
@ ./REPL[3]:2 [inlined]
[35] _pullback(ctx::Zygote.Context{false}, f::Zygote.Pullback{Tuple{…}, Tuple{…}}, args::Float32)
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[36] #78
@ ~/.julia/packages/Zygote/TWpme/src/compiler/interface.jl:91 [inlined]
[37] _pullback(ctx::Zygote.Context{false}, f::Zygote.var"#78#79"{Zygote.Pullback{Tuple{…}, Tuple{…}}}, args::Float32)
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[38] gradient
@ ~/.julia/packages/Zygote/TWpme/src/compiler/interface.jl:148 [inlined]
[39] _pullback(::Zygote.Context{…}, ::typeof(gradient), ::var"#1#4"{…}, ::CuArray{…})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[40] g
@ ./REPL[3]:2 [inlined]
[41] _pullback(ctx::Zygote.Context{false}, f::var"#g#3"{CuArray{…}}, args::CuArray{Float32, 2, CUDA.DeviceMemory})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[42] #2
@ ./REPL[3]:3 [inlined]
[43] _pullback(ctx::Zygote.Context{false}, f::var"#2#5"{var"#g#3"{CuArray{…}}}, args::CuArray{Float32, 2, CUDA.DeviceMemory})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface2.jl:0
[44] pullback(f::Function, cx::Zygote.Context{false}, args::CuArray{Float32, 2, CUDA.DeviceMemory})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface.jl:90
[45] pullback
@ ~/.julia/packages/Zygote/TWpme/src/compiler/interface.jl:88 [inlined]
[46] gradient(f::Function, args::CuArray{Float32, 2, CUDA.DeviceMemory})
@ Zygote ~/.julia/packages/Zygote/TWpme/src/compiler/interface.jl:147
[47] MWE(D::CuArray{Float32, 2, CUDA.DeviceMemory}, x::CuArray{Float32, 1, CUDA.DeviceMemory})
@ Main ./REPL[3]:3
[48] top-level scope
@ REPL[10]:1
Some type information was truncated. Use `show(err)` to see complete types.
I suppose an adjoint is missing for CuArray somewhere, but I can't figure out where. |
Can you try again with Zygote >=0.7.1? Looking at the stacktrace, this error may have been addressed. That said, further testing may turn up other errors. Getting nested AD and GPU support to work in Zygote is quite challenging. |
It works, Yes! |
Hi,
I'm trying to implement a gradient penalty with Lux. It works fine on CPU but raises a "try/catch" error on GPU (CUDA). It seems to be linked to the try/catch here, but I'm not able to figure out what the problem could be.
The text was updated successfully, but these errors were encountered: