|
| 1 | +module ChainRulesTest |
| 2 | + |
| 3 | +using LinearAlgebra |
| 4 | +using ChainRulesCore |
| 5 | +using DiffResults |
| 6 | +using ReverseDiff |
| 7 | +using Test |
| 8 | + |
"""
    f(x)

Return the sum of `4x .+ 1` taken over every element of `x`.
"""
function f(x)
    return sum(4x .+ 1)
end
| 10 | + |
# Reverse rule for `f` that is wrong on purpose.
#
# The true derivative of `f` is 4. This rule reports 3 instead, so that a
# gradient of 3 (rather than 4) proves that the `ChainRulesCore.rrule` was
# imported into ReverseDiff and took over the computation.
function ChainRulesCore.rrule(::typeof(f), x)
    # One entry per element of `x`, each carrying the marker derivative 3.
    pullback(Δ) = (ChainRulesCore.NoTangent(), fill(3 * Δ, size(x)))
    return f(x), pullback
end

# Import the rule above into ReverseDiff for tracked-array arguments.
ReverseDiff.@grad_from_chainrules f(x::ReverseDiff.TrackedArray)
| 29 | + |
| 30 | + |
"""
    g(x, y)

Return the sum of `4x .+ 4y` taken over every element.
"""
function g(x, y)
    return sum(4x .+ 4y)
end
| 32 | + |
# Reverse rule for `g` that is wrong on purpose.
#
# The true partial derivatives of `g` are 4 and 4. This rule reports 3 for `x`
# and 5 for `y`, so that the tests can tell whether the rule was used by
# ReverseDiff and which tangent ended up on which argument.
function ChainRulesCore.rrule(::typeof(g), x, y)
    r = g(x, y)
    function back(d)
        # Each tangent takes the shape of its own argument: the one for `y`
        # must be sized from `y`, not from `x`.
        return ChainRulesCore.NoTangent(), fill(3 * d, size(x)), fill(5 * d, size(y))
    end
    return r, back
end

# Import the rule into ReverseDiff for every combination of tracked arguments.
ReverseDiff.@grad_from_chainrules g(x::ReverseDiff.TrackedArray, y)
ReverseDiff.@grad_from_chainrules g(x, y::ReverseDiff.TrackedArray)
ReverseDiff.@grad_from_chainrules g(x::ReverseDiff.TrackedArray, y::ReverseDiff.TrackedArray)
| 45 | + |
# Checks the hand-written rules directly through ChainRulesCore, then through
# ReverseDiff gradient tapes (plain and compiled). The expected gradients are
# the marker values 3 and 5 from the custom rules, not the true derivative 4.
@testset "rrule in ChainRules and ReverseDiff" begin
    ## ChainRules
    # function f
    point = rand(3, 3)
    primal, pullback = ChainRulesCore.rrule(f, point)
    _, tangent = pullback(1)
    @test primal == f(point)
    @test tangent == fill(3, size(point))

    # function g
    points = (rand(3, 3), rand(3, 3))
    primal, pullback = ChainRulesCore.rrule(g, points...)
    _, tangent1, tangent2 = pullback(1)
    @test primal == g(points...)
    @test tangent1 == fill(3, size(points[1]))
    @test tangent2 == fill(5, size(points[2]))

    ## ReverseDiff
    # function f
    points = (rand(3, 3),)

    # The tape is recorded on one random input and replayed on another.
    grads = (similar(points[1]),)
    tape = ReverseDiff.GradientTape(x -> f(x) + 2, (rand(3, 3),))
    ReverseDiff.gradient!(grads, tape, points)
    @test grads[1] == fill(3, size(points[1]))

    grads = (similar(points[1]),)
    compiled = ReverseDiff.CompiledTape(tape)
    ReverseDiff.gradient!(grads, compiled, points)
    @test grads[1] == fill(3, size(points[1]))

    # function g
    points = (rand(3, 3), rand(3, 3))

    grads = (similar(points[1]), similar(points[2]))
    tape = ReverseDiff.GradientTape((x, y) -> g(x, y) + 2, (rand(3, 3), rand(3, 3)))
    ReverseDiff.gradient!(grads, tape, points)
    @test grads[1] == fill(3, size(points[1]))
    @test grads[2] == fill(5, size(points[2]))

    grads = (similar(points[1]), similar(points[2]))
    compiled = ReverseDiff.CompiledTape(tape)
    ReverseDiff.gradient!(grads, compiled, points)
    @test grads[1] == fill(3, size(points[1]))
    @test grads[2] == fill(5, size(points[2]))
end
| 95 | + |
| 96 | +### Tape test |
# `g` is called on plain (untracked) arrays inside a tracked computation, so
# the call must not add an instruction to the tape.
@testset "Tape test: Ensure ordinary call is not tracked" begin
    tape = ReverseDiff.InstructionTape()

    f(x) = sum(2x .+ g([1, 2], [3, 4]))
    values = rand(3, 3)
    tracked = ReverseDiff.track(copy(values), tape)

    # Run once on the tracked input to record the instructions.
    f(tracked)
    @test length(tape) == 3 # sum, broadcast+, broadcast*, but not `g`
end
| 107 | + |
| 108 | +### Functions with varargs and kwargs |
| 109 | +# Varargs |
"""
    f_vararg(x, args...)

Return the sum of `4x .+ sum(args)` taken over every element of `x`.
"""
function f_vararg(x, args...)
    return sum(4x .+ sum(args))
end
| 111 | + |
# Reverse rule for `f_vararg`. The pullback reports the marker derivative 3
# for `x` only; no tangents are produced for the trailing `args`.
function ChainRulesCore.rrule(::typeof(f_vararg), x, args...)
    pullback(Δ) = (ChainRulesCore.NoTangent(), fill(3 * Δ, size(x)))
    return f_vararg(x, args...), pullback
end

# Import the rule into ReverseDiff for a tracked first argument.
ReverseDiff.@grad_from_chainrules f_vararg(x::ReverseDiff.TrackedArray, args...)
| 121 | + |
# The gradient w.r.t. `x` must be the marker value 3 from the custom rule when
# extra positional arguments are passed through.
@testset "Function with Varargs" begin
    points = (rand(3, 3),)

    grads = (similar(points[1]),)
    tape = ReverseDiff.GradientTape(x -> f_vararg(x, 1, 2, 3) + 2, (rand(3, 3),))
    ReverseDiff.gradient!(grads, tape, points)

    @test grads[1] == fill(3, size(points[1]))
end
| 131 | + |
| 132 | + |
| 133 | +# Varargs and kwargs |
"""
    f_kw(x, args...; k=1, kwargs...)

Return the sum of `4x .+ sum(args) .+ (k + kwargs[:j])` over every element of
`x`. The keyword `j` is read from `kwargs`, so it must be supplied.
"""
function f_kw(x, args...; k=1, kwargs...)
    scaled = 4x
    extra = sum(args)
    offset = k + kwargs[:j]
    return sum(scaled .+ extra .+ offset)
end
| 135 | + |
# Reverse rule for `f_kw`. Keyword arguments are forwarded to the primal call;
# the pullback reports the marker derivative 3 for `x` only.
function ChainRulesCore.rrule(::typeof(f_kw), x, args...; k=1, kwargs...)
    primal = f_kw(x, args...; k=k, kwargs...)
    pullback(Δ) = (ChainRulesCore.NoTangent(), fill(3 * Δ, size(x)))
    return primal, pullback
end

# Import the rule into ReverseDiff, including varargs and keyword arguments.
ReverseDiff.@grad_from_chainrules f_kw(x::ReverseDiff.TrackedArray, args...; k=1, kwargs...)
| 145 | + |
# The gradient w.r.t. `x` must be the marker value 3 from the custom rule when
# both extra positional and keyword arguments are passed through.
@testset "Function with Varargs and kwargs" begin
    points = (rand(3, 3),)

    grads = (similar(points[1]),)
    tape = ReverseDiff.GradientTape(x -> f_kw(x, 1, 2, 3; k=2, j=3) + 2, (rand(3, 3),))
    ReverseDiff.gradient!(grads, tape, points)

    @test grads[1] == fill(3, size(points[1]))
end
| 155 | + |
| 156 | +### Mix @grad and @grad_from_chainrules |
| 157 | + |
# `h` scales its input by 10. Tracked arrays are routed through
# `ReverseDiff.track`, so the custom `@grad` rule below is used for them.
h(x) = 10x
h(x::ReverseDiff.TrackedArray) = ReverseDiff.track(h, x)

# Custom ReverseDiff rule for `h` that uses 7 as its derivative, so the
# result is distinguishable from the true derivative 10.
ReverseDiff.@grad function h(x)
    primal = h(ReverseDiff.value(x))
    return primal, Δ -> (Δ * 7,)
end
| 164 | + |
# Composes `g` (rule imported from ChainRules) with `h` (rule defined through
# `ReverseDiff.@grad`) and checks that both marker derivatives are combined.
@testset "ReverseDiff and ChainRules Mixed" begin
    t(x) = g(x, h(x))
    points = (rand(3, 3),)
    grads = (similar(points[1]),)

    tape = ReverseDiff.GradientTape(t, (rand(3, 3),))
    ReverseDiff.gradient!(grads, tape, points)

    # 3 from `g` w.r.t. its first argument, plus 5 (second argument) times 7 (`h`).
    @test grads[1] == fill(38, size(points[1])) # 38 = 3 + 5 * 7
end
| 174 | + |
| 175 | +### Isolated Scope |
# Defines a function, its rule and the ReverseDiff registration in a module
# that imports only the `@grad_from_chainrules` macro, then differentiates the
# function from a nested module.
module IsolatedModuleForTestingScoping
using ChainRulesCore
using ReverseDiff: @grad_from_chainrules

f(x) = sum(4x .+ 1)

function ChainRulesCore.rrule(::typeof(f), x)
    # Return a distinguishable but improper gradient (3 instead of 4).
    pullback(Δ) = (ChainRulesCore.NoTangent(), fill(3 * Δ, size(x)))
    return f(x), pullback
end

# NOTE(review): `TrackedArray` is not imported into this module; presumably the
# macro is expected to resolve it itself — confirm that this is the intent.
@grad_from_chainrules f(x::TrackedArray)

module SubModule
using Test
using ReverseDiff: TrackedArray, GradientTape, gradient!
using ..IsolatedModuleForTestingScoping: f
@testset "rrule in Isolated Scope" begin
    points = (rand(3, 3),)

    grads = (similar(points[1]),)
    tape = GradientTape(x -> f(x) + 2, (rand(3, 3),))
    gradient!(grads, tape, points)

    @test grads[1] == fill(3, size(points[1]))
end

end # end of SubModule
end # end of IsolatedModuleForTestingScoping
| 209 | + |
| 210 | +end |
0 commit comments