
Commit 3c4933f

add ConvolutionalLayerOp
1 parent c558c44 commit 3c4933f

8 files changed: 278 additions and 1 deletion

Project.toml

Lines changed: 2 additions & 0 deletions
@@ -3,12 +3,14 @@ uuid = "02ac4b2c-022a-44aa-84a5-ea45a5754bcc"
 version = "0.2.2"
 
 [deps]
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 ReachabilityBase = "379f33d0-9447-4353-bd03-d664070e549f"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [compat]
+LinearAlgebra = "<0.0.1, 1.6"
 ReachabilityBase = "0.1.1 - 0.2"
 Reexport = "0.2, 1"
 Requires = "0.5, 1"

docs/src/lib/Architecture.md

Lines changed: 1 addition & 0 deletions
@@ -63,6 +63,7 @@ AbstractPoolingLayerOp
 
 ```@docs
 DenseLayerOp
+ConvolutionalLayerOp
 FlattenLayerOp
 MaxPoolingLayerOp
 MeanPoolingLayerOp

src/Architecture/Architecture.jl

Lines changed: 3 additions & 1 deletion
@@ -6,17 +6,19 @@ Module containing data structures to represent controllers.
 module Architecture
 
 using Requires
+using LinearAlgebra: dot
 using Statistics: mean
 
 export AbstractNeuralNetwork, FeedforwardNetwork,
-       AbstractLayerOp, DenseLayerOp, FlattenLayerOp,
+       AbstractLayerOp, DenseLayerOp, ConvolutionalLayerOp, FlattenLayerOp,
        AbstractPoolingLayerOp, MaxPoolingLayerOp, MeanPoolingLayerOp,
        layers, dim_in, dim_out,
        ActivationFunction, Id, ReLU, Sigmoid, Tanh, LeakyReLU
 
 include("ActivationFunction.jl")
 include("LayerOps/AbstractLayerOp.jl")
 include("LayerOps/DenseLayerOp.jl")
+include("LayerOps/ConvolutionalLayerOp.jl")
 include("LayerOps/FlattenLayerOp.jl")
 include("LayerOps/PoolingLayerOp.jl")
 include("NeuralNetworks/AbstractNeuralNetwork.jl")
src/Architecture/LayerOps/ConvolutionalLayerOp.jl

Lines changed: 152 additions & 0 deletions
@@ -0,0 +1,152 @@
+"""
+    ConvolutionalLayerOp{F, W, B} <: AbstractLayerOp
+
+A convolutional layer operation is a series of filters, each of which computes a
+small affine map followed by an activation function.
+
+### Fields
+
+- `weights`    -- vector with one weight matrix for each filter
+- `bias`       -- vector with one bias value for each filter
+- `activation` -- activation function
+
+### Notes
+
+Conversion from a `Flux.Conv` is supported.
+"""
+struct ConvolutionalLayerOp{F,W,B} <: AbstractLayerOp
+    weights::W
+    bias::B
+    activation::F
+
+    function ConvolutionalLayerOp(weights::W, bias::B, activation::F;
+                                  validate=Val(true)) where {F,W,B}
+        if validate isa Val{true} && !_isconsistent_ConvolutionalLayerOp(weights, bias)
+            throw(ArgumentError("inconsistent filter dimensions: weights " *
+                                "($(length(weights))) and biases ($(length(bias)))"))
+        end
+
+        return new{F,W,B}(weights, bias, activation)
+    end
+end
+
+function _isconsistent_ConvolutionalLayerOp(weights, bias)
+    if length(weights) != length(bias)
+        return false
+    elseif length(bias) == 0
+        return false
+    end
+    @inbounds begin
+        s = size(first(weights))
+        if length(s) != 3 || s[1] == 0 || s[2] == 0 || s[3] == 0
+            return false
+        end
+        for e in weights
+            if size(e) != s
+                return false
+            end
+        end
+    end
+    return true
+end
+
+n_filters(L::ConvolutionalLayerOp) = length(L.bias)
+
+kernel(L::ConvolutionalLayerOp) = @inbounds size(first(L.weights))
+
+# application to a tensor
+function (L::ConvolutionalLayerOp)(T)
+    s = size(T)
+    if length(s) != 3
+        throw(ArgumentError("a convolutional layer requires a three-dimensional input, but got $s"))
+    end
+    p, q, r = kernel(L)
+    @inbounds begin
+        if p > s[1] || q > s[2] || r != s[3]
+            throw(ArgumentError("convolution with kernel size $(kernel(L)) " *
+                                "does not apply to a tensor of dimension $s"))
+        end
+        d1 = s[1] - p + 1
+        d2 = s[2] - q + 1
+    end
+    t = n_filters(L)
+    s = (d1, d2, t)
+    O = similar(T, s)
+    @inbounds for f in 1:t
+        W = L.weights[f]
+        b = L.bias[f]
+        for k in 1:r
+            for j in 1:d2
+                for i in 1:d1
+                    T′ = view(T, i:(i + p - 1), j:(j + q - 1), k)
+                    O[i, j, f] = L.activation(dot(W, T′) + b)
+                end
+            end
+        end
+    end
+    return O
+end
+
+function Base.:(==)(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp)
+    return L1.weights == L2.weights &&
+           L1.bias == L2.bias &&
+           L1.activation == L2.activation
+end
+
+function Base.:isapprox(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp; atol::Real=0,
+                        rtol=nothing)
+    if isnothing(rtol)
+        if iszero(atol)
+            N = @inbounds promote_type(eltype(first(L1.weights)), eltype(first(L2.weights)),
+                                       eltype(L1.bias), eltype(L2.bias))
+            rtol = Base.rtoldefault(N)
+        else
+            rtol = zero(atol)
+        end
+    end
+    return isapprox(L1.weights, L2.weights; atol=atol, rtol=rtol) &&
+           isapprox(L1.bias, L2.bias; atol=atol, rtol=rtol) &&
+           L1.activation == L2.activation
+end
+
+function Base.show(io::IO, L::ConvolutionalLayerOp)
+    str = "$(string(ConvolutionalLayerOp)) of $(n_filters(L)) filters with " *
+          "kernel size $(kernel(L)) and $(typeof(L.activation)) activation"
+    return print(io, str)
+end
+
+function load_Flux_convert_Conv_layer()
+    return quote
+        function Base.convert(::Type{ConvolutionalLayerOp}, layer::Flux.Conv)
+            if !all(isone, layer.stride)
+                throw(ArgumentError("stride $(layer.stride) != 1 is not supported"))  # COV_EXCL_LINE
+            end
+            if !all(iszero, layer.pad)
+                throw(ArgumentError("pad $(layer.pad) != 0 is not supported"))  # COV_EXCL_LINE
+            end
+            if !all(isone, layer.dilation)
+                throw(ArgumentError("dilation $(layer.dilation) != 1 is not supported"))  # COV_EXCL_LINE
+            end
+            if !all(isone, layer.groups)
+                throw(ArgumentError("groups $(layer.groups) != 1 is not supported"))  # COV_EXCL_LINE
+            end
+            act = get(activations_Flux, layer.σ, nothing)
+            if isnothing(act)
+                throw(ArgumentError("unsupported activation function $(layer.σ)"))  # COV_EXCL_LINE
+            end
+            # Flux stores a 4D matrix instead of a vector of 3D matrices
+            weights = @inbounds [layer.weight[:, :, :, i] for i in 1:size(layer.weight, 4)]
+            return ConvolutionalLayerOp(weights, layer.bias, act)
+        end
+
+        function Base.convert(::Type{Flux.Conv}, layer::ConvolutionalLayerOp)
+            act = get(activations_Flux, layer.activation, nothing)
+            if isnothing(act)
+                throw(ArgumentError("unsupported activation function $(layer.activation)"))  # COV_EXCL_LINE
+            end
+            # Flux stores a 4D matrix instead of a vector of 3D matrices
+            weights = cat(layer.weights...; dims=4)
+            return Flux.Conv(weights, layer.bias, act)
+        end
+    end
+end
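For orientation, here is a small usage sketch of the new layer type; the filter, bias, input tensor, and expected output are taken from the test data added in this commit (it assumes `ControllerFormats` reexports the `Architecture` names, as it does for `DenseLayerOp`):

```julia
using ControllerFormats

# one filter: a 2x2x1 kernel with bias 1, followed by a ReLU activation
W = reshape([1 0; -1 2], (2, 2, 1))
L = ConvolutionalLayerOp([W], [1], ReLU())

# applying the layer to a 4x4x1 input tensor slides the kernel over every
# (row, column) position and produces a 3x3x1 output tensor
T = reshape([0 4 2 1; -1 0 1 -2; 3 1 2 0; 0 1 4 1], (4, 4, 1))
L(T) == reshape([2 7 0; 0 4 0; 6 9 1], (3, 3, 1))  # true
```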

src/Architecture/init.jl

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@ function __init__()
     @require Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" begin
         eval(load_Flux_activations())
         eval(load_Flux_convert_Dense_layer())
+        eval(load_Flux_convert_Conv_layer())
         eval(load_Flux_convert_network())
     end
 end
test/Architecture/ConvolutionalLayerOp.jl

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
+using ControllerFormats.Architecture: kernel, n_filters
+using ReachabilityBase.Subtypes: subtypes
+
+# 4x4x1 input tensor
+T441 = reshape([0 4 2 1; -1 0 1 -2; 3 1 2 0; 0 1 4 1], (4, 4, 1))
+O_Id = reshape([2 7 -2; -1 4 0; 6 9 1], (3, 3, 1))
+# 2x2x3 input tensor
+T223 = reshape(1:12, (2, 2, 3))
+
+W1 = reshape([1 0; -1 2], (2, 2, 1))
+b1 = 1
+W2 = W1
+b2 = 2
+# 2x2 kernel and 1 filter
+Ws = [W1]
+bs = [b1]
+
+# invalid weight/bias combination
+@test_throws ArgumentError ConvolutionalLayerOp(Ws, [1, 0], Id())
+@test_throws ArgumentError ConvolutionalLayerOp([], [], Id())
+@test_throws ArgumentError ConvolutionalLayerOp([W1, hcat(1)], [1, 0], Id())
+@test_throws ArgumentError ConvolutionalLayerOp([[1 0; -1 2]], [1], Id())
+
+# one filter
+L = ConvolutionalLayerOp(Ws, bs, ReLU())
+# two filters
+L2 = ConvolutionalLayerOp([W1, W2], [b1, b2], ReLU())
+
+# printing
+io = IOBuffer()
+println(io, L)
+
+# output for tensors
+@test L(T441) == reshape([2 7 0; 0 4 0; 6 9 1], (3, 3, 1))
+@test L2(T441) == cat([2 7 0; 0 4 0; 6 9 1], [3 8 0; 0 5 1; 7 10 2]; dims=(3))
+@test_throws ArgumentError L(T223)
+@test_throws ArgumentError L(reshape(1:4.0, (2, 2)))
+
+# equality
+@test L == ConvolutionalLayerOp(Ws, bs, ReLU())
+@test L != ConvolutionalLayerOp([W1 .+ 1], bs, ReLU()) &&
+      L != ConvolutionalLayerOp(Ws, [b1 .+ 1], ReLU()) &&
+      L != ConvolutionalLayerOp(Ws, bs, Id())
+
+# approximate equality
+@test L ≈ ConvolutionalLayerOp(Ws, bs, ReLU())
+@test L ≈ ConvolutionalLayerOp([W1 .+ 1e-10], bs, ReLU()) &&
+      L ≈ ConvolutionalLayerOp(Ws, [b1 .+ 1e-10], ReLU()) &&
+      !≈(L, ConvolutionalLayerOp([W1 .+ 1e-10], bs, ReLU()); rtol=1e-12) &&
+      !≈(L, ConvolutionalLayerOp(Ws, [b1 .+ 1e-10], ReLU()); rtol=1e-12) &&
+      ≈(L, ConvolutionalLayerOp([W1 .+ 1e-1], bs, ReLU()); atol=1) &&
+      ≈(L, ConvolutionalLayerOp(Ws, [b1 .+ 1e-1], ReLU()); atol=1) &&
+      !(L ≈ ConvolutionalLayerOp([W1 .+ 1], bs, ReLU())) &&
+      !(L ≈ ConvolutionalLayerOp(Ws, [b1 .+ 1], ReLU())) &&
+      !(L ≈ ConvolutionalLayerOp(Ws, bs, Id()))
+
+# kernel size and number of filters
+@test kernel(L) == kernel(L2) == (2, 2, 1)
+@test n_filters(L) == 1 && n_filters(L2) == 2
+
+# test methods for all activations
+function test_layer(L::ConvolutionalLayerOp{Id})
+    @test L(T441) == O_Id
+end
+
+function test_layer(L::ConvolutionalLayerOp{ReLU})
+    @test L(T441) == reshape([2 7 0; 0 4 0; 6 9 1], (3, 3, 1))
+end
+
+function test_layer(L::ConvolutionalLayerOp{Sigmoid})
+    @test L(float(T441)) ≈ Sigmoid().(O_Id) atol = 1e-3
+end
+
+function test_layer(L::ConvolutionalLayerOp{Tanh})
+    @test L(float(T441)) ≈ Tanh().(O_Id) atol = 1e-3
+end
+
+function test_layer(L::ConvolutionalLayerOp{<:LeakyReLU})
+    @test L(T441) == O_Id
+end
+
+function test_layer(L::ConvolutionalLayerOp)
+    return error("untested activation function: ", typeof(L.activation))
+end
+
+# run test with all activations
+for act in subtypes(ActivationFunction)
+    if act == TestActivation
+        continue
+    elseif act == LeakyReLU
+        act_inst = LeakyReLU(1)
+    else
+        act_inst = act()
+    end
+    test_layer(ConvolutionalLayerOp(Ws, bs, act_inst))
+end
test/Architecture/Flux.jl

Lines changed: 20 additions & 0 deletions
@@ -1,5 +1,9 @@
 import Flux
 
+################
+# Dense layers #
+################
+
 L1 = Flux.Dense(1, 2, Flux.relu)
 L1.weight .= 1, 2
 L1.bias .= 3, 4
@@ -48,3 +52,19 @@ W = hcat([1 0.5; -0.5 0.5; -1 -0.5])
 b = [1.0, 0, -2]
 L = DenseLayerOp(W, b, TestActivation())
 @test_throws ArgumentError convert(Flux.Dense, L)
+
+########################
+# Convolutional layers #
+########################
+
+LC = Flux.Conv((2, 2), 1 => 1, Flux.relu)
+LC.weight .= reshape([1 0; -1 2], (2, 2, 1, 1))
+LC.bias .= 1
+
+# layer conversion
+op = convert(ConvolutionalLayerOp, LC)
+@test op.weights[1] == LC.weight[:, :, :]
+@test op.bias == LC.bias
+@test op.activation == ReLU()
+L_back = convert(Flux.Conv, op)
+@test compare_Flux_layer(LC, L_back)
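Note that the conversion exercised above only covers `Flux.Conv` layers with unit stride, zero padding, unit dilation, and a single group; anything else is rejected with an `ArgumentError`. A minimal sketch of that behavior (the strided layer below is a hypothetical example, not part of the commit's test suite):

```julia
import Flux
using ControllerFormats

# a strided convolution cannot be represented by a ConvolutionalLayerOp,
# so the conversion added in this commit is expected to throw an ArgumentError
LC_strided = Flux.Conv((2, 2), 1 => 1, Flux.relu; stride=2)
convert(ConvolutionalLayerOp, LC_strided)
```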

test/runtests.jl

Lines changed: 3 additions & 0 deletions
@@ -16,6 +16,9 @@ struct TestActivation <: ActivationFunction end
 @testset "DenseLayerOp" begin
     include("Architecture/DenseLayerOp.jl")
 end
+@testset "ConvolutionalLayerOp" begin
+    include("Architecture/ConvolutionalLayerOp.jl")
+end
 @testset "FlattenLayerOp" begin
     include("Architecture/FlattenLayerOp.jl")
 end
