Add more layer types #45

Merged · 4 commits · May 25, 2024

4 changes: 4 additions & 0 deletions Project.toml
@@ -3,12 +3,16 @@ uuid = "02ac4b2c-022a-44aa-84a5-ea45a5754bcc"
version = "0.2.2"

[deps]
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ReachabilityBase = "379f33d0-9447-4353-bd03-d664070e549f"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

[compat]
LinearAlgebra = "<0.0.1, 1.6"
ReachabilityBase = "0.1.1 - 0.2"
Reexport = "0.2, 1"
Requires = "0.5, 1"
Statistics = "<0.0.1, 1.6"
julia = "1.6"
10 changes: 10 additions & 0 deletions docs/src/lib/Architecture.md
@@ -53,10 +53,20 @@ dim_out(::AbstractLayerOp)
ControllerFormats.Architecture.dim(::AbstractLayerOp)
```

#### More specific layer interfaces

```@docs
AbstractPoolingLayerOp
```

#### Implementation

```@docs
DenseLayerOp
ConvolutionalLayerOp
FlattenLayerOp
MaxPoolingLayerOp
MeanPoolingLayerOp
```

### Activation functions
9 changes: 8 additions & 1 deletion src/Architecture/Architecture.jl
@@ -6,15 +6,22 @@ Module containing data structures to represent controllers.
module Architecture

using Requires
using LinearAlgebra: dot
using Statistics: mean

import Base: size
export AbstractNeuralNetwork, FeedforwardNetwork,
AbstractLayerOp, DenseLayerOp,
AbstractLayerOp, DenseLayerOp, ConvolutionalLayerOp, FlattenLayerOp,
AbstractPoolingLayerOp, MaxPoolingLayerOp, MeanPoolingLayerOp,
layers, dim_in, dim_out,
ActivationFunction, Id, ReLU, Sigmoid, Tanh, LeakyReLU

include("ActivationFunction.jl")
include("LayerOps/AbstractLayerOp.jl")
include("LayerOps/DenseLayerOp.jl")
include("LayerOps/ConvolutionalLayerOp.jl")
include("LayerOps/FlattenLayerOp.jl")
include("LayerOps/PoolingLayerOp.jl")
include("NeuralNetworks/AbstractNeuralNetwork.jl")
include("NeuralNetworks/FeedforwardNetwork.jl")

154 changes: 154 additions & 0 deletions src/Architecture/LayerOps/ConvolutionalLayerOp.jl
@@ -0,0 +1,154 @@
"""
ConvolutionalLayerOp{F, W, B} <: AbstractLayerOp

A convolutional layer operation is a series of filters, each of which computes a
small affine map followed by an activation function.

### Fields

- `weights` -- vector with one weight tensor for each filter
- `bias` -- vector with one bias value for each filter
- `activation` -- activation function

### Notes

Conversion from a `Flux.Conv` is supported.
"""
struct ConvolutionalLayerOp{F,W,B} <: AbstractLayerOp
weights::W
bias::B
activation::F

function ConvolutionalLayerOp(weights::W, bias::B, activation::F;
validate=Val(true)) where {F,W,B}
if validate isa Val{true} && !_isconsistent_ConvolutionalLayerOp(weights, bias)
throw(ArgumentError("inconsistent filter dimensions: weights " *
"($(length(weights))) and biases ($(length(bias)))"))
end

return new{F,W,B}(weights, bias, activation)
end
end

function _isconsistent_ConvolutionalLayerOp(weights, bias)
if length(weights) != length(bias)
return false
elseif length(bias) == 0
return false
end
@inbounds begin
s = size(first(weights))
if length(s) != 3 || s[1] == 0 || s[2] == 0 || s[3] == 0
return false
end
for e in weights
if size(e) != s
return false
end
end
end
return true
end

n_filters(L::ConvolutionalLayerOp) = length(L.bias)

kernel(L::ConvolutionalLayerOp) = @inbounds size(first(L.weights))

# application to a tensor
function (L::ConvolutionalLayerOp)(T)
s = size(T)
if length(s) != 3
throw(ArgumentError("a convolutional layer requires at least two dimensions, but got $s"))
end
p, q, r = kernel(L)
@inbounds begin
if p > s[1] || q > s[2] || r != s[3]
throw(ArgumentError("convolution with kernel size $(kernel(L)) " *
"does not apply to a tensor of dimension $s"))
end
d1 = s[1] - p + 1
d2 = s[2] - q + 1
end
t = n_filters(L)
s = (d1, d2, t)
O = similar(T, s)
@inbounds for f in 1:t
W = L.weights[f]
b = L.bias[f]
for k in 1:r
for j in 1:d2
for i in 1:d1
T′ = view(T, i:(i + p - 1), j:(j + q - 1), k)
O[i, j, f] = L.activation(dot(W, T′) + b)
end
end
end
end
return O
end
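
For illustration, here is a minimal usage sketch (not part of the diff): a single 2×2×1 filter of ones with zero bias and the exported `Id` activation, applied to a 3×3×1 tensor.

```julia
# hypothetical sketch: one 2×2×1 filter, zero bias, identity activation
using ControllerFormats

W = ones(2, 2, 1)                      # one filter with kernel size (2, 2, 1)
L = ConvolutionalLayerOp([W], [0.0], Id())

T = reshape(Float64.(1:9), (3, 3, 1))  # 3×3 input with a single channel
O = L(T)                               # 2×2×1 output; each entry sums a 2×2 window
```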

function Base.:(==)(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp)
return L1.weights == L2.weights &&
L1.bias == L2.bias &&
L1.activation == L2.activation
end

function Base.:isapprox(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp; atol::Real=0,
rtol=nothing)
if isnothing(rtol)
if iszero(atol)
N = @inbounds promote_type(eltype(first(L1.weights)), eltype(first(L2.weights)),
eltype(L1.bias), eltype(L2.bias))
rtol = Base.rtoldefault(N)
else
rtol = zero(atol)
end
end
return isapprox(L1.weights, L2.weights; atol=atol, rtol=rtol) &&
isapprox(L1.bias, L2.bias; atol=atol, rtol=rtol) &&
L1.activation == L2.activation
end

function Base.show(io::IO, L::ConvolutionalLayerOp)
str = "$(string(ConvolutionalLayerOp)) of $(n_filters(L)) filters with " *
"kernel size $(kernel(L)) and $(typeof(L.activation)) activation"
return print(io, str)
end

# the input and output of a convolutional layer are 3-dimensional tensors
size(::ConvolutionalLayerOp) = (3, 3)

function load_Flux_convert_Conv_layer()
return quote
function Base.convert(::Type{ConvolutionalLayerOp}, layer::Flux.Conv)
if !all(isone, layer.stride)
throw(ArgumentError("stride $(layer.stride) != 1 is not supported")) # COV_EXCL_LINE
end
if !all(iszero, layer.pad)
throw(ArgumentError("pad $(layer.pad) != 0 is not supported")) # COV_EXCL_LINE
end
if !all(isone, layer.dilation)
throw(ArgumentError("dilation $(layer.dilation) != 1 is not supported")) # COV_EXCL_LINE
end
if !all(isone, layer.groups)
throw(ArgumentError("groups $(layer.groups) != 1 is not supported")) # COV_EXCL_LINE
end
act = get(activations_Flux, layer.σ, nothing)
if isnothing(act)
throw(ArgumentError("unsupported activation function $(layer.σ)")) # COV_EXCL_LINE
end
# Flux stores a 4-D array instead of a vector of 3-D arrays
weights = @inbounds [layer.weight[:, :, :, i] for i in 1:size(layer.weight, 4)]
return ConvolutionalLayerOp(weights, layer.bias, act)
end

function Base.convert(::Type{Flux.Conv}, layer::ConvolutionalLayerOp)
act = get(activations_Flux, layer.activation, nothing)
if isnothing(act)
throw(ArgumentError("unsupported activation function $(layer.activation)")) # COV_EXCL_LINE
end
# Flux stores a 4-D array instead of a vector of 3-D arrays
weights = cat(layer.weights...; dims=4)
return Flux.Conv(weights, layer.bias, act)
end
end
end
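
A hedged sketch of the round trip (assuming Flux is installed; the conversions above are only activated through Requires once Flux is loaded, and `Flux.relu` is assumed to be among the activations supported by `activations_Flux`):

```julia
# sketch, assuming Flux is available and its activation is supported
using Flux, ControllerFormats

c = Flux.Conv((2, 2), 1 => 3, Flux.relu)  # 2×2 kernel, 1 input channel, 3 filters
L = convert(ConvolutionalLayerOp, c)      # becomes three 2×2×1 weight arrays
c2 = convert(Flux.Conv, L)                # and back to a Flux layer
```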
2 changes: 2 additions & 0 deletions src/Architecture/LayerOps/DenseLayerOp.jl
@@ -73,6 +73,8 @@ dim_in(L::DenseLayerOp) = size(L.weights, 2)

dim_out(L::DenseLayerOp) = length(L.bias)

# the input and output of a dense layer are vectors (1-dimensional)
size(::DenseLayerOp) = (1, 1)

function load_Flux_convert_Dense_layer()
return quote
function Base.convert(::Type{DenseLayerOp}, layer::Flux.Dense)
46 changes: 46 additions & 0 deletions src/Architecture/LayerOps/FlattenLayerOp.jl
@@ -0,0 +1,46 @@
"""
FlattenLayerOp <: AbstractLayerOp

A flattening layer operation converts a multidimensional tensor into a vector.

### Notes

The implementation uses row-major ordering, consistent with the
machine-learning literature.

```jldoctest
julia> T = reshape([1, 3, 2, 4, 5, 7, 6, 8], (2, 2, 2))
2×2×2 Array{Int64, 3}:
[:, :, 1] =
1 2
3 4

[:, :, 2] =
5 6
7 8

julia> FlattenLayerOp()(T)
8-element Vector{Int64}:
1
2
3
4
5
6
7
8
```
"""
struct FlattenLayerOp <: AbstractLayerOp
end

# application to a tensor (flattened to a vector in row-major order)
function (L::FlattenLayerOp)(T)
s = size(T)
if length(s) == 1
return vec(T)
end
return vec(permutedims(T, (2, 1, 3:length(s)...)))
end

# a flattening layer accepts an input of any dimension and outputs a vector
size(::FlattenLayerOp) = (nothing, 1)
94 changes: 94 additions & 0 deletions src/Architecture/LayerOps/PoolingLayerOp.jl
@@ -0,0 +1,94 @@
"""
AbstractPoolingLayerOp <: AbstractLayerOp

Abstract type for pooling layer operations.

### Notes

Pooling is an operation on a three-dimensional tensor that slides a window
over the first two dimensions and aggregates each window to a single value,
thus shrinking the first two output dimensions.

### Implementation

The following (unexported) functions should be implemented:

- `window(::AbstractPoolingLayerOp)` -- return the pair ``(p, q)`` representing the window size
- `aggregation(::AbstractPoolingLayerOp)` -- return the aggregation function (applied to a tensor)
"""
abstract type AbstractPoolingLayerOp <: AbstractLayerOp end

for (type_name, normal_name, agg_function, agg_name) in
((:MaxPoolingLayerOp, "max", maximum, "maximum"),
(:MeanPoolingLayerOp, "mean", mean, "Statistics.mean"))
@eval begin
@doc """
$($type_name) <: AbstractPoolingLayerOp

A $($normal_name)-pooling layer operation. The aggregation function is
`$($agg_name)`.

### Fields

- `p` -- horizontal window size
- `q` -- vertical window size
"""
struct $type_name <: AbstractPoolingLayerOp
p::Int
q::Int

function $type_name(p::Int, q::Int; validate=Val(true))
if validate isa Val{true} && (p <= 0 || q <= 0)
throw(ArgumentError("inconsistent window size ($p, $q)"))
end
return new(p, q)
end
end

window(L::$type_name) = (L.p, L.q)

aggregation(::$type_name) = $agg_function

function Base.:(==)(L1::$type_name, L2::$type_name)
return window(L1) == window(L2)
end

function Base.show(io::IO, L::$type_name)
str = "$(string($type_name)) for $($normal_name)-pooling of window " *
"size $(window(L))"
return print(io, str)
end
end
end

# application to a tensor
function (L::AbstractPoolingLayerOp)(T)
s = size(T)
if length(s) != 3
throw(ArgumentError("a pooling layer requires a three-dimensional input, but got $s"))
end
p, q = window(L)
@inbounds begin
if mod(s[1], p) != 0 || mod(s[2], q) != 0
throw(ArgumentError("pooling with window size ($p, $q) does " *
"not apply to a tensor of dimension $s"))
end
d1 = div(s[1], p)
d2 = div(s[2], q)
d3 = s[3]
end
s = (d1, d2, d3)
O = similar(T, s)
aggregate = aggregation(L)
@inbounds for k in 1:d3
for j in 1:d2
for i in 1:d1
cluster = view(T, ((i - 1) * p + 1):(i * p), ((j - 1) * q + 1):(j * q), k)
O[i, j, k] = aggregate(cluster)
end
end
end
return O
end
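
A small usage sketch (illustrative, not part of the diff): 2×2 max-pooling halves each of the first two dimensions of a 4×4×1 tensor.

```julia
# sketch: max-pooling a 4×4×1 tensor with a (2, 2) window
using ControllerFormats

L = MaxPoolingLayerOp(2, 2)
T = reshape(Float64.(1:16), (4, 4, 1))
O = L(T)  # 2×2×1 tensor containing the maximum of each 2×2 window
```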

# the input and output of a pooling layer are 3-dimensional tensors
size(::AbstractPoolingLayerOp) = (3, 3)
8 changes: 7 additions & 1 deletion src/Architecture/NeuralNetworks/FeedforwardNetwork.jl
@@ -32,14 +32,20 @@ end
function _first_inconsistent_layer(L)
prev = nothing
for (i, l) in enumerate(L)
if !isnothing(prev) && dim_in(l) != dim_out(prev)
if !isnothing(prev) &&
((!isnothing(dim_in(l)) && !isnothing(dim_out(prev)) && dim_in(l) != dim_out(prev)) ||
!_iscompatible(size(prev), size(l)))
return i
end
prev = l
end
return 0
end

# two layers are compatible if the tensor dimensionality of the first one's
# output matches that of the second one's input; `nothing` matches anything
_iscompatible(t1::Tuple, t2::Tuple) = _iscompatible(t1[2], t2[1])
_iscompatible(i::Int, j::Int) = i == j
_iscompatible(i, ::Nothing) = true
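
To illustrate the relaxed check, here is a hypothetical sketch of a network mixing the new layer types (assuming `dim_in`/`dim_out` default to `nothing` for the non-dense layers): a 3×3×1 input is convolved to 2×2×1, pooled to 1×1×1, flattened to a 1-vector, and mapped through a dense layer.

```julia
# hypothetical sketch: chaining the new layer types in one network
using ControllerFormats

N = FeedforwardNetwork([ConvolutionalLayerOp([ones(2, 2, 1)], [0.0], ReLU()),
                        MaxPoolingLayerOp(2, 2),
                        FlattenLayerOp(),
                        DenseLayerOp(ones(1, 1), [0.0], Id())])
```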

layers(N::FeedforwardNetwork) = N.layers

function load_Flux_convert_network()
1 change: 1 addition & 0 deletions src/Architecture/init.jl
@@ -3,6 +3,7 @@ function __init__()
@require Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" begin
eval(load_Flux_activations())
eval(load_Flux_convert_Dense_layer())
eval(load_Flux_convert_Conv_layer())
eval(load_Flux_convert_network())
end
end