
Commit 8c0a614

Merge pull request #45 from JuliaReach/schillic/layers

Add more layer types

2 parents c2f6420 + 216a8ac, commit 8c0a614

17 files changed, +525 -2 lines

Project.toml

Lines changed: 4 additions & 0 deletions

@@ -3,12 +3,16 @@ uuid = "02ac4b2c-022a-44aa-84a5-ea45a5754bcc"
 version = "0.2.2"

 [deps]
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 ReachabilityBase = "379f33d0-9447-4353-bd03-d664070e549f"
 Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
 Requires = "ae029012-a4dd-5104-9daa-d747884805df"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"

 [compat]
+LinearAlgebra = "<0.0.1, 1.6"
 ReachabilityBase = "0.1.1 - 0.2"
 Reexport = "0.2, 1"
 Requires = "0.5, 1"
+Statistics = "<0.0.1, 1.6"
 julia = "1.6"

docs/src/lib/Architecture.md

Lines changed: 10 additions & 0 deletions

@@ -53,10 +53,20 @@ dim_out(::AbstractLayerOp)
 ControllerFormats.Architecture.dim(::AbstractLayerOp)
 ```

+#### More specific layer interfaces
+
+```@docs
+AbstractPoolingLayerOp
+```
+
 #### Implementation

 ```@docs
 DenseLayerOp
+ConvolutionalLayerOp
+FlattenLayerOp
+MaxPoolingLayerOp
+MeanPoolingLayerOp
 ```

 ### Activation functions

src/Architecture/Architecture.jl

Lines changed: 8 additions & 1 deletion

@@ -6,15 +6,22 @@ Module containing data structures to represent controllers.
 module Architecture

 using Requires
+using LinearAlgebra: dot
+using Statistics: mean

+import Base: size
 export AbstractNeuralNetwork, FeedforwardNetwork,
-       AbstractLayerOp, DenseLayerOp,
+       AbstractLayerOp, DenseLayerOp, ConvolutionalLayerOp, FlattenLayerOp,
+       AbstractPoolingLayerOp, MaxPoolingLayerOp, MeanPoolingLayerOp,
        layers, dim_in, dim_out,
        ActivationFunction, Id, ReLU, Sigmoid, Tanh, LeakyReLU

 include("ActivationFunction.jl")
 include("LayerOps/AbstractLayerOp.jl")
 include("LayerOps/DenseLayerOp.jl")
+include("LayerOps/ConvolutionalLayerOp.jl")
+include("LayerOps/FlattenLayerOp.jl")
+include("LayerOps/PoolingLayerOp.jl")
 include("NeuralNetworks/AbstractNeuralNetwork.jl")
 include("NeuralNetworks/FeedforwardNetwork.jl")

src/Architecture/LayerOps/ConvolutionalLayerOp.jl (new file)

Lines changed: 154 additions & 0 deletions

"""
    ConvolutionalLayerOp{F, W, B} <: AbstractLayerOp

A convolutional layer operation is a series of filters, each of which computes a
small affine map followed by an activation function.

### Fields

- `weights`    -- vector with one weight matrix for each filter
- `bias`       -- vector with one bias value for each filter
- `activation` -- activation function

### Notes

Conversion from a `Flux.Conv` is supported.
"""
struct ConvolutionalLayerOp{F,W,B} <: AbstractLayerOp
    weights::W
    bias::B
    activation::F

    function ConvolutionalLayerOp(weights::W, bias::B, activation::F;
                                  validate=Val(true)) where {F,W,B}
        if validate isa Val{true} && !_isconsistent_ConvolutionalLayerOp(weights, bias)
            throw(ArgumentError("inconsistent filter dimensions: weights " *
                                "($(length(weights))) and biases ($(length(bias)))"))
        end

        return new{F,W,B}(weights, bias, activation)
    end
end

function _isconsistent_ConvolutionalLayerOp(weights, bias)
    if length(weights) != length(bias)
        return false
    elseif length(bias) == 0
        return false
    end
    @inbounds begin
        s = size(first(weights))
        if length(s) != 3 || s[1] == 0 || s[2] == 0 || s[3] == 0
            return false
        end
        for e in weights
            if size(e) != s
                return false
            end
        end
    end
    return true
end

n_filters(L::ConvolutionalLayerOp) = length(L.bias)

kernel(L::ConvolutionalLayerOp) = @inbounds size(first(L.weights))

# application to a tensor
function (L::ConvolutionalLayerOp)(T)
    s = size(T)
    if length(s) != 3
        throw(ArgumentError("a convolutional layer requires a three-dimensional input, but got $s"))
    end
    p, q, r = kernel(L)
    @inbounds begin
        if p > s[1] || q > s[2] || r != s[3]
            throw(ArgumentError("convolution with kernel size $(kernel(L)) " *
                                "does not apply to a tensor of dimension $s"))
        end
        d1 = s[1] - p + 1
        d2 = s[2] - q + 1
    end
    t = n_filters(L)
    s = (d1, d2, t)
    O = similar(T, s)
    @inbounds for f in 1:t
        W = L.weights[f]
        b = L.bias[f]
        for j in 1:d2
            for i in 1:d1
                T′ = view(T, i:(i + p - 1), j:(j + q - 1), 1:r)
                O[i, j, f] = L.activation(dot(W, T′) + b)
            end
        end
    end
    return O
end

function Base.:(==)(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp)
    return L1.weights == L2.weights &&
           L1.bias == L2.bias &&
           L1.activation == L2.activation
end

function Base.:isapprox(L1::ConvolutionalLayerOp, L2::ConvolutionalLayerOp; atol::Real=0,
                        rtol=nothing)
    if isnothing(rtol)
        if iszero(atol)
            N = @inbounds promote_type(eltype(first(L1.weights)), eltype(first(L2.weights)),
                                       eltype(L1.bias), eltype(L2.bias))
            rtol = Base.rtoldefault(N)
        else
            rtol = zero(atol)
        end
    end
    return isapprox(L1.weights, L2.weights; atol=atol, rtol=rtol) &&
           isapprox(L1.bias, L2.bias; atol=atol, rtol=rtol) &&
           L1.activation == L2.activation
end

function Base.show(io::IO, L::ConvolutionalLayerOp)
    str = "$(string(ConvolutionalLayerOp)) of $(n_filters(L)) filters with " *
          "kernel size $(kernel(L)) and $(typeof(L.activation)) activation"
    return print(io, str)
end

size(::ConvolutionalLayerOp) = (3, 3)

function load_Flux_convert_Conv_layer()
    return quote
        function Base.convert(::Type{ConvolutionalLayerOp}, layer::Flux.Conv)
            if !all(isone, layer.stride)
                throw(ArgumentError("stride $(layer.stride) != 1 is not supported"))  # COV_EXCL_LINE
            end
            if !all(iszero, layer.pad)
                throw(ArgumentError("pad $(layer.pad) != 0 is not supported"))  # COV_EXCL_LINE
            end
            if !all(isone, layer.dilation)
                throw(ArgumentError("dilation $(layer.dilation) != 1 is not supported"))  # COV_EXCL_LINE
            end
            if !all(isone, layer.groups)
                throw(ArgumentError("groups $(layer.groups) != 1 is not supported"))  # COV_EXCL_LINE
            end
            act = get(activations_Flux, layer.σ, nothing)
            if isnothing(act)
                throw(ArgumentError("unsupported activation function $(layer.σ)"))  # COV_EXCL_LINE
            end
            # Flux stores a 4D matrix instead of a vector of 3D matrices
            weights = @inbounds [layer.weight[:, :, :, i] for i in 1:size(layer.weight, 4)]
            return ConvolutionalLayerOp(weights, layer.bias, act)
        end

        function Base.convert(::Type{Flux.Conv}, layer::ConvolutionalLayerOp)
            act = get(activations_Flux, layer.activation, nothing)
            if isnothing(act)
                throw(ArgumentError("unsupported activation function $(layer.activation)"))  # COV_EXCL_LINE
            end
            # Flux stores a 4D matrix instead of a vector of 3D matrices
            weights = cat(layer.weights...; dims=4)
            return Flux.Conv(weights, layer.bias, act)
        end
    end
end
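
For orientation, a small usage sketch of the new layer type (not part of the diff; the single-channel input and the filter values are made up for illustration, and it assumes the exported names are available via `using ControllerFormats`):

    using ControllerFormats

    # one 2×2×1 filter with bias 0 and ReLU activation
    W = reshape([1.0 -1.0; -1.0 1.0], 2, 2, 1)
    C = ConvolutionalLayerOp([W], [0.0], ReLU())

    x = reshape(collect(1.0:9.0), 3, 3, 1)  # 3×3 input tensor with one channel
    C(x)  # 2×2×1 output; each entry is ReLU(dot(W, window) + bias)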

src/Architecture/LayerOps/DenseLayerOp.jl

Lines changed: 2 additions & 0 deletions

@@ -73,6 +73,8 @@ dim_in(L::DenseLayerOp) = size(L.weights, 2)

 dim_out(L::DenseLayerOp) = length(L.bias)

+size(::DenseLayerOp) = (1, 1)
+
 function load_Flux_convert_Dense_layer()
     return quote
         function Base.convert(::Type{DenseLayerOp}, layer::Flux.Dense)

src/Architecture/LayerOps/FlattenLayerOp.jl (new file)

Lines changed: 46 additions & 0 deletions

"""
    FlattenLayerOp <: AbstractLayerOp

A flattening layer operation converts a multidimensional tensor into a vector.

### Notes

The implementation uses row-major ordering for convenience with the
machine-learning literature.

```@jldoctest
julia> T = reshape([1, 3, 2, 4, 5, 7, 6, 8], (2, 2, 2))
2×2×2 Array{Int64, 3}:
[:, :, 1] =
 1  2
 3  4

[:, :, 2] =
 5  6
 7  8

julia> FlattenLayerOp()(T)
8-element Vector{Int64}:
 1
 2
 3
 4
 5
 6
 7
 8
```
"""
struct FlattenLayerOp <: AbstractLayerOp
end

# application to a tensor or vector (swap to row-major convention)
function (L::FlattenLayerOp)(T)
    s = size(T)
    if length(s) == 1
        return vec(T)
    end
    return vec(permutedims(T, (2, 1, 3:length(s)...)))
end

size(::FlattenLayerOp) = (nothing, 1)

src/Architecture/LayerOps/PoolingLayerOp.jl (new file)

Lines changed: 94 additions & 0 deletions

"""
    AbstractPoolingLayerOp <: AbstractLayerOp

Abstract type for pooling layer operations.

### Notes

Pooling is an operation on a three-dimensional tensor that iterates over the
first two dimensions in a window and aggregates the values, thus reducing the
output dimension.

### Implementation

The following (unexported) functions should be implemented:

- `window(::AbstractPoolingLayerOp)` -- return the pair ``(p, q)`` representing the window size
- `aggregation(::AbstractPoolingLayerOp)` -- return the aggregation function (applied to a tensor)
"""
abstract type AbstractPoolingLayerOp <: AbstractLayerOp end

for (type_name, normal_name, agg_function, agg_name) in
    ((:MaxPoolingLayerOp, "max", maximum, "maximum"),
     (:MeanPoolingLayerOp, "mean", mean, "Statistics.mean"))
    @eval begin
        @doc """
            $($type_name) <: AbstractPoolingLayerOp

        A $($normal_name)-pooling layer operation. The aggregation function is
        `$($agg_name)`.

        ### Fields

        - `p` -- horizontal window size
        - `q` -- vertical window size
        """
        struct $type_name <: AbstractPoolingLayerOp
            p::Int
            q::Int

            function $type_name(p::Int, q::Int; validate=Val(true))
                if validate isa Val{true} && (p <= 0 || q <= 0)
                    throw(ArgumentError("inconsistent window size ($p, $q)"))
                end
                return new(p, q)
            end
        end

        window(L::$type_name) = (L.p, L.q)

        aggregation(::$type_name) = $agg_function

        function Base.:(==)(L1::$type_name, L2::$type_name)
            return window(L1) == window(L2)
        end

        function Base.show(io::IO, L::$type_name)
            str = "$(string($type_name)) for $($normal_name)-pooling of window " *
                  "size $(window(L))"
            return print(io, str)
        end
    end
end

# application to a tensor
function (L::AbstractPoolingLayerOp)(T)
    s = size(T)
    if length(s) != 3
        throw(ArgumentError("a pooling layer requires a three-dimensional input, but got $s"))
    end
    p, q = window(L)
    @inbounds begin
        if mod(s[1], p) != 0 || mod(s[2], q) != 0
            throw(ArgumentError("pooling with window size ($p, $q) does " *
                                "not apply to a tensor of dimension $s"))
        end
        d1 = div(s[1], p)
        d2 = div(s[2], q)
        d3 = s[3]
    end
    s = (d1, d2, d3)
    O = similar(T, s)
    aggregate = aggregation(L)
    @inbounds for k in 1:d3
        for j in 1:d2
            for i in 1:d1
                cluster = view(T, ((i - 1) * p + 1):(i * p), ((j - 1) * q + 1):(j * q), k)
                O[i, j, k] = aggregate(cluster)
            end
        end
    end
    return O
end

size(::AbstractPoolingLayerOp) = (3, 3)
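
A short usage sketch (illustrative values, not taken from the diff; assumes `using ControllerFormats`):

    using ControllerFormats

    T = reshape(collect(1.0:16.0), 4, 4, 1)  # 4×4 tensor with one channel
    P = MaxPoolingLayerOp(2, 2)              # non-overlapping 2×2 windows
    P(T)  # 2×2×1 tensor holding the maximum of each window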

src/Architecture/NeuralNetworks/FeedforwardNetwork.jl

Lines changed: 7 additions & 1 deletion

@@ -32,14 +32,20 @@ end
 function _first_inconsistent_layer(L)
     prev = nothing
     for (i, l) in enumerate(L)
-        if !isnothing(prev) && dim_in(l) != dim_out(prev)
+        if !isnothing(prev) &&
+           ((!isnothing(dim_in(l)) && !isnothing(dim_out(prev)) && dim_in(l) != dim_out(prev)) ||
+            !_iscompatible(size(prev), size(l)))
             return i
         end
         prev = l
     end
     return 0
 end

+_iscompatible(t1::Tuple, t2::Tuple) = _iscompatible(t1[2], t2[1])
+_iscompatible(i::Int, j::Int) = i == j
+_iscompatible(i, ::Nothing) = true
+
 layers(N::FeedforwardNetwork) = N.layers

 function load_Flux_convert_network()
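
To illustrate how the new `size` tuples line up across layer types, here is a hand-composed pipeline (a sketch with made-up weights; the layers are applied directly rather than wrapped in a `FeedforwardNetwork`, and the `DenseLayerOp(weights, bias, activation)` argument order is assumed by analogy with the convolutional constructor):

    using ControllerFormats

    C = ConvolutionalLayerOp([ones(2, 2, 1)], [0.0], ReLU())  # size (3, 3): 3D tensor in, 3D tensor out
    P = MeanPoolingLayerOp(2, 2)                              # size (3, 3)
    F = FlattenLayerOp()                                      # size (nothing, 1): any input, vector out
    D = DenseLayerOp(ones(1, 4), [0.0], Id())                 # size (1, 1): vector in, vector out

    x = reshape(collect(1.0:25.0), 5, 5, 1)
    D(F(P(C(x))))  # 5×5×1 → 4×4×1 → 2×2×1 → 4-vector → 1-vector

`_iscompatible` compares the second entry of one layer's tuple with the first entry of the next layer's tuple, with `nothing` acting as a wildcard, so this ordering passes the check, while e.g. a dense layer followed by a convolutional layer is flagged by `_first_inconsistent_layer`.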

src/Architecture/init.jl

Lines changed: 1 addition & 0 deletions

@@ -3,6 +3,7 @@ function __init__()
     @require Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" begin
         eval(load_Flux_activations())
         eval(load_Flux_convert_Dense_layer())
+        eval(load_Flux_convert_Conv_layer())
         eval(load_Flux_convert_network())
     end
 end
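
Once Flux is loaded, the new conversion is registered through Requires; a hedged sketch (it assumes `Flux.relu` is among the activations mapped in `activations_Flux`):

    using Flux, ControllerFormats

    c = Flux.Conv((2, 2), 1 => 2, Flux.relu)  # 2×2 kernel, 1 input channel, 2 filters
    L = convert(ConvolutionalLayerOp, c)      # two 2×2×1 weight arrays plus 2 biases
    convert(Flux.Conv, L)                     # round-trip back to Flux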
