From b21fdfb53fc57c822f426f878443b894fb3c4f0e Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Tue, 12 Feb 2019 23:22:16 -0500 Subject: [PATCH 01/12] Several notes in morph.py --- morph/nn/morph.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/morph/nn/morph.py b/morph/nn/morph.py index bbe853a..bd6d644 100644 --- a/morph/nn/morph.py +++ b/morph/nn/morph.py @@ -1,11 +1,19 @@ -import torch.nn as nn +import torch.nn as nn # TODO do we even need to import this __just__ for the type? def once(net: nn.Module, experimental_support=False) -> nn.Module: """Runs an experimental implementation of the MorphNet algorithm on `net` producing a new network: - 1. Shrink the layers o - + 1. Shrink the layers + 2. Widen the network + a. If everything mathematically fits together nicely, try to run inference + i. initialize those new weights with my random sampling technique + b. If things aren't snug, apply the more robust layer fitting approach + i. the layer widths will be what they will and that logic is handled in + morph.nn.widen.py + 3. Present the new model in a simple dataclass + a. takes advantage of the generated __repr__ and __eq__ + b. that class will have analysis functions (like `pd.DataFrame.summary()`) Returns: either `net` if `experimental_support == False` or a MorphNet of the supplied `net`. """ From 744bb5adf321d9331c52adb733321b31f674a1aa Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Tue, 12 Feb 2019 23:22:45 -0500 Subject: [PATCH 02/12] Clean up shrink --- morph/nn/shrink.py | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/morph/nn/shrink.py b/morph/nn/shrink.py index f273d37..e69de29 100644 --- a/morph/nn/shrink.py +++ b/morph/nn/shrink.py @@ -1,27 +0,0 @@ -import torch -import torch.nn as nn - -#################### HELPERS #################### - - -def _group_layers_by_algo(children_list): - """Group the layers into how they will be acted upon by my implementation of the algorithm: - 1. 
First child in the list - 2. Slice of all the child, those that are not first nor last - 3. Last child in the list - """ - - list_len = len(children_list) - - # validate input in case I slip up - if list_len < 1: - raise ValueError('Invalid argument:', children_list) - - if list_len <= 2: - return children_list # interface? - - first = children_list[0] - middle = children_list[1:-1] - last = children_list[-1] - - return first, middle, last From db1f9447c87ad97e044cfeedc73627db6ccc0aa9 Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Tue, 12 Feb 2019 23:24:24 -0500 Subject: [PATCH 03/12] Reorganize utilities The code that was in shrink was utility code that I found uses for in at least two other places + Satisfies my rule of three occurrences + Led to successful porting of notebook code --- morph/nn/_types.py | 9 +++++++++ morph/nn/utils.py | 44 ++++++++++++++++++++++++++++++++------------ 2 files changed, 41 insertions(+), 12 deletions(-) create mode 100644 morph/nn/_types.py diff --git a/morph/nn/_types.py b/morph/nn/_types.py new file mode 100644 index 0000000..bd0ac33 --- /dev/null +++ b/morph/nn/_types.py @@ -0,0 +1,9 @@ +def type_name(o): + '''Returns the simplified type name of the given object. 
+ Eases type checking, rather than any(isinstance(some_obj, _type) for _type in [my, types, to, check]) + ''' + return type(o).__name__ + + +def type_supported(type_name: str) -> bool: + return type_name in ['Conv2d', 'Linear'] diff --git a/morph/nn/utils.py b/morph/nn/utils.py index 45da6be..e4e4913 100644 --- a/morph/nn/utils.py +++ b/morph/nn/utils.py @@ -1,5 +1,36 @@ import torch.nn as nn +from morph.nn._types import type_name, type_supported + +from typing import List, Tuple, TypeVar + +ML = TypeVar('MODULES', List[nn.Module]) +# Type constrained to be the results of nn.Module.children() or ...named_children() +CL = TypeVar('MODULE_CHILDREN_LIST', ML, List[Tuple[str, nn.Module]]) + + +def group_layers_by_algo(children_list: CL) -> ML: + """Group the layers into how they will be acted upon by my implementation of the algorithm: + 1. First child in the list (the "input" layer) + 2. Slice of all the child, those that are not first nor last + 3. Last child in the list (the "output" layer) + """ + + list_len = len(children_list) + + # validate input in case I slip up + if list_len < 1: + raise ValueError('Invalid argument:', children_list) + + if list_len <= 2: + return children_list # interface? + + first = children_list[0] + middle = children_list[1:-1] + last = children_list[-1] + + return first, middle, last + def layer_has_bias(layer: nn.Module) -> bool: return not layer.bias is None @@ -66,7 +97,7 @@ def new_output_layer(base_layer: nn.Module, type_name: str, in_dim: int) -> nn.M def redo_layer(layer: nn.Module, new_in=None, new_out=None) -> nn.Module: if new_in is None and new_out is None: - return layehr + return layer _type = type_name(layer) if not type_supported(_type): @@ -96,14 +127,3 @@ def layer_is_conv2d(name: str): def layer_is_linear(name: str): return name == 'Linear' - - -def type_name(o): - '''Returns the simplified type name of the given object. 
- Eases type checking, rather than any(isinstance(some_obj, _type) for _type in [my, types, to, check]) - ''' - return type(o).__name__ - - -def type_supported(type_name: str) -> bool: - return type_name in ['Conv2d', 'Linear'] From a1a03f92cd711f547f5bd09f6fc2e274a7be2790 Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Tue, 12 Feb 2019 23:24:55 -0500 Subject: [PATCH 04/12] WIP: Implement widen (by a different name) --- morph/nn/widen.py | 51 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/morph/nn/widen.py b/morph/nn/widen.py index 991aa1a..7b2f9a1 100644 --- a/morph/nn/widen.py +++ b/morph/nn/widen.py @@ -1 +1,50 @@ - \ No newline at end of file +import torch.nn as nn + +from morph.nn.utils import make_children_list, group_layers_by_algo + +# TODO: refactor out width_factor +def new_resize_layers(net: nn.Module): + + old_layers = make_children_list(net.named_children()) + (first_name, first_layer), middle, last = group_layers_by_algo(old_layers) + + last_out = first_layer.out_channels # count of the last layer's out features + + new_out_next_in = int(last_out * 1.4) + + # NOTE: is there a better way to do this part? 
+ network = nn.Module() # new network + + network.add_module(first_name, nn.Conv2d( + first_layer.in_channels, new_out_next_in, kernel_size=first_layer.kernel_size, + stride=first_layer.stride + )) + + # TODO: format and utilize the functions in utils for making layers + for name, child in middle: + # otherwise, we want to + type_name = type(child).__name__ + if type_name in ['Conv2d', 'Linear']: + + temp = 0 + if type_name == 'Conv2d': + temp = int(child.out_channels * 1.4) + network.add_module(name, nn.Conv2d( + new_out_next_in, out_channels=temp, + kernel_size=child.kernel_size, stride=child.stride + )) + else: # type_name == 'Linear' + temp = int(child.out_features * 1.4) + network.add_module(name, nn.Linear( + in_features=new_out_next_in, out_features=temp + )) + + new_out_next_in = temp + + last_name, last_layer = last + network.add_module(last_name, nn.Conv2d( + new_out_next_in, last_layer.out_channels, + kernel_size=last_layer.kernel_size, stride=last_layer.stride + )) + + return network \ No newline at end of file From e356aae6d583a56f2b2724d0cfcb72fb1e922620 Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Tue, 12 Feb 2019 23:48:01 -0500 Subject: [PATCH 05/12] Implement resize_layers, proving utilities useful But showing that some things are too granular right now and may be less flexible than I want them to be. * API usage shouldn't be so dependent on passing around strings - That's kind of pathetic + Make an informed decision about what's best for the API, then do it. * Since redo_layer() is so effective, use it to lower the import entity count. 
* NIT: make_children_list should be something like 'children_as_list' --- morph/nn/_types.py | 1 + morph/nn/widen.py | 78 ++++++++++++++++++++++------------------------ 2 files changed, 38 insertions(+), 41 deletions(-) diff --git a/morph/nn/_types.py b/morph/nn/_types.py index bd0ac33..9eef78c 100644 --- a/morph/nn/_types.py +++ b/morph/nn/_types.py @@ -6,4 +6,5 @@ def type_name(o): def type_supported(type_name: str) -> bool: + # NOTE: already considerd a constants file. I don't like that precident return type_name in ['Conv2d', 'Linear'] diff --git a/morph/nn/widen.py b/morph/nn/widen.py index 7b2f9a1..a53adfc 100644 --- a/morph/nn/widen.py +++ b/morph/nn/widen.py @@ -1,50 +1,46 @@ import torch.nn as nn -from morph.nn.utils import make_children_list, group_layers_by_algo +# TODO: nope. This is really long +from morph.nn.utils import group_layers_by_algo, layer_is_conv2d, make_children_list, new_input_layer, new_output_layer, redo_layer, type_name, type_supported + # TODO: refactor out width_factor -def new_resize_layers(net: nn.Module): - +def resize_layers(net: nn.Module): + old_layers = make_children_list(net.named_children()) (first_name, first_layer), middle, last = group_layers_by_algo(old_layers) - - last_out = first_layer.out_channels # count of the last layer's out features - - new_out_next_in = int(last_out * 1.4) - - # NOTE: is there a better way to do this part? - network = nn.Module() # new network - - network.add_module(first_name, nn.Conv2d( - first_layer.in_channels, new_out_next_in, kernel_size=first_layer.kernel_size, - stride=first_layer.stride - )) - + + first_layer_output_size = first_layer.out_channels # count of the last layer's out features + + new_out_next_in = int(first_layer_output_size * 1.4) + + # NOTE: is there a better way to do this part? Maybe nn.Sequential? 
+ network = nn.Module() # new network + + network.add_module( + first_name, + new_input_layer(first_layer, type_name(first_layer), out_dim=new_out_next_in)) + # TODO: format and utilize the functions in utils for making layers - for name, child in middle: - # otherwise, we want to - type_name = type(child).__name__ - if type_name in ['Conv2d', 'Linear']: - - temp = 0 - if type_name == 'Conv2d': - temp = int(child.out_channels * 1.4) - network.add_module(name, nn.Conv2d( - new_out_next_in, out_channels=temp, - kernel_size=child.kernel_size, stride=child.stride - )) - else: # type_name == 'Linear' - temp = int(child.out_features * 1.4) - network.add_module(name, nn.Linear( - in_features=new_out_next_in, out_features=temp - )) - - new_out_next_in = temp - + for name, child in middle: + # otherwise, we want to + _t = type_name(child) + if type_supported(_t): + + new_out = 0 + # TODO: look up performance on type name access. Maybe this could just be layer_is_conv2d(child) + if layer_is_conv2d(_t): + new_out = int(child.out_channels * 1.4) + else: # type_name == 'Linear' + new_out = int(child.out_features * 1.4) + + new_layer = redo_layer(child, new_in=new_out_next_in, new_out=new_out) + new_out_next_in = new_out + network.add_module(name, new_layer) + last_name, last_layer = last - network.add_module(last_name, nn.Conv2d( - new_out_next_in, last_layer.out_channels, - kernel_size=last_layer.kernel_size, stride=last_layer.stride - )) - + network.add_module( + last_name, + new_output_layer(last_layer, type_name(last_layer), in_dim=new_out_next_in)) + return network \ No newline at end of file From cc95ef95dbaaf7114dd6ef90cc5634b820023375 Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Thu, 14 Feb 2019 23:40:34 -0500 Subject: [PATCH 06/12] Add environment.yml It's been missing since the project restructure and that can get lumped in here --- environment.yml | 128 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 
environment.yml diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..8bb69f9 --- /dev/null +++ b/environment.yml @@ -0,0 +1,128 @@ +name: insight-project-pytorch +channels: + - pytorch + - anaconda-fusion + - defaults +dependencies: + - appnope=0.1.0=py37_0 + - asn1crypto=0.24.0=py37_0 + - astroid=2.1.0=py37_0 + - backcall=0.1.0=py37_0 + - blas=1.0=mkl + - bleach=3.1.0=py37_0 + - ca-certificates=2018.12.5=0 + - certifi=2018.11.29=py37_0 + - cffi=1.11.5=py37h6174b99_1 + - chardet=3.0.4=py37_1 + - cryptography=2.4.2=py37ha12b0ac_0 + - cycler=0.10.0=py37_0 + - dbus=1.13.6=h90a0687_0 + - decorator=4.3.0=py37_0 + - defusedxml=0.5.0=py37_1 + - entrypoints=0.3=py37_0 + - expat=2.2.6=h0a44026_0 + - freetype=2.9.1=hb4e5f40_0 + - gettext=0.19.8.1=h15daf44_3 + - glib=2.56.2=hd9629dc_0 + - icu=58.2=h4b95b61_1 + - idna=2.8=py37_0 + - intel-openmp=2019.1=144 + - ipykernel=5.1.0=py37h39e3cac_0 + - ipython=7.2.0=py37h39e3cac_0 + - ipython_genutils=0.2.0=py37_0 + - ipywidgets=7.4.2=py37_0 + - isort=4.3.4=py37_0 + - jedi=0.13.2=py37_0 + - jinja2=2.10=py37_0 + - jpeg=9b=he5867d9_2 + - jsonschema=2.6.0=py37_0 + - jupyter=1.0.0=py37_7 + - jupyter_client=5.2.4=py37_0 + - jupyter_console=6.0.0=py37_0 + - jupyter_core=4.4.0=py37_0 + - kiwisolver=1.0.1=py37h0a44026_0 + - lazy-object-proxy=1.3.1=py37h1de35cc_2 + - libcxx=4.0.1=hcfea43d_1 + - libcxxabi=4.0.1=hcfea43d_1 + - libedit=3.1.20181209=hb402a30_0 + - libffi=3.2.1=h475c297_4 + - libgfortran=3.0.1=h93005f0_2 + - libiconv=1.15=hdd342a3_7 + - libpng=1.6.36=ha441bb4_0 + - libsodium=1.0.16=h3efe00b_0 + - libtiff=4.0.10=hcb84e12_1001 + - markupsafe=1.1.0=py37h1de35cc_0 + - matplotlib=3.0.2=py37h54f8f79_0 + - mccabe=0.6.1=py37_1 + - mistune=0.8.4=py37h1de35cc_0 + - mkl=2019.1=144 + - mkl_fft=1.0.10=py37h5e564d8_0 + - mkl_random=1.0.2=py37h27c97d8_0 + - nb_conda_kernels=2.2.0=py37_0 + - nbconvert=5.4.0=py37_1 + - nbformat=4.4.0=py37_0 + - ncurses=6.1=h0a44026_1 + - ninja=1.8.2=py37h04f5b5a_1 + - 
notebook=5.7.4=py37_0 + - numpy=1.15.4=py37hacdab7b_0 + - numpy-base=1.15.4=py37h6575580_0 + - olefile=0.46=py37_0 + - openssl=1.1.1a=h1de35cc_0 + - pandoc=1.19.2.1=ha5e8f32_1 + - pandocfilters=1.4.2=py37_1 + - parso=0.3.1=py37_0 + - pcre=8.42=h378b8a2_0 + - pexpect=4.6.0=py37_0 + - pickleshare=0.7.5=py37_0 + - pillow=5.4.1=py37hb68e598_0 + - pip=18.1=py37_0 + - prometheus_client=0.5.0=py37_0 + - prompt_toolkit=2.0.7=py37_0 + - ptyprocess=0.6.0=py37_0 + - pycparser=2.19=py37_0 + - pygments=2.3.1=py37_0 + - pylint=2.2.2=py37_0 + - pyopenssl=18.0.0=py37_0 + - pyparsing=2.3.1=py37_0 + - pyqt=5.9.2=py37h655552a_2 + - pysocks=1.6.8=py37_0 + - python=3.7.2=haf84260_0 + - python-dateutil=2.7.5=py37_0 + - pytz=2018.9=py37_0 + - pyzmq=17.1.2=py37h1de35cc_0 + - qt=5.9.7=h468cd18_1 + - qtconsole=4.4.3=py37_0 + - readline=7.0=h1de35cc_5 + - requests=2.21.0=py37_0 + - rope=0.11.0=py37_0 + - send2trash=1.5.0=py37_0 + - setuptools=40.6.3=py37_0 + - sip=4.19.8=py37h0a44026_0 + - six=1.12.0=py37_0 + - sqlite=3.26.0=ha441bb4_0 + - terminado=0.8.1=py37_1 + - testpath=0.4.2=py37_0 + - tk=8.6.8=ha441bb4_0 + - tornado=5.1.1=py37h1de35cc_0 + - traitlets=4.3.2=py37_0 + - urllib3=1.24.1=py37_0 + - wcwidth=0.1.7=py37_0 + - webencodings=0.5.1=py37_1 + - wheel=0.32.3=py37_0 + - widgetsnbextension=3.4.2=py37_0 + - wrapt=1.11.0=py37h1de35cc_0 + - xz=5.2.4=h1de35cc_4 + - yapf=0.25.0=py37_0 + - zeromq=4.2.5=h0a44026_1 + - zlib=1.2.11=h1de35cc_3 + - pytorch=1.0.0=py3.7_1 + - torchvision=0.2.1=py_2 + - pip: + - docutils==0.14 + - pkginfo==1.5.0.1 + - readme-renderer==24.0 + - requests-toolbelt==0.9.1 + - torch==1.0.0 + - tqdm==4.30.0 + - twine==1.12.1 + From 23dc886fc6f9c7e97015894af97ad235fb96af66 Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Thu, 14 Feb 2019 23:41:30 -0500 Subject: [PATCH 07/12] Update README.md and demo.py to be truly reflective ... of the project's state and intent. 
* All of the pieces are there, but they still aren't connected * That's a poor show of engineering quality --- README.md | 53 +++++++++++++++++++++++++++++++++++++++++++++++++---- demo.py | 2 +- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 349cc9d..e93ef2f 100755 --- a/README.md +++ b/README.md @@ -13,16 +13,58 @@ Please feel free to look around, but bookmark which release tag it was. Master w A Stephen Fox endeavor to become an Applied AI Scientist. +## Background Resources + +### Key Ideas + +1. Make it simple to refine neural architectures +2. Focus on dropping model parameter size while __keeping performance as high as possible__ +3. Make the tools user-friendly, and clearly documented + +### Project Roadmap + +- Please see [the GitHub Project board](https://github.com/stephenjfox/Morph.py/projects/1) + +--- + +## What is Morph.py? + +Morph.py is a Neural Network Architecture Optimization toolkit targeted at Deep Learning researchers + and practitioners. 
+* It acts outside of the current paradigm of [Neural Architecture Search](https://github.com/D-X-Y/awesome-NAS) + while still proving effective +* It helps you model the accuracy of a network with respect to its size (as measured by "count of model parameters") + * Subsequently, you could be nearly as effective (given some margin of error) with a __much__ smaller + memory footprint +* Provides you, the researcher, with [better insight on how to improve your model](https://github.com/stephenjfox/Morph.py/projects/3) + +Please enjoy this [Google Slides presentation](https://goo.gl/ZzZrng) + +Coming soon: +* A walkthrough of the presentation (more detail than my presenter's notes) +* More [supported model architectures](https://github.com/stephenjfox/Morph.py/projects/2) + + +### Current support + +* Dynamic adjustment of a given layer's size +* Weight persistence across layer resizing + * To preserve all the hard work you spent in training + +--- + +# Contributing + ## Setup (to work alongside me) `git clone https://github.com/stephenjfox/Morph.py.git` -## Requisites +### Requisites -### [Install Anaconda](https://www.anaconda.com/download/) +#### [Install Anaconda](https://www.anaconda.com/download/) * They've made it easier with the years. If you haven't already, please give it a try -### Install Pip +#### Install Pip 1. `conda install pip` 2. Proceed as normal @@ -32,4 +74,7 @@ A Stephen Fox endeavor to become an Applied AI Scientist. 
- Jupyter Notebook * And a few tools to make it better on your local environment like `nb_conda`, `nbconvert`, and `nb_conda_kernels` - Python 3.6+ because [Python 2 is dying](https://pythonclock.org/) -- PyTorch (`conda install torch torchvision`) +- PyTorch (`conda install pytorch torchvision -c pytorch`) + +All of these and more are covered in the `environment.yml` file: ++ Simply run `conda env create -f environment.yml -n <env-name>` \ No newline at end of file diff --git a/demo.py b/demo.py index 5a8be18..e5e82c7 100644 --- a/demo.py +++ b/demo.py @@ -14,7 +14,7 @@ def main(): # do one pass through the algorithm modified = morph.once(my_model) - print(modified) # proof that the thing wasn't tampered with + print(modified) # take a peek at the new layers. You take it from here my_dataloader = DataLoader(TensorDataset(torch.randn(2, 28, 28))) From f06d2b2b77e5730c3781e21367c4a031e316288e Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Fri, 15 Feb 2019 13:18:13 -0500 Subject: [PATCH 08/12] Clean up README example --- README.md | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ demo.py | 2 ++ 2 files changed, 50 insertions(+) diff --git a/README.md b/README.md index e93ef2f..803c0cc 100755 --- a/README.md +++ b/README.md @@ -27,6 +27,54 @@ A Stephen Fox endeavor to become an Applied AI Scientist. --- +## Usage + +### Installation + +`pip install morph-py` + +### Code Example + +```python +import morph + +morph_optimizer = None +# train loop +for e in range(epoch_count): + + for input, target in dataloader: + optimizer.zero_grad() # optional: zero gradients or don't... 
+ output = model(input) + + loss = loss_fn(output, target) + loss.backward() + optimizer.step() + + + # setup for comparing the morphed model + if morph_optimizer: + morph_optimizer.zero_grad() + morph_loss = loss_fn(morph_model(input), target) + + logging.info(f'Morph loss - Standard loss = {morph_loss - loss}') + + morph_loss.backward() + morph_optimizer.step() + + + # Experimentally supported: Initialize our morphing halfway through training + if e == epoch_count // 2: + # if you want to override your model + model = morph.once(model) + + # if you want to compare in parallel + morph_model = morph.once(model) + + # either way, you need to tell your optimizer about it + morph_optimizer = init_optimizer(params=morph_model.parameters()) + +``` + ## What is Morph.py? Morph.py is a Neural Network Architecture Optimization toolkit targeted at Deep Learning researchers diff --git a/demo.py b/demo.py index e5e82c7..0d88319 100644 --- a/demo.py +++ b/demo.py @@ -20,6 +20,8 @@ def main(): # get back the class that will do work morphed = net.Morph(my_model, epochs=5, dataloader=my_dataloader) + + # TODO: we need your loss function, but this is currently __unsupported__ morphed.run_training() From b085293971fb2ba1947dab6dc7afd5b993c9f22b Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Sat, 16 Feb 2019 21:34:10 -0500 Subject: [PATCH 09/12] Release 0.1.0/resize layers should be more clear (#7) * Use more utilities, new abstraction * Add layer inspectors * Add errors and utils * Calculate a new layer's size Rather than having to reinstantiate layers, or some sub-optimal traversal of a neural architecture's nn.Modules, just do the math for 'in-size' and 'out-size' for a given layer to make the math easier. 
* Clarify demo.py * Trim down resize_layers in nn.widen module * Complete resize_layers for #2 Need a 'fit_layers', for when shrinking/pruning clips out too many neuron connections --- demo.py | 5 ++++- morph/_error.py | 6 ++++++ morph/_utils.py | 12 ++++++++++++ morph/layers/widen.py | 39 ++++++++++++++++++++------------------- morph/nn/shrink.py | 20 ++++++++++++++++++++ morph/nn/utils.py | 31 ++++++++++++++++++++++++++++--- morph/nn/widen.py | 38 +++++++++++++++++--------------------- 7 files changed, 107 insertions(+), 44 deletions(-) create mode 100644 morph/_error.py create mode 100644 morph/_utils.py diff --git a/demo.py b/demo.py index 0d88319..a167554 100644 --- a/demo.py +++ b/demo.py @@ -9,6 +9,9 @@ from morph._models import EasyMnist +def random_dataset(): + return TensorDataset(torch.randn(2, 28, 28)) + def main(): my_model = EasyMnist() # do one pass through the algorithm @@ -16,7 +19,7 @@ def main(): print(modified) # take a peek at the new layers. You take it from here - my_dataloader = DataLoader(TensorDataset(torch.randn(2, 28, 28))) + my_dataloader = DataLoader(random_dataset) # get back the class that will do work morphed = net.Morph(my_model, epochs=5, dataloader=my_dataloader) diff --git a/morph/_error.py b/morph/_error.py new file mode 100644 index 0000000..1d645c2 --- /dev/null +++ b/morph/_error.py @@ -0,0 +1,6 @@ +class ValidationError(Exception): + """Custom error that represents a validation issue, according to internal + system rules + """ + def __init__(self, msg): + super(ValidationError, self).__init__(msg) diff --git a/morph/_utils.py b/morph/_utils.py new file mode 100644 index 0000000..3639185 --- /dev/null +++ b/morph/_utils.py @@ -0,0 +1,12 @@ +from ._error import ValidationError + + +def check(pred: bool, message='Validation failed'): + if not pred: raise ValidationError(message) + + +def round(value: float) -> int: + """Rounds a `value` up to the next integer if possible. 
+ Performs differently from the standard Python `round` + """ + return int(value + .5) \ No newline at end of file diff --git a/morph/layers/widen.py b/morph/layers/widen.py index e2ff0e6..c25ee70 100644 --- a/morph/layers/widen.py +++ b/morph/layers/widen.py @@ -3,11 +3,11 @@ import torch import torch.nn as nn -from ..nn.utils import layer_has_bias +from ..nn.utils import layer_has_bias, redo_layer +from .._utils import check, round # NOTE: should factor be {smaller, default at all}? -# TODO: Research - is there a better type for layer than nn.Module? def widen(layer: nn.Module, factor=1.4, in_place=False) -> nn.Module: """ Args: @@ -23,23 +23,18 @@ def widen(layer: nn.Module, factor=1.4, in_place=False) -> nn.Module: Returns: A new layer of the base type (e.g. nn.Linear) or `None` if in_place=True """ - if factor < 1.0: - raise ValueError('Cannot shrink with the widen() function') - if factor == 1.0: - raise ValueError("You shouldn't waste compute time if you're not changing anything") + check(factor > 1.0, "Your call to widen() should be increasing the size of your layers") # we know that layer.weight.size()[0] is the __output__ dimension in the linear case output_dim = 0 if isinstance(layer, nn.Linear): output_dim = layer.weight.size()[0] # FIXME: switch to layer.out_features? input_dim = layer.weight.size()[1] # FIXME: switch to layer.in_features? - # TODO: other classes, for robustness? - # TODO: Use dictionary look-ups instead, because they're faster? 
else: raise ValueError('unsupported layer type:', type(layer)) logging.debug(f"current dimensions: {(output_dim, input_dim)}") - new_size = round(factor * output_dim + .5) # round up, not down, if we can + new_size = round(factor * output_dim) # round up, not down, if we can # We're increasing layer width from output_dim to new_size, so let's save that for later size_diff = new_size - output_dim @@ -56,20 +51,26 @@ def widen(layer: nn.Module, factor=1.4, in_place=False) -> nn.Module: # TODO: cleanup duplication? Missing properties that will effect usability? if in_place: - layer.out_features = new_size - layer.weight = p_weights - layer.bias = p_bias - logging.warning( - 'Using experimental "in-place" version. May have unexpected affects on activation.' - ) + write_layer_properties(layer, new_size, p_weights, p_bias) return layer else: - print(f"New shape = {expanded_weights.shape}") - l = nn.Linear(*expanded_weights.shape[::-1], bias=utils.layer_has_bias(layer)) - l.weight = p_weights - l.bias = p_bias + logging.debug(f"New shape = {expanded_weights.shape}") + new_input, new_output = expanded_weights[1], expanded_weights[0] + l = redo_layer(layer, new_in=new_input, new_out=new_output) + write_layer_properties(layer, new_size=None, new_weights=p_weights, new_bias=p_bias) + return l +def write_layer_properties(layer, new_size, new_weights, new_bias): + """Assigns properties to this `layer`, making the changes on a model in-line + """ + if new_size: layer.out_features = new_size + if new_weights: layer.weight = new_weights + if new_bias: layer.bias = new_bias + logging.warning( + 'Using experimental "in-place" version. May have unexpected affects on activation.' 
+ ) + def _expand_bias_or_weight(t: nn.Parameter, increase: int) -> torch.Tensor: """Returns a tensor of shape `t`, with padding values drawn from a Guassian distribution diff --git a/morph/nn/shrink.py b/morph/nn/shrink.py index e69de29..5be3320 100644 --- a/morph/nn/shrink.py +++ b/morph/nn/shrink.py @@ -0,0 +1,20 @@ +from morph.layers.sparse import percent_waste +from morph._utils import check, round +from morph.nn.utils import in_dim, out_dim + +import torch.nn as nn + + +def calc_reduced_size(layer: nn.Module) -> (int, int): + """Calculates the reduced size of the layer, post training (initial or morphed re-training) + so the layers can be resized. + """ + # TODO: remove this guard when properly we protect access to this function + check( + type(layer) == nn.Conv2d or type(layer) == nn.Linear, + 'Invalid layer type: ' + type(layer)) + + percent_keep = 1 - percent_waste(layer) + shrunk_in, shrunk_out = percent_keep * in_dim(layer), percent_keep * out_dim(layer) + + return round(shrunk_in), round(shrunk_out) diff --git a/morph/nn/utils.py b/morph/nn/utils.py index e4e4913..23e0468 100644 --- a/morph/nn/utils.py +++ b/morph/nn/utils.py @@ -1,6 +1,7 @@ import torch.nn as nn from morph.nn._types import type_name, type_supported +from morph._utils import check from typing import List, Tuple, TypeVar @@ -12,15 +13,14 @@ def group_layers_by_algo(children_list: CL) -> ML: """Group the layers into how they will be acted upon by my implementation of the algorithm: 1. First child in the list (the "input" layer) - 2. Slice of all the child, those that are not first nor last + 2. Slice of all the children, those that are not first nor last 3. Last child in the list (the "output" layer) """ list_len = len(children_list) # validate input in case I slip up - if list_len < 1: - raise ValueError('Invalid argument:', children_list) + check(list_len > 1, 'Your children_list must be more than a singleton') if list_len <= 2: return children_list # interface? 
@@ -43,6 +43,31 @@ def make_children_list(children_or_named_children): return [c for c in children_or_named_children] +#################### LAYER INSPECTION #################### + + +def in_dim(layer: nn.Module) -> int: + check(type_supported(layer)) + + if layer_is_linear(layer): + return layer.in_features + elif layer_is_conv2d(layer): + return layer.in_channels + else: + raise RuntimeError('Inspecting on unsupported layer') + + +def out_dim(layer: nn.Module) -> int: + check(type_supported(layer)) + + if layer_is_linear(layer): + return layer.out_features + elif layer_is_conv2d(layer): + return layer.out_channels + else: + raise RuntimeError('Inspecting on unsupported layer') + + #################### NEW LAYERS #################### diff --git a/morph/nn/widen.py b/morph/nn/widen.py index a53adfc..fee07fe 100644 --- a/morph/nn/widen.py +++ b/morph/nn/widen.py @@ -1,46 +1,42 @@ import torch.nn as nn -# TODO: nope. This is really long -from morph.nn.utils import group_layers_by_algo, layer_is_conv2d, make_children_list, new_input_layer, new_output_layer, redo_layer, type_name, type_supported +import logging +from morph.nn.utils import group_layers_by_algo, make_children_list, out_dim, redo_layer +from morph._utils import round +from morph.nn._types import type_name, type_supported -# TODO: refactor out width_factor -def resize_layers(net: nn.Module): + +def resize_layers(net: nn.Module, width_factor: float = 1.4) -> nn.Module: + """Perform a uniform layer widening, which increases the output dimension for + fully-connected layers and the number of filters for convolutional layers. 
+ """ old_layers = make_children_list(net.named_children()) (first_name, first_layer), middle, last = group_layers_by_algo(old_layers) first_layer_output_size = first_layer.out_channels # count of the last layer's out features - new_out_next_in = int(first_layer_output_size * 1.4) + new_out_next_in = round(first_layer_output_size * width_factor) # NOTE: is there a better way to do this part? Maybe nn.Sequential? network = nn.Module() # new network - network.add_module( - first_name, - new_input_layer(first_layer, type_name(first_layer), out_dim=new_out_next_in)) + network.add_module(first_name, redo_layer(first_layer, new_out=new_out_next_in)) - # TODO: format and utilize the functions in utils for making layers for name, child in middle: - # otherwise, we want to - _t = type_name(child) - if type_supported(_t): + if type_supported(type_name(child)): - new_out = 0 - # TODO: look up performance on type name access. Maybe this could just be layer_is_conv2d(child) - if layer_is_conv2d(_t): - new_out = int(child.out_channels * 1.4) - else: # type_name == 'Linear' - new_out = int(child.out_features * 1.4) + new_out = round(out_dim(child) * width_factor) new_layer = redo_layer(child, new_in=new_out_next_in, new_out=new_out) new_out_next_in = new_out network.add_module(name, new_layer) + else: + logging.warning(f"Encountered a non-resizable layer: {type(child)}") + network.add_module(name, child) last_name, last_layer = last - network.add_module( - last_name, - new_output_layer(last_layer, type_name(last_layer), in_dim=new_out_next_in)) + network.add_module(last_name, redo_layer(last_layer, new_in=new_out_next_in)) return network \ No newline at end of file From 24bb995377ba4ddf3f267fa1a042f83698d5f601 Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Mon, 18 Feb 2019 12:23:41 -0500 Subject: [PATCH 10/12] Restore testability __Solution__: When running tests from the command line, ___always__ import the module `-m unittest` and use the verbose (`-v`) flag to see where 
Python is breaking up. * Then you'll be able to trace down the problem. * Do this a few times, and it will be reflex --- demo.py | 4 ++-- morph/__init__.py | 2 +- morph/layers/widen.py | 2 +- morph/layers/widen_test.py | 24 ++++++++++++++++++++++++ morph/nn/utils.py | 4 ++-- morph/{_models.py => testing/models.py} | 0 morph/{_utils.py => utils.py} | 0 morph/utils_test.py | 19 +++++++++++++++++++ 8 files changed, 49 insertions(+), 6 deletions(-) create mode 100644 morph/layers/widen_test.py rename morph/{_models.py => testing/models.py} (100%) rename morph/{_utils.py => utils.py} (100%) create mode 100644 morph/utils_test.py diff --git a/demo.py b/demo.py index a167554..f1587e2 100644 --- a/demo.py +++ b/demo.py @@ -6,7 +6,7 @@ import morph.nn as net from morph.layers.sparse import sparsify -from morph._models import EasyMnist +from morph.testing.models import EasyMnist def random_dataset(): @@ -19,7 +19,7 @@ def main(): print(modified) # take a peek at the new layers. You take it from here - my_dataloader = DataLoader(random_dataset) + my_dataloader = DataLoader(random_dataset()) # get back the class that will do work morphed = net.Morph(my_model, epochs=5, dataloader=my_dataloader) diff --git a/morph/__init__.py b/morph/__init__.py index e1530f3..8e67f27 100644 --- a/morph/__init__.py +++ b/morph/__init__.py @@ -1 +1 @@ -from .nn.morph import once # facility tate "morph.once" \ No newline at end of file +from .nn import once # facilitate "morph.once" \ No newline at end of file diff --git a/morph/layers/widen.py b/morph/layers/widen.py index c25ee70..3e737c2 100644 --- a/morph/layers/widen.py +++ b/morph/layers/widen.py @@ -4,7 +4,7 @@ import torch.nn as nn from ..nn.utils import layer_has_bias, redo_layer -from .._utils import check, round +from ..utils import check, round # NOTE: should factor be {smaller, default at all}? 
diff --git a/morph/layers/widen_test.py b/morph/layers/widen_test.py new file mode 100644 index 0000000..0c87a66 --- /dev/null +++ b/morph/layers/widen_test.py @@ -0,0 +1,24 @@ +import unittest + +from .widen import widen, nn +from .._error import ValidationError + + +class TestWiden_Functional(unittest.TestCase): + + DUD_LINEAR = nn.Linear(1, 1) + + def test_widen_width_factor_too_small_should_fail(self): + with self.assertRaises(ValidationError): + widen(self.DUD_LINEAR, 0.8) + + def test_widen_width_factor_identity_should_fail(self): + with self.assertRaises(ValidationError): + widen(self.DUD_LINEAR, 1.0) + + def test_widen_width_factor_increases_layer_generously(self): + pass + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file diff --git a/morph/nn/utils.py b/morph/nn/utils.py index 23e0468..9c1764c 100644 --- a/morph/nn/utils.py +++ b/morph/nn/utils.py @@ -1,11 +1,11 @@ import torch.nn as nn from morph.nn._types import type_name, type_supported -from morph._utils import check +from morph.utils import check from typing import List, Tuple, TypeVar -ML = TypeVar('MODULES', List[nn.Module]) +ML = List[nn.Module] # Type constrained to be the results of nn.Module.children() or ...named_children() CL = TypeVar('MODULE_CHILDREN_LIST', ML, List[Tuple[str, nn.Module]]) diff --git a/morph/_models.py b/morph/testing/models.py similarity index 100% rename from morph/_models.py rename to morph/testing/models.py diff --git a/morph/_utils.py b/morph/utils.py similarity index 100% rename from morph/_utils.py rename to morph/utils.py diff --git a/morph/utils_test.py b/morph/utils_test.py new file mode 100644 index 0000000..c2eadf1 --- /dev/null +++ b/morph/utils_test.py @@ -0,0 +1,19 @@ +import unittest + +from .utils import round + +class TestGlobalUtilities(unittest.TestCase): + + def test_round_down(self): + test = 1.2 + expected = 1 + self.assertEqual(expected, round(test), '1.2 should round DOWN, to 1') + + def test_round_up(self): + test = 1.7 + 
expected = 2 + self.assertEqual(expected, round(test), '1.7 should round UP, to 2') + + +if __name__ == "__main__": + unittest.main() \ No newline at end of file From 57d0433b9d70b3ced65b7de60fde130874d4a9bf Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Mon, 18 Feb 2019 23:11:16 -0500 Subject: [PATCH 11/12] Release 0.1.0/sparsify 2019 02 18 (#11) * Attempt to test for #4 PyTorch's boolean comparison crap isn't useful and makes it a pain to test exact tensor values. * Will resume later * Skipping sparsify test It's a painfully simple function that has worked every time I've used it. - No it doesn't handle every edge case + Yes, it gets the job done and can be packaged for the general case * Use instance `.nonzero()` instead of `torch.nonzero()` * Fix "type-check" in layer inspectors * WIP: Implement shrink() in terms of resize_layers() It was as easy as I wanted it to be. * The complexity is how to handle a given nested layer + Those will get implemented with a given feature - Need to program feature detection TODO: + Implement the resizing on a layer-by-layer case, to make the shrinking a bit different + Instead of applying the data transformation uniformly, each layer gets its own factor + Those factors will be computed as 1 - percent_waste(layer) * Lay out skeleton for the true shrinking algo #4 * shrink_layer() is simple * Justification for giving Shrinkage an 'input_dimension' property: > The thought is that channel depth doesn't change the output dimensions for CNNs, and that's the attribute we're concerned with in the convolutional case... * Linear layers only have two dimensions, so it's a huge deal there. * RNNs do linear things over 'timesteps', so it's a big deal there. * Residual/identity/skip-connections in CNNs need this. > __It's decided__.
The attribute stays --- check-prune-widen.ipynb | 171 ++++++++++++++++++++++++++++++++++++ morph/layers/sparse.py | 2 +- morph/layers/sparse_test.py | 16 ++++ morph/nn/resizing.py | 6 ++ morph/nn/shrink.py | 107 +++++++++++++++++++--- morph/nn/utils.py | 16 ++-- morph/nn/widen.py | 17 +++- 7 files changed, 313 insertions(+), 22 deletions(-) create mode 100644 check-prune-widen.ipynb create mode 100644 morph/layers/sparse_test.py create mode 100644 morph/nn/resizing.py diff --git a/check-prune-widen.ipynb b/check-prune-widen.ipynb new file mode 100644 index 0000000..4a1a13e --- /dev/null +++ b/check-prune-widen.ipynb @@ -0,0 +1,171 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import morph" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "morph.nn" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "??morph.nn.once" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "import morph.nn.shrink as ms" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from morph.testing.models import EasyMnist" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "some_linear = ms.nn.Linear(3, 2)\n", + "c = [c for c in some_linear.children()]\n", + "len(c)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "EasyMnist(\n", + " (linear1): Linear(in_features=784, out_features=1000, 
bias=True)\n", + " (linear2): Linear(in_features=1000, out_features=30, bias=True)\n", + " (linear3): Linear(in_features=30, out_features=10, bias=True)\n", + ")" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "EasyMnist()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Module(\n", + " (linear1): Linear(in_features=784, out_features=700, bias=True)\n", + " (linear2): Linear(in_features=700, out_features=21, bias=True)\n", + " (linear3): Linear(in_features=21, out_features=10, bias=True)\n", + ")" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ms.prune(EasyMnist())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/morph/layers/sparse.py b/morph/layers/sparse.py index 1f29dd6..d62b316 100644 --- a/morph/layers/sparse.py +++ b/morph/layers/sparse.py @@ -28,7 +28,7 @@ def percent_waste(layer: nn.Module) -> float: weight matrix/tensor to determine how many neurons can be spared """ w = layer.weight - non_sparse_w = torch.nonzero(sparsify(w)) + non_sparse_w = 
sparsify(w).nonzero() non_zero_count = non_sparse_w.numel() // len(non_sparse_w[0]) percent_size = non_zero_count / w.numel() diff --git a/morph/layers/sparse_test.py b/morph/layers/sparse_test.py new file mode 100644 index 0000000..aab6657 --- /dev/null +++ b/morph/layers/sparse_test.py @@ -0,0 +1,16 @@ +from unittest import main as test_main, TestCase, skip + +from .sparse import sparsify, torch + +class TestSparseFunctions(TestCase): + + @skip("Skipping value-wise comparison until better solution than iterating all tensor values") + def test_sparsify_selected_indices_should_have_sub_threshold_values(self): + test_threshold = 0.1 + test_tensor = torch.randn(3, 2) + expected = torch.where(test_tensor > test_threshold, test_tensor, torch.zeros(3, 2)) + self.assertEqual(expected, sparsify(test_tensor, test_threshold)) + + +if __name__ == "__main__": + test_main() \ No newline at end of file diff --git a/morph/nn/resizing.py b/morph/nn/resizing.py new file mode 100644 index 0000000..79caee5 --- /dev/null +++ b/morph/nn/resizing.py @@ -0,0 +1,6 @@ +from collections import namedtuple + +Resizing = namedtuple('Resizing', ['input_size', 'output_size'], defaults=[0, 0]) +Resizing.__doc__ += ': Baseclass for a type that encapsulates a resized layer' +Resizing.input_size.__doc__ = "The layer's \"new\" input dimension size (Linear -> in_features, Conv2d -> in_channels)" +Resizing.output_size.__doc__ = "The layer's \"new\" output dimension size (Linear -> out_features, Conv2d -> out_channels)" diff --git a/morph/nn/shrink.py b/morph/nn/shrink.py index 5be3320..661990b 100644 --- a/morph/nn/shrink.py +++ b/morph/nn/shrink.py @@ -1,20 +1,103 @@ from morph.layers.sparse import percent_waste -from morph._utils import check, round -from morph.nn.utils import in_dim, out_dim +from morph.utils import check, round +from .resizing import Resizing +from .utils import in_dim, out_dim, group_layers_by_algo +from .widen import resize_layers +from ._types import type_name + +from typing 
import List import torch.nn as nn -def calc_reduced_size(layer: nn.Module) -> (int, int): - """Calculates the reduced size of the layer, post training (initial or morphed re-training) - so the layers can be resized. +class Shrinkage: + """ + An intermediary for the "Shrink" step of the three step Morphing algorithm. + Rather than have all of the state be free in the small scope of a mega-function, + these abstractions ease the way of implementing the shrinking and prune of the + network. + * Given that we have access to the total count of nodes, and how wasteful a layer was + we can deduce any necessary changes once given a new input dimension + * We expect input dimensions to change to accomodate the trimmed down earlier layers, + but we want an expansion further along to allow the opening of bottlenecks in the architecture + """ + + def __init__(self, input_dimension: int, initial_parameters: int, + waste_percentage: float): + self.input_dimension = input_dimension # TODO: is this relevant in any non-Linear case? + self.initial_parameters = initial_parameters + self.waste_percentage = waste_percentage + self.reduced_parameters = Shrinkage.reduce_parameters(initial_parameters, + waste_percentage) + + @staticmethod + def reduce_parameters(initial_parameters: int, waste: float) -> int: + """Calculates the new, smaller, number of paratemers that this instance encapsulates""" + percent_keep = (1. - waste) + unrounded_params_to_keep = percent_keep * initial_parameters + # round digital up to the nearest integer + return round(unrounded_params_to_keep) + + +def shrink_to_resize(shrinkage: Shrinkage, new_input_dimension: int) -> Resizing: + """Given the `new_input_dimension`, calculate a reshaping/resizing for the parameters + of the supplied `shrinkage`. + We round up the new output dimension, generously allowing for opening bottlenecks. + Iteratively, any waste introduced is pruned hereafter. 
(Needs proof/unit test) """ - # TODO: remove this guard when properly we protect access to this function - check( - type(layer) == nn.Conv2d or type(layer) == nn.Linear, - 'Invalid layer type: ' + type(layer)) + new_output_dimension = round(shrinkage.reduced_parameters / new_input_dimension) + return Resizing(new_input_dimension, new_output_dimension) + + +#################### prove of a good implementation #################### + + +def uniform_prune(net: nn.Module) -> nn.Module: + """Shrink the network down 70%. Input and output dimensions are not altered""" + return resize_layers(net, width_factor=0.7) + + +#################### the algorithm to end all algorithms #################### + + +def shrink_layer(layer: nn.Module) -> Shrinkage: + waste = percent_waste(layer) + parameter_count = layer.weight.numel() # the count is already tracked for us + return Shrinkage(in_dim(layer), parameter_count, waste) + + +def fit_layer_sizes(layer_sizes: List[Shrinkage]) -> List[Resizing]: + # TODO: where's the invocation site for shrink_to_resize + pass + + +def transform(original_layer: nn.Module, new_shape: Resizing) -> nn.Module: + # TODO: this might just be utils.redo_layer, without the primitive obsession + pass + + +def shrink_prune_fit(net: nn.Module) -> nn.Module: + first, middle_layers, last = group_layers_by_algo(net) + shrunk = { + "first": shrink_layer(first), + "middle": [shrink_layer(m) for m in middle_layers], + "last": shrink_layer(last) + } + + # FIXME: why doesn't the linter like `fitted_layers` + fitted_layers = fit_layer_sizes([shrunk["first"], *shrunk["middle"], shrunk["last"]]) + + # iteration very similar to `resize_layers` but matches Shrinkage with the corresponding layer + new_first, new_middle_layers, new_last = group_layers_by_algo(fitted_layers) + + new_net = nn.Module() + + new_net.add_module(type_name(first), transform(first, new_first)) + + for old, new in zip(middle_layers, new_middle_layers): + new_net.add_module(type_name(old), transform(old, 
new)) + pass # append to new_net with the Shrinkage's properties - percent_keep = 1 - percent_waste(layer) - shrunk_in, shrunk_out = percent_keep * in_dim(layer), percent_keep * out_dim(layer) + new_net.add_module(type_name(last), transform(last, new_last)) - return round(shrunk_in), round(shrunk_out) + return new_net diff --git a/morph/nn/utils.py b/morph/nn/utils.py index 9c1764c..1df2ba7 100644 --- a/morph/nn/utils.py +++ b/morph/nn/utils.py @@ -47,22 +47,26 @@ def make_children_list(children_or_named_children): def in_dim(layer: nn.Module) -> int: - check(type_supported(layer)) + """Returns the input dimension of a given (supported) `layer`""" + layer_name = type_name(layer) + check(type_supported(layer_name)) - if layer_is_linear(layer): + if layer_is_linear(layer_name): return layer.in_features - elif layer_is_conv2d(layer): + elif layer_is_conv2d(layer_name): return layer.in_channels else: raise RuntimeError('Inspecting on unsupported layer') def out_dim(layer: nn.Module) -> int: - check(type_supported(layer)) + """Returns the output dimension of a given (supported) `layer`""" + layer_name = type_name(layer) + check(type_supported(layer_name)) - if layer_is_linear(layer): + if layer_is_linear(layer_name): return layer.out_features - elif layer_is_conv2d(layer): + elif layer_is_conv2d(layer_name): return layer.out_channels else: raise RuntimeError('Inspecting on unsupported layer') diff --git a/morph/nn/widen.py b/morph/nn/widen.py index fee07fe..8f5e3ab 100644 --- a/morph/nn/widen.py +++ b/morph/nn/widen.py @@ -3,10 +3,14 @@ import logging from morph.nn.utils import group_layers_by_algo, make_children_list, out_dim, redo_layer -from morph._utils import round +from morph.utils import round from morph.nn._types import type_name, type_supported +def widen(net: nn.Module, width_factor: float = 1.4) -> nn.Module: + return resize_layers(net, width_factor) + + def resize_layers(net: nn.Module, width_factor: float = 1.4) -> nn.Module: """Perform a uniform layer 
widening, which increases the output dimension for fully-connected layers and the number of filters for convolutional layers. @@ -15,7 +19,7 @@ def resize_layers(net: nn.Module, width_factor: float = 1.4) -> nn.Module: old_layers = make_children_list(net.named_children()) (first_name, first_layer), middle, last = group_layers_by_algo(old_layers) - first_layer_output_size = first_layer.out_channels # count of the last layer's out features + first_layer_output_size = out_dim(first_layer) # count of the last layer's out features new_out_next_in = round(first_layer_output_size * width_factor) @@ -32,6 +36,9 @@ def resize_layers(net: nn.Module, width_factor: float = 1.4) -> nn.Module: new_layer = redo_layer(child, new_in=new_out_next_in, new_out=new_out) new_out_next_in = new_out network.add_module(name, new_layer) + elif type_is_nested(child): + raise NotImplementedError( + 'Currently do not support for nested structures (i.e. ResidualBlock, nn.Sequntial)') else: logging.warning(f"Encountered a non-resizable layer: {type(child)}") network.add_module(name, child) @@ -39,4 +46,8 @@ def resize_layers(net: nn.Module, width_factor: float = 1.4) -> nn.Module: last_name, last_layer = last network.add_module(last_name, redo_layer(last_layer, new_in=new_out_next_in)) - return network \ No newline at end of file + return network + +def type_is_nested(layer: nn.Module) -> bool: + """Returns true is the `layer` has children""" + return bool(make_children_list(layer)) \ No newline at end of file From 5e14afc3f620b79cca12cfdaf0f1f5168a028633 Mon Sep 17 00:00:00 2001 From: Stephen Fox Date: Wed, 1 May 2019 06:56:48 -0400 Subject: [PATCH 12/12] Add two MNIST models for testing and demos 1. simple sequential linear model for testing 2. 
Simple 3-layer conv net, with a brief explanation of project goals --- morph/testing/models.py | 44 ++++++++++++++++++++++++++++++++++++++--- morph/utils.py | 14 +++++++++++-- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/morph/testing/models.py b/morph/testing/models.py index 410eab7..8035b7e 100644 --- a/morph/testing/models.py +++ b/morph/testing/models.py @@ -1,7 +1,7 @@ import torch import torch.nn as nn import torch.nn.functional as F - +from utils import Lambda class EasyMnist(nn.Module): @@ -13,11 +13,49 @@ def __init__(self): def forward(self, x_batch: torch.Tensor): """Simple ReLU-based activations through all layers of the DNN. - Simple and effectively deep neural network. No frills. + Simple and sufficiently deep neural network. No frills. """ _input = x_batch.view(-1, 784) # shape for our linear1 out1 = F.relu(self.linear1(x_batch)) out2 = F.relu(self.linear2(out1)) out3 = F.relu(self.linear3(out2)) - return out3 \ No newline at end of file + return out3 + + +# for comparison with the above +def EasyMnistSeq(): + return nn.Sequential( + Lambda(lambda x: x.reshape(-1, 784)), + nn.Linear(784, 1000), + nn.Relu(), + nn.Linear(1000, 300), + nn.Relu(), + nn.Linear(300, 10), + nn.Relu(), + ) + + +class MnistConvNet(nn.Module): + def __init__(self, interim_size=16): + """ + A simple and shallow deep CNN to show that morph will shrink this architecture, + which will inherently be wasteful on the task of classifying MNIST digits with + accuracy above 95%. 
+ By default produces a 1x16 -> 16x16 -> 16x10 convnet + """ + super().__init__() + self.conv1 = nn.Conv2d(1, interim_size, kernel_size=3, stride=2, padding=1) + self.conv2 = nn.Conv2d(interim_size, interim_size, kernel_size=3, stride=2, padding=1) + self.conv3 = nn.Conv2d(interim_size, 10, kernel_size=3, stride=2, padding=1) + + def forward(self, xb): + xb = xb.view(-1, 1, 28, 28) # any batch_size, 1 channel, 28x28 pixels + xb = F.relu(self.conv1(xb)) + xb = F.relu(self.conv2(xb)) + xb = F.relu(self.conv3(xb)) + xb = F.avg_pool2d(xb, 4) + + # reshape the output to the second dimension of the pool size, and just fill the rest to whatever. + return xb.view(-1, xb.size(1)) + \ No newline at end of file diff --git a/morph/utils.py b/morph/utils.py index 3639185..680c4b9 100644 --- a/morph/utils.py +++ b/morph/utils.py @@ -1,5 +1,5 @@ from ._error import ValidationError - +import torch.nn as nn def check(pred: bool, message='Validation failed'): if not pred: raise ValidationError(message) @@ -9,4 +9,14 @@ def round(value: float) -> int: """Rounds a `value` up to the next integer if possible. Performs differently from the standard Python `round` """ - return int(value + .5) \ No newline at end of file + return int(value + .5) + + +# courtesy of https://pytorch.org/tutorials/beginner/nn_tutorial.html#nn-sequential +class Lambda(nn.Module): + def __init__(self, func): + super().__init__() + self.func = func + + def forward(self, x): + return self.func(x)