Source code for nupic.research.frameworks.pytorch.models.not_so_densenet

#  Numenta Platform for Intelligent Computing (NuPIC)
#  Copyright (C) 2019, Numenta, Inc.  Unless you have an agreement
#  with Numenta, Inc., for a separate license for this software code, the
#  following terms and conditions apply:
#
#  This program is free software: you can redistribute it and/or modify
#  it under the terms of the GNU Affero Public License version 3 as
#  published by the Free Software Foundation.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#  See the GNU Affero Public License for more details.
#
#  You should have received a copy of the GNU Affero Public License
#  along with this program.  If not, see http://www.gnu.org/licenses.
#
#  http://numenta.org/licenses/
#
import math

import torch.nn as nn
from torchvision.models.densenet import _DenseBlock, _Transition

from nupic.torch.modules import Flatten, KWinners2d, SparseWeights, SparseWeights2d


def _sparsify_relu(parent, relu_names, channels, percent_on, k_inference_factor,
                   boost_strength, boost_strength_factor, duty_cycle_period):
    """Replace ReLU with k-winners where percent_on < 1.0.

    :param parent: Parent Layer containing the ReLU modules to be replaced
    :param relu_names: List of ReLU module names to be replaced.
    :param channels: List of input channels for each k-winner.
    :param percent_on: List of 'percent_on' parameters for each ReLU
    :param k_inference_factor: During inference (training=False) we increase
                               `percent_on` in all sparse layers by this factor
    :param boost_strength: boost strength (0.0 implies no boosting)
    :param boost_strength_factor: Boost strength factor to use [0..1]
    :param duty_cycle_period: The period used to calculate duty cycles
    """
    for i, name in enumerate(relu_names):
        if percent_on[i] >= 1.0:
            continue

        assert isinstance(parent.__getattr__(name), nn.ReLU)
        parent.__setattr__(name, KWinners2d(
            channels=channels[i],
            percent_on=percent_on[i],
            k_inference_factor=k_inference_factor,
            boost_strength=boost_strength,
            boost_strength_factor=boost_strength_factor,
            duty_cycle_period=duty_cycle_period,
        ))
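
# Illustrative sketch (not part of the original module; names and values are
# hypothetical): given a parent module with a named ReLU, `_sparsify_relu`
# swaps that activation for a KWinners2d with the matching channel count.
#
#     from collections import OrderedDict
#     layer = nn.Sequential(OrderedDict([
#         ("norm1", nn.BatchNorm2d(64)),
#         ("relu1", nn.ReLU(inplace=True)),
#         ("conv1", nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False)),
#     ]))
#     _sparsify_relu(parent=layer, relu_names=["relu1"], channels=[64],
#                    percent_on=[0.3], k_inference_factor=1.5,
#                    boost_strength=1.5, boost_strength_factor=0.95,
#                    duty_cycle_period=1000)
#     assert isinstance(layer.relu1, KWinners2d)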


def _sparsify_cnn(parent, cnn_names, weight_sparsity):
    """Enforce weight sparsity on the given cnn modules during training.

    :param parent: Parent Layer containing the CNN modules to sparsify
    :param cnn_names: List of CNN module names to sparsify
    :param weight_sparsity: Percent of weights that are allowed to be non-zero
    """
    for i, name in enumerate(cnn_names):
        if weight_sparsity[i] >= 1.0:
            continue

        module = parent.__getattr__(name)
        parent.__setattr__(name, SparseWeights2d(module, weight_sparsity[i]))
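
# Illustrative sketch (hypothetical names and values): `_sparsify_cnn` wraps a
# named Conv2d in SparseWeights2d so that only the given fraction of its
# weights may be non-zero during training, e.g.
#
#     _sparsify_cnn(parent=layer, cnn_names=["conv1"], weight_sparsity=[0.4])
#
# replaces `layer.conv1` with `SparseWeights2d(layer.conv1, 0.4)`, allowing
# roughly 40% of that convolution's weights to be non-zero.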


def _sparsify_linear(parent, linear_names, weight_sparsity):
    """Enforce weight sparsity on the given linear modules during training.

    :param parent: Parent Layer containing the Linear modules to sparsify
    :param linear_names: List of Linear module names to sparsify
    :param weight_sparsity: Percent of weights that are allowed to be non-zero
    """
    for i, name in enumerate(linear_names):
        if weight_sparsity[i] >= 1.0:
            continue

        module = parent.__getattr__(name)
        parent.__setattr__(name, SparseWeights(module, weight_sparsity[i]))
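
# Illustrative sketch (hypothetical values): the linear variant works the same
# way, wrapping a named nn.Linear in SparseWeights, e.g.
#
#     _sparsify_linear(parent=model, linear_names=["classifier"],
#                      weight_sparsity=[0.2])
#
# allows roughly 20% of the classifier weights to be non-zero.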


class DenseNetCIFAR(nn.Sequential):
    """DenseNet_ CIFAR model.

    Based on :mod:`torchvision.models.densenet` blocks. See the original
    `densenet.lua`_ implementation for more details.

    .. _DenseNet: https://arxiv.org/abs/1608.06993
    .. _`densenet.lua`: https://github.com/liuzhuang13/DenseNet/blob/master/models/densenet.lua # noqa

    :param block_config: How many layers in each pooling block.
                         If None, compute from `depth`
    :param depth: DenseNet network depth. If None then `block_config` must be given
    :param growth_rate: How many filters to add each layer (`k` in the paper)
    :param reduction: Channel compression ratio at the transition layers
    :param num_classes: Number of classification classes
    :param bottleneck_size: Multiplicative factor for the number of bottleneck layers
    :param avg_pool_size: Average pool size for the last transition layer
    """

    def __init__(self, block_config=None, depth=100, growth_rate=12,
                 reduction=0.5, num_classes=10, bottleneck_size=4,
                 avg_pool_size=8):
        super(DenseNetCIFAR, self).__init__()

        # Compute blocks from depth
        if block_config is None:
            layers = (depth - 4) // 6
            block_config = (layers,) * 3

        # First convolution
        num_features = growth_rate * 2
        self.add_module("conv", nn.Conv2d(in_channels=3,
                                          out_channels=num_features,
                                          kernel_size=3, padding=1, bias=False))

        for i, num_layers in enumerate(block_config):
            block = _DenseBlock(num_layers=num_layers,
                                num_input_features=num_features,
                                bn_size=bottleneck_size,
                                growth_rate=growth_rate,
                                drop_rate=0)
            self.add_module("block{0}".format(i + 1), block)
            num_features = num_features + num_layers * growth_rate
            if i != len(block_config) - 1:
                out_features = math.floor(num_features * reduction)
                trans = _Transition(num_input_features=num_features,
                                    num_output_features=out_features)
                self.add_module("transition{0}".format(i + 1), trans)
                num_features = out_features

        # Final batch norm
        self.add_module("norm", nn.BatchNorm2d(num_features))
        self.add_module("relu", nn.ReLU(inplace=True))
        self.add_module("avg_pool", nn.AvgPool2d(kernel_size=avg_pool_size))

        # Classifier layer
        outputs = int(num_features * 16 / (avg_pool_size * avg_pool_size))
        self.add_module("flatten", Flatten())
        self.add_module("classifier", nn.Linear(outputs, num_classes))

        # Official init from torch repo.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight.data)
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()
            elif isinstance(m, nn.Linear):
                m.bias.data.zero_()
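

# Worked example (illustrative only) of the constructor arithmetic with the
# defaults depth=100, growth_rate=12, reduction=0.5:
#     block_config = ((100 - 4) // 6,) * 3 = (16, 16, 16)  -> three dense blocks
#     num_features = 2 * 12 = 24 after the first convolution, then
#         block1: 24 + 16 * 12 = 216,   transition1: floor(216 * 0.5) = 108
#         block2: 108 + 16 * 12 = 300,  transition2: floor(300 * 0.5) = 150
#         block3: 150 + 16 * 12 = 342   (no transition after the last block)
# so the final batch norm ("norm") sees 342 channels.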


class NoSoDenseNetCIFAR(DenseNetCIFAR):
    """Modified DenseNet_ architecture using sparse dense blocks and sparse
    transition layers. Inspired by the original `densenet.lua`_ implementation.

    .. _DenseNet: https://arxiv.org/abs/1608.06993
    .. _`densenet.lua`: https://github.com/liuzhuang13/DenseNet/blob/master/models/densenet.lua # noqa

    :param block_config: How many layers in each pooling block.
                         If None, compute from `depth`
    :param depth: DenseNet network depth. If None then `block_config` must be given
    :param growth_rate: How many filters to add each layer (`k` in the paper)
    :param reduction: Channel compression ratio at the transition layers
    :param num_classes: Number of classification classes
    :param bottleneck_size: Multiplicative factor for the number of bottleneck layers
    :param avg_pool_size: Average pool size for the last transition layer
    :param dense_percent_on: Percent of units allowed to remain on before each
                             convolution layer of a dense layer
    :param dense_sparse_weights: Percent of weights allowed to be non-zero in
                                 each CNN of a dense layer
    :param transition_percent_on: Percent of units allowed to remain on before
                                  the convolution layer of a transition layer
    :param transition_sparse_weights: Percent of weights allowed to be non-zero
                                      in the CNN of a transition layer
    :param classifier_percent_on: Percent of units allowed to remain on after
                                  the last batch norm, before the classifier
    :param classifier_sparse_weights: Percent of weights allowed to be non-zero
                                      in the classifier
    :param k_inference_factor: During inference (training=False) we increase
                               `percent_on` in all sparse layers by this factor
    :param boost_strength: Boost strength (0.0 implies no boosting)
    :param boost_strength_factor: Boost strength factor to use [0..1]
    :param duty_cycle_period: The period used to calculate duty cycles
    """

    def __init__(
        self,
        block_config=None,
        depth=100,
        growth_rate=12,
        reduction=0.5,
        num_classes=10,
        bottleneck_size=4,
        avg_pool_size=4,
        dense_percent_on=([1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]),
        dense_sparse_weights=([1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]),
        transition_percent_on=(1.0, 1.0, 1.0),
        transition_sparse_weights=(1.0, 1.0, 1.0),
        classifier_percent_on=1.0,
        classifier_sparse_weights=1.0,
        k_inference_factor=1.0,
        boost_strength=1.5,
        boost_strength_factor=0.95,
        duty_cycle_period=1000,
    ):
        super(NoSoDenseNetCIFAR, self).__init__(block_config=block_config,
                                                depth=depth,
                                                growth_rate=growth_rate,
                                                reduction=reduction,
                                                num_classes=num_classes,
                                                bottleneck_size=bottleneck_size,
                                                avg_pool_size=avg_pool_size)

        # Sparsify the ReLU after the last batch norm, before the classifier
        _sparsify_relu(parent=self,
                       relu_names=["relu"],
                       channels=[self.norm.num_features],
                       percent_on=[classifier_percent_on],
                       k_inference_factor=k_inference_factor,
                       boost_strength=boost_strength,
                       boost_strength_factor=boost_strength_factor,
                       duty_cycle_period=duty_cycle_period)

        # Sparsify the classifier weights
        _sparsify_linear(parent=self,
                         linear_names=["classifier"],
                         weight_sparsity=[classifier_sparse_weights])

        # Sparsify dense blocks
        def _is_denseblock(x):
            return isinstance(x, _DenseBlock)

        def _is_norm(x):
            return isinstance(x, nn.BatchNorm2d)

        def _is_relu(name_child):
            return isinstance(name_child[1], nn.ReLU)

        def _is_cnn(name_child):
            return isinstance(name_child[1], nn.Conv2d)

        for i, block in enumerate(filter(_is_denseblock, self.children())):
            for layer in block.children():
                channels = [bn.num_features
                            for bn in filter(_is_norm, layer.children())]
                relu_names = [x[0]
                              for x in filter(_is_relu, layer.named_children())]
                _sparsify_relu(parent=layer,
                               relu_names=relu_names,
                               channels=channels,
                               percent_on=dense_percent_on[i],
                               k_inference_factor=k_inference_factor,
                               boost_strength=boost_strength,
                               boost_strength_factor=boost_strength_factor,
                               duty_cycle_period=duty_cycle_period)

                cnn_names = [x[0]
                             for x in filter(_is_cnn, layer.named_children())]
                _sparsify_cnn(parent=layer,
                              cnn_names=cnn_names,
                              weight_sparsity=dense_sparse_weights[i])

        # Sparsify transition blocks
        def _is_transition(x):
            return isinstance(x, _Transition)

        for i, transition in enumerate(filter(_is_transition, self.children())):
            channels = [bn.num_features
                        for bn in filter(_is_norm, transition.children())]
            relu_names = [x[0]
                          for x in filter(_is_relu, transition.named_children())]
            _sparsify_relu(parent=transition,
                           relu_names=relu_names,
                           channels=channels,
                           percent_on=(transition_percent_on[i],) * len(relu_names),
                           k_inference_factor=k_inference_factor,
                           boost_strength=boost_strength,
                           boost_strength_factor=boost_strength_factor,
                           duty_cycle_period=duty_cycle_period)

            cnn_names = [x[0]
                         for x in filter(_is_cnn, transition.named_children())]
            _sparsify_cnn(parent=transition,
                          cnn_names=cnn_names,
                          weight_sparsity=[transition_sparse_weights[i]])
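

# Note on the shape of the sparsity arguments above: `dense_percent_on[i]` and
# `dense_sparse_weights[i]` apply to every dense layer in dense block `i`, and
# each torchvision dense layer contains two ReLU/Conv pairs ("relu1"/"conv1"
# and "relu2"/"conv2"), so each entry needs two values.
# `transition_percent_on[i]` and `transition_sparse_weights[i]` apply to
# transition layer `i` (one transition sits between consecutive dense blocks).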


if __name__ == "__main__":
    nsdn = NoSoDenseNetCIFAR(
        classifier_percent_on=0.5,
        classifier_sparse_weights=0.2,
        transition_percent_on=(1.0, 0.1, 0.2),
        transition_sparse_weights=(0.1, 1.0, 0.2),
        dense_percent_on=([1.0, 1.0], [0.1, 1.0], [0.1, 0.2]),
        dense_sparse_weights=([1.0, 1.0], [0.1, 1.0], [0.1, 0.2]))
    print(nsdn)