# Numenta Platform for Intelligent Computing (NuPIC)
# Copyright (C) 2019, Numenta, Inc. Unless you have an agreement
# with Numenta, Inc., for a separate license for this software code, the
# following terms and conditions apply:
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero Public License version 3 as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Affero Public License for more details.
#
# You should have received a copy of the GNU Affero Public License
# along with this program. If not, see http://www.gnu.org/licenses.
#
# http://numenta.org/licenses/
#

import math

import torch.nn as nn
from torchvision.models.densenet import _DenseBlock, _Transition

from nupic.torch.modules import Flatten, KWinners2d, SparseWeights, SparseWeights2d


def _sparsify_relu(parent, relu_names, channels, percent_on, k_inference_factor,
                   boost_strength, boost_strength_factor, duty_cycle_period):
    """Replace ReLU with k-winners where percent_on < 1.0.

    :param parent: Parent module containing the ReLU modules to be replaced
    :param relu_names: List of ReLU module names to be replaced
    :param channels: List of input channels for each k-winner
    :param percent_on: List of `percent_on` parameters, one for each ReLU
    :param k_inference_factor: During inference (training=False) we increase
        `percent_on` in all sparse layers by this factor
    :param boost_strength: Boost strength (0.0 implies no boosting)
    :param boost_strength_factor: Boost strength factor to use [0..1]
    :param duty_cycle_period: The period used to calculate duty cycles
    """
for i, name in enumerate(relu_names):
if percent_on[i] >= 1.0:
continue
        assert isinstance(getattr(parent, name), nn.ReLU)
        setattr(parent, name, KWinners2d(
channels=channels[i],
percent_on=percent_on[i],
k_inference_factor=k_inference_factor,
boost_strength=boost_strength,
boost_strength_factor=boost_strength_factor,
duty_cycle_period=duty_cycle_period,
))
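

def _demo_sparsify_relu():
    """Minimal usage sketch, not part of the original module.

    Shows ``_sparsify_relu`` replacing an ``nn.ReLU`` with ``KWinners2d``.
    The channel count and sparsity values below are illustrative assumptions.
    """
    layer = nn.Sequential()
    layer.add_module("conv", nn.Conv2d(3, 8, kernel_size=3, padding=1))
    layer.add_module("relu", nn.ReLU(inplace=True))
    # Keep the 30% most active units; the rest are zeroed by k-winners
    _sparsify_relu(parent=layer, relu_names=["relu"], channels=[8],
                   percent_on=[0.3], k_inference_factor=1.0,
                   boost_strength=1.5, boost_strength_factor=0.95,
                   duty_cycle_period=1000)
    assert isinstance(layer.relu, KWinners2d)
    return layer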


def _sparsify_cnn(parent, cnn_names, weight_sparsity):
    """Enforce weight sparsity on the given CNN modules during training.

    :param parent: Parent module containing the CNN modules to sparsify
    :param cnn_names: List of CNN module names to sparsify
    :param weight_sparsity: List of percentages of weights allowed to be
        non-zero, one per CNN module
    """
for i, name in enumerate(cnn_names):
if weight_sparsity[i] >= 1.0:
continue
        module = getattr(parent, name)
        setattr(parent, name, SparseWeights2d(module, weight_sparsity[i]))


def _sparsify_linear(parent, linear_names, weight_sparsity):
    """Enforce weight sparsity on the given linear modules during training.

    :param parent: Parent module containing the Linear modules to sparsify
    :param linear_names: List of Linear module names to sparsify
    :param weight_sparsity: List of percentages of weights allowed to be
        non-zero, one per Linear module
    """
for i, name in enumerate(linear_names):
if weight_sparsity[i] >= 1.0:
continue
        module = getattr(parent, name)
        setattr(parent, name, SparseWeights(module, weight_sparsity[i]))
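

def _demo_sparsify_weights():
    """Minimal usage sketch, not part of the original module.

    Shows ``_sparsify_cnn`` and ``_sparsify_linear`` wrapping modules in
    ``SparseWeights2d``/``SparseWeights``. The container is used only to hold
    named submodules; sizes and sparsity values are illustrative assumptions.
    """
    layer = nn.Sequential()
    layer.add_module("conv", nn.Conv2d(3, 8, kernel_size=3, padding=1))
    layer.add_module("fc", nn.Linear(8, 10))
    # Allow only 50% of conv weights and 20% of linear weights to be non-zero
    _sparsify_cnn(parent=layer, cnn_names=["conv"], weight_sparsity=[0.5])
    _sparsify_linear(parent=layer, linear_names=["fc"], weight_sparsity=[0.2])
    assert isinstance(layer.conv, SparseWeights2d)
    assert isinstance(layer.fc, SparseWeights)
    return layer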


class DenseNetCIFAR(nn.Sequential):
    """DenseNet_ CIFAR model. Based on :mod:`torchvision.models.densenet`
    blocks. See the original `densenet.lua`_ implementation for more details.

    .. _DenseNet: https://arxiv.org/abs/1608.06993
    .. _`densenet.lua`: https://github.com/liuzhuang13/DenseNet/blob/master/models/densenet.lua # noqa

    :param block_config: How many layers in each pooling block.
        If None, computed from `depth`
    :param depth: DenseNet network depth. If None then `block_config` must be given
    :param growth_rate: How many filters to add each layer (`k` in the paper)
    :param reduction: Channel compression ratio at the transition layers
    :param num_classes: Number of classification classes
    :param bottleneck_size: Multiplicative factor for the number of bottleneck layers
    :param avg_pool_size: Average pool size for the last transition layer
    """

def __init__(self,
block_config=None,
depth=100,
growth_rate=12,
reduction=0.5,
num_classes=10,
bottleneck_size=4,
avg_pool_size=8):
super(DenseNetCIFAR, self).__init__()
# Compute blocks from depth
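        # For these bottleneck blocks, depth = 6 * layers + 4: each dense
        # layer contributes two convolutions across the three blocks
        # (6 * layers), plus the initial convolution, the two transition
        # convolutions, and the classifier.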
if block_config is None:
layers = (depth - 4) // 6
block_config = (layers,) * 3
# First convolution
num_features = growth_rate * 2
self.add_module("conv", nn.Conv2d(in_channels=3,
out_channels=num_features,
kernel_size=3,
padding=1,
bias=False))
for i, num_layers in enumerate(block_config):
block = _DenseBlock(num_layers=num_layers,
num_input_features=num_features,
bn_size=bottleneck_size,
growth_rate=growth_rate,
drop_rate=0)
self.add_module("block{0}".format(i + 1), block)
num_features = num_features + num_layers * growth_rate
if i != len(block_config) - 1:
out_features = math.floor(num_features * reduction)
trans = _Transition(num_input_features=num_features,
num_output_features=out_features)
self.add_module("transition{0}".format(i + 1), trans)
num_features = out_features
# Final batch norm
self.add_module("norm", nn.BatchNorm2d(num_features))
self.add_module("relu", nn.ReLU(inplace=True))
self.add_module("avg_pool", nn.AvgPool2d(kernel_size=avg_pool_size))
        # Classifier layer. Each transition halves the 32x32 CIFAR maps, so
        # the maps entering the final average pooling are
        # 32 / 2 ** (num_blocks - 1) pixels on a side
        map_size = 32 // 2 ** (len(block_config) - 1)
        outputs = num_features * (map_size // avg_pool_size) ** 2
self.add_module("flatten", Flatten())
self.add_module("classifier", nn.Linear(outputs, num_classes))
# Official init from torch repo.
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight.data)
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
elif isinstance(m, nn.Linear):
m.bias.data.zero_()
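

def _demo_densenet_cifar():
    """Minimal usage sketch, not part of the original module.

    Runs a random CIFAR-sized batch through a small DenseNetCIFAR. The depth
    and batch size below are illustrative assumptions.
    """
    import torch

    model = DenseNetCIFAR(depth=22, growth_rate=12, num_classes=10)
    x = torch.randn(2, 3, 32, 32)  # two random 3x32x32 inputs
    return model(x)  # logits of shape (2, 10)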


class NoSoDenseNetCIFAR(DenseNetCIFAR):
    """Modified DenseNet_ architecture using sparse dense blocks and sparse
    transition layers. Inspired by the original `densenet.lua`_ implementation.

    .. _DenseNet: https://arxiv.org/abs/1608.06993
    .. _`densenet.lua`: https://github.com/liuzhuang13/DenseNet/blob/master/models/densenet.lua # noqa

    :param block_config: How many layers in each pooling block.
        If None, computed from `depth`
    :param depth: DenseNet network depth. If None then `block_config` must be given
    :param growth_rate: How many filters to add each layer (`k` in the paper)
    :param reduction: Channel compression ratio at the transition layers
    :param num_classes: Number of classification classes
    :param bottleneck_size: Multiplicative factor for the number of bottleneck layers
    :param avg_pool_size: Average pool size for the last transition layer
    :param dense_percent_on: Percent of units allowed to remain before each
        convolution layer of the dense layers, one list per block
    :param dense_sparse_weights: Percent of weights allowed to be non-zero in
        each CNN of the dense layers, one list per block
    :param transition_percent_on: Percent of units allowed to remain before the
        convolution layer of each transition layer
    :param transition_sparse_weights: Percent of weights allowed to be non-zero
        in the CNN of each transition layer
    :param classifier_percent_on: Percent of units allowed to remain after the
        last batch norm, before the classifier
    :param classifier_sparse_weights: Percent of weights allowed to be non-zero
        in the classifier
    :param k_inference_factor: During inference (training=False) we increase
        `percent_on` in all sparse layers by this factor
    :param boost_strength: Boost strength (0.0 implies no boosting)
    :param boost_strength_factor: Boost strength factor to use [0..1]
    :param duty_cycle_period: The period used to calculate duty cycles
    """

def __init__(
self,
block_config=None,
depth=100,
growth_rate=12,
reduction=0.5,
num_classes=10,
bottleneck_size=4,
avg_pool_size=4,
dense_percent_on=([1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]),
dense_sparse_weights=([1.0, 1.0], [1.0, 1.0], [1.0, 1.0], [1.0, 1.0]),
transition_percent_on=(1.0, 1.0, 1.0),
transition_sparse_weights=(1.0, 1.0, 1.0),
classifier_percent_on=1.0,
classifier_sparse_weights=1.0,
k_inference_factor=1.0,
boost_strength=1.5,
boost_strength_factor=0.95,
duty_cycle_period=1000,
):
super(NoSoDenseNetCIFAR, self).__init__(block_config=block_config,
depth=depth,
growth_rate=growth_rate,
reduction=reduction,
num_classes=num_classes,
bottleneck_size=bottleneck_size,
avg_pool_size=avg_pool_size)
# Sparsify relu after the last batch norm before the classifier
_sparsify_relu(parent=self,
relu_names=["relu"],
channels=[self.norm.num_features],
percent_on=[classifier_percent_on],
k_inference_factor=k_inference_factor,
boost_strength=boost_strength,
boost_strength_factor=boost_strength_factor,
duty_cycle_period=duty_cycle_period)
# Sparsify the classifier weights
_sparsify_linear(parent=self, linear_names=["classifier"],
weight_sparsity=[classifier_sparse_weights])
# Sparsify dense blocks
        def _is_denseblock(x):
            return isinstance(x, _DenseBlock)

        def _is_norm(x):
            return isinstance(x, nn.BatchNorm2d)

        def _is_relu(name_child):
            return isinstance(name_child[1], nn.ReLU)

        def _is_cnn(name_child):
            return isinstance(name_child[1], nn.Conv2d)

for i, block in enumerate(filter(_is_denseblock, self.children())):
for layer in block.children():
channels = [bn.num_features for bn in
filter(_is_norm, layer.children())]
relu_names = [x[0] for x in filter(_is_relu, layer.named_children())]
_sparsify_relu(parent=layer,
relu_names=relu_names,
channels=channels,
percent_on=dense_percent_on[i],
k_inference_factor=k_inference_factor,
boost_strength=boost_strength,
boost_strength_factor=boost_strength_factor,
duty_cycle_period=duty_cycle_period)
cnn_names = [x[0] for x in filter(_is_cnn, layer.named_children())]
_sparsify_cnn(parent=layer, cnn_names=cnn_names,
weight_sparsity=dense_sparse_weights[i])
        # Sparsify transition layers
        def _is_transition(x):
            return isinstance(x, _Transition)

for i, transition in enumerate(filter(_is_transition, self.children())):
channels = [bn.num_features for bn in
filter(_is_norm, transition.children())]
relu_names = [x[0] for x in filter(_is_relu, transition.named_children())]
_sparsify_relu(parent=transition,
relu_names=relu_names,
channels=channels,
percent_on=(transition_percent_on[i],) * len(relu_names),
k_inference_factor=k_inference_factor,
boost_strength=boost_strength,
boost_strength_factor=boost_strength_factor,
duty_cycle_period=duty_cycle_period)
cnn_names = [x[0] for x in filter(_is_cnn, transition.named_children())]
_sparsify_cnn(parent=transition, cnn_names=cnn_names,
weight_sparsity=[transition_sparse_weights[i]])


if __name__ == "__main__":
nsdn = NoSoDenseNetCIFAR(
classifier_percent_on=0.5,
classifier_sparse_weights=0.2,
transition_percent_on=(1.0, 0.1, 0.2),
transition_sparse_weights=(0.1, 1.0, 0.2),
dense_percent_on=([1.0, 1.0], [0.1, 1.0], [0.1, 0.2]),
dense_sparse_weights=([1.0, 1.0], [0.1, 1.0], [0.1, 0.2]))
print(nsdn)
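
    # Optional smoke test, a minimal sketch not present in the original file:
    # push one random CIFAR-sized batch through the sparse model. The batch
    # size and random input are illustrative assumptions.
    import torch

    dummy = torch.randn(2, 3, 32, 32)
    print(nsdn(dummy).shape)  # expected: torch.Size([2, 10])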