diff --git a/example-apps/PDD/pcb-defect-detection/libs/networks/slim_nets/resnet_v2.py b/example-apps/PDD/pcb-defect-detection/libs/networks/slim_nets/resnet_v2.py
new file mode 100755
index 0000000..7617701
--- /dev/null
+++ b/example-apps/PDD/pcb-defect-detection/libs/networks/slim_nets/resnet_v2.py
@@ -0,0 +1,333 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Contains definitions for the preactivation form of Residual Networks.
+
+Residual networks (ResNets) were originally proposed in:
+[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+    Deep Residual Learning for Image Recognition. arXiv:1512.03385
+
+The full preactivation 'v2' ResNet variant implemented in this module was
+introduced by:
+[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+    Identity Mappings in Deep Residual Networks. arXiv:1603.05027
+
+The key difference of the full preactivation 'v2' variant compared to the
+'v1' variant in [1] is the use of batch normalization before every weight
+layer.
+
+Typical use:
+
+   from tensorflow.contrib.slim.slim_nets import resnet_v2
+
+ResNet-101 for image classification into 1000 classes:
+
+   # inputs has shape [batch, 224, 224, 3]
+   with slim.arg_scope(resnet_v2.resnet_arg_scope()):
+      net, end_points = resnet_v2.resnet_v2_101(inputs, 1000, is_training=False)
+
+ResNet-101 for semantic segmentation into 21 classes:
+
+   # inputs has shape [batch, 513, 513, 3]
+   with slim.arg_scope(resnet_v2.resnet_arg_scope()):
+      net, end_points = resnet_v2.resnet_v2_101(inputs,
+                                                21,
+                                                is_training=False,
+                                                global_pool=False,
+                                                output_stride=16)
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from nets import resnet_utils
+
+slim = tf.contrib.slim
+resnet_arg_scope = resnet_utils.resnet_arg_scope
+
+
+@slim.add_arg_scope
+def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
+               outputs_collections=None, scope=None):
+  """Bottleneck residual unit variant with BN before convolutions.
+
+  This is the full preactivation residual unit variant proposed in [2]. See
+  Fig. 1(b) of [2] for its definition. Note that we use here the bottleneck
+  variant which has an extra bottleneck layer.
+
+  When putting together two consecutive ResNet blocks that use this unit, one
+  should use stride = 2 in the last unit of the first block.
+
+  Args:
+    inputs: A tensor of size [batch, height, width, channels].
+    depth: The depth of the ResNet unit output.
+    depth_bottleneck: The depth of the bottleneck layers.
+    stride: The ResNet unit's stride. Determines the amount of downsampling of
+      the unit's output compared to its input.
+    rate: An integer, rate for atrous convolution.
+    outputs_collections: Collection to add the ResNet unit output.
+    scope: Optional variable_scope.
+
+  Returns:
+    The ResNet unit's output.
+  """
+  with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc:
+    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
+    preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact')
+    if depth == depth_in:
+      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
+    else:
+      shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride,
+                             normalizer_fn=None, activation_fn=None,
+                             scope='shortcut')
+
+    residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1,
+                           scope='conv1')
+    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
+                                        rate=rate, scope='conv2')
+    residual = slim.conv2d(residual, depth, [1, 1], stride=1,
+                           normalizer_fn=None, activation_fn=None,
+                           scope='conv3')
+
+    output = shortcut + residual
+
+    return slim.utils.collect_named_outputs(outputs_collections,
+                                            sc.original_name_scope,
+                                            output)
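+
+
+# Example (sketch, not part of the upstream Slim code): calling `bottleneck`
+# directly on a dummy 256-channel input. Since depth == depth_in here, the
+# shortcut branch is subsampled rather than projected, and stride=2 halves the
+# spatial size:
+#
+#   inputs = tf.random_normal([1, 56, 56, 256])
+#   with slim.arg_scope(resnet_arg_scope()):
+#     net = bottleneck(inputs, depth=256, depth_bottleneck=64, stride=2)
+#   # net has shape [1, 28, 28, 256]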
+
+
+def resnet_v2(inputs,
+              blocks,
+              num_classes=None,
+              is_training=True,
+              global_pool=True,
+              output_stride=None,
+              include_root_block=True,
+              spatial_squeeze=False,
+              reuse=None,
+              scope=None):
+  """Generator for v2 (preactivation) ResNet models.
+
+  This function generates a family of ResNet v2 models. See the resnet_v2_*()
+  methods for specific model instantiations, obtained by selecting different
+  block instantiations that produce ResNets of various depths.
+
+  Training for image classification on Imagenet is usually done with [224, 224]
+  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
+  block for the ResNets defined in [1] that have nominal stride equal to 32.
+  However, for dense prediction tasks we advise that one uses inputs with
+  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
+  this case the feature maps at the ResNet output will have spatial shape
+  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
+  and corners exactly aligned with the input image corners, which greatly
+  facilitates alignment of the features to the image. Using as input [225, 225]
+  images results in [8, 8] feature maps at the output of the last ResNet block.
+
+  For dense prediction tasks, the ResNet needs to run in fully-convolutional
+  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
+  have nominal stride equal to 32 and a good choice in FCN mode is to use
+  output_stride=16 in order to increase the density of the computed features at
+  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.
+
+  Args:
+    inputs: A tensor of size [batch, height_in, width_in, channels].
+    blocks: A list of length equal to the number of ResNet blocks. Each element
+      is a resnet_utils.Block object describing the units in the block.
+    num_classes: Number of predicted classes for classification tasks. If None
+      we return the features before the logit layer.
+    is_training: whether batch_norm layers are in training mode.
+    global_pool: If True, we perform global average pooling before computing the
+      logits. Set to True for image classification, False for dense prediction.
+    output_stride: If None, then the output will be computed at the nominal
+      network stride. If output_stride is not None, it specifies the requested
+      ratio of input to output spatial resolution.
+    include_root_block: If True, include the initial convolution followed by
+      max-pooling, if False excludes it. If excluded, `inputs` should be the
+      results of an activation-less convolution.
+    spatial_squeeze: if True, logits is of shape [B, C], if False logits is
+      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
+    reuse: whether or not the network and its variables should be reused. To be
+      able to reuse them, 'scope' must be given.
+    scope: Optional variable_scope.
+
+  Returns:
+    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
+      If global_pool is False, then height_out and width_out are reduced by a
+      factor of output_stride compared to the respective height_in and width_in,
+      else both height_out and width_out equal one. If num_classes is None, then
+      net is the output of the last ResNet block, potentially after global
+      average pooling. If num_classes is not None, net contains the pre-softmax
+      activations.
+    end_points: A dictionary from components of the network to the corresponding
+      activation.
+
+  Raises:
+    ValueError: If the target output_stride is not valid.
+  """
+  with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc:
+    end_points_collection = sc.name + '_end_points'
+    with slim.arg_scope([slim.conv2d, bottleneck,
+                         resnet_utils.stack_blocks_dense],
+                        outputs_collections=end_points_collection):
+      with slim.arg_scope([slim.batch_norm], is_training=is_training):
+        net = inputs
+        if include_root_block:
+          if output_stride is not None:
+            if output_stride % 4 != 0:
+              raise ValueError('The output_stride needs to be a multiple of 4.')
+            output_stride /= 4
+          # We do not include batch normalization or activation functions in
+          # conv1 because the first ResNet unit will perform these. Cf.
+          # Appendix of [2].
+          with slim.arg_scope([slim.conv2d],
+                              activation_fn=None, normalizer_fn=None):
+            net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
+          net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
+        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
+        # This is needed because the pre-activation variant does not have batch
+        # normalization or activation functions in the residual unit output. See
+        # Appendix of [2].
+        net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm')
+        if global_pool:
+          # Global average pooling.
+          net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
+        if num_classes is not None:
+          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
+                            normalizer_fn=None, scope='logits')
+        if spatial_squeeze:
+          logits = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
+        else:
+          logits = net
+        # Convert end_points_collection into a dictionary of end_points.
+        end_points = slim.utils.convert_collection_to_dict(
+            end_points_collection)
+        if num_classes is not None:
+          end_points['predictions'] = slim.softmax(logits, scope='predictions')
+        return logits, end_points
+resnet_v2.default_image_size = 224
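+
+
+# Example (sketch, not part of the upstream Slim code): the FCN-mode shape
+# arithmetic from the docstring above, using resnet_v2_50 as defined later in
+# this file. With a [225, 225] input and output_stride=16, the output spatial
+# size is (225 - 1) / 16 + 1 = 15:
+#
+#   inputs = tf.random_normal([1, 225, 225, 3])
+#   with slim.arg_scope(resnet_arg_scope()):
+#     net, _ = resnet_v2_50(inputs, num_classes=None, global_pool=False,
+#                           output_stride=16)
+#   # net has shape [1, 15, 15, 2048]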
+
+
+def resnet_v2_block(scope, base_depth, num_units, stride):
+  """Helper function for creating a resnet_v2 bottleneck block.
+
+  Args:
+    scope: The scope of the block.
+    base_depth: The depth of the bottleneck layer for each unit.
+    num_units: The number of units in the block.
+    stride: The stride of the block, implemented as a stride in the last unit.
+      All other units have stride=1.
+
+  Returns:
+    A resnet_v2 bottleneck block.
+  """
+  return resnet_utils.Block(scope, bottleneck, [{
+      'depth': base_depth * 4,
+      'depth_bottleneck': base_depth,
+      'stride': 1
+  }] * (num_units - 1) + [{
+      'depth': base_depth * 4,
+      'depth_bottleneck': base_depth,
+      'stride': stride
+  }])
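+
+
+# Example (sketch, not part of the upstream Slim code):
+# resnet_v2_block('block1', base_depth=64, num_units=3, stride=2) builds a
+# Block whose unit list is
+#
+#   [{'depth': 256, 'depth_bottleneck': 64, 'stride': 1},
+#    {'depth': 256, 'depth_bottleneck': 64, 'stride': 1},
+#    {'depth': 256, 'depth_bottleneck': 64, 'stride': 2}]
+#
+# i.e. (num_units - 1) stride-1 units followed by one unit that applies the
+# block's stride, matching the convention described in bottleneck() above.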
+
+
+def resnet_v2_50(inputs,
+                 num_classes=None,
+                 is_training=True,
+                 global_pool=True,
+                 output_stride=None,
+                 spatial_squeeze=False,
+                 reuse=None,
+                 scope='resnet_v2_50'):
+  """ResNet-50 model of [1]. See resnet_v2() for arg and return description."""
+  blocks = [
+      resnet_v2_block('block1', base_depth=64, num_units=3, stride=2),
+      resnet_v2_block('block2', base_depth=128, num_units=4, stride=2),
+      resnet_v2_block('block3', base_depth=256, num_units=6, stride=2),
+      resnet_v2_block('block4', base_depth=512, num_units=3, stride=1),
+  ]
+  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
+                   global_pool=global_pool, output_stride=output_stride,
+                   include_root_block=True, spatial_squeeze=spatial_squeeze,
+                   reuse=reuse, scope=scope)
+resnet_v2_50.default_image_size = resnet_v2.default_image_size
+
+
+def resnet_v2_101(inputs,
+                  num_classes=None,
+                  is_training=True,
+                  global_pool=True,
+                  output_stride=None,
+                  spatial_squeeze=False,
+                  reuse=None,
+                  scope='resnet_v2_101'):
+  """ResNet-101 model of [1]. See resnet_v2() for arg and return description."""
+  blocks = [
+      resnet_v2_block('block1', base_depth=64, num_units=3, stride=2),
+      resnet_v2_block('block2', base_depth=128, num_units=4, stride=2),
+      resnet_v2_block('block3', base_depth=256, num_units=23, stride=2),
+      resnet_v2_block('block4', base_depth=512, num_units=3, stride=1),
+  ]
+  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
+                   global_pool=global_pool, output_stride=output_stride,
+                   include_root_block=True, spatial_squeeze=spatial_squeeze,
+                   reuse=reuse, scope=scope)
+resnet_v2_101.default_image_size = resnet_v2.default_image_size
+
+
+def resnet_v2_152(inputs,
+                  num_classes=None,
+                  is_training=True,
+                  global_pool=True,
+                  output_stride=None,
+                  spatial_squeeze=False,
+                  reuse=None,
+                  scope='resnet_v2_152'):
+  """ResNet-152 model of [1]. See resnet_v2() for arg and return description."""
+  blocks = [
+      resnet_v2_block('block1', base_depth=64, num_units=3, stride=2),
+      resnet_v2_block('block2', base_depth=128, num_units=8, stride=2),
+      resnet_v2_block('block3', base_depth=256, num_units=36, stride=2),
+      resnet_v2_block('block4', base_depth=512, num_units=3, stride=1),
+  ]
+  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
+                   global_pool=global_pool, output_stride=output_stride,
+                   include_root_block=True, spatial_squeeze=spatial_squeeze,
+                   reuse=reuse, scope=scope)
+resnet_v2_152.default_image_size = resnet_v2.default_image_size
+
+
+def resnet_v2_200(inputs,
+                  num_classes=None,
+                  is_training=True,
+                  global_pool=True,
+                  output_stride=None,
+                  spatial_squeeze=False,
+                  reuse=None,
+                  scope='resnet_v2_200'):
+  """ResNet-200 model of [2]. See resnet_v2() for arg and return description."""
+  blocks = [
+      resnet_v2_block('block1', base_depth=64, num_units=3, stride=2),
+      resnet_v2_block('block2', base_depth=128, num_units=24, stride=2),
+      resnet_v2_block('block3', base_depth=256, num_units=36, stride=2),
+      resnet_v2_block('block4', base_depth=512, num_units=3, stride=1),
+  ]
+  return resnet_v2(inputs, blocks, num_classes, is_training=is_training,
+                   global_pool=global_pool, output_stride=output_stride,
+                   include_root_block=True, spatial_squeeze=spatial_squeeze,
+                   reuse=reuse, scope=scope)
+resnet_v2_200.default_image_size = resnet_v2.default_image_size
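+
+
+if __name__ == '__main__':
+  # Minimal smoke-test sketch (illustrative only, not part of the upstream
+  # Slim code; assumes a TF 1.x runtime, like the rest of this file). Builds
+  # ResNet-50 on a dummy batch and prints the logits shape.
+  images = tf.random_normal([1, 224, 224, 3])
+  with slim.arg_scope(resnet_arg_scope()):
+    logits, end_points = resnet_v2_50(images, num_classes=1000,
+                                      is_training=False,
+                                      spatial_squeeze=True)
+  with tf.Session() as sess:
+    sess.run(tf.global_variables_initializer())
+    print(sess.run(logits).shape)  # expected: (1, 1000)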