1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 # ==============================================================================
"""Contains definitions for the original form of Residual Networks.

The 'v1' residual networks (ResNets) implemented in this module were proposed
by:
[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Deep Residual Learning for Image Recognition. arXiv:1512.03385

Other variants were introduced in:
[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
    Identity Mappings in Deep Residual Networks. arXiv: 1603.05027

The networks defined in this module utilize the bottleneck building block of
[1] with projection shortcuts only for increasing depths. They employ batch
normalization *after* every weight layer. This is the architecture used by
MSRA in the Imagenet and MSCOCO 2016 competition models ResNet-101 and
ResNet-152. See [2; Fig. 1a] for a comparison between the current 'v1'
architecture and the alternative 'v2' architecture of [2] which uses batch
normalization *before* every weight layer in the so-called full pre-activation
units.

Typical use:

   from tensorflow.contrib.slim.slim_nets import resnet_v1

ResNet-101 for image classification into 1000 classes:

   # inputs has shape [batch, 224, 224, 3]
   with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      net, end_points = resnet_v1.resnet_v1_101(inputs, 1000, is_training=False)

ResNet-101 for semantic segmentation into 21 classes:

   # inputs has shape [batch, 513, 513, 3]
   with slim.arg_scope(resnet_v1.resnet_arg_scope()):
      net, end_points = resnet_v1.resnet_v1_101(inputs,
                                                21,
                                                is_training=False,
                                                global_pool=False,
                                                output_stride=16)
"""
55 from __future__ import absolute_import
56 from __future__ import division
57 from __future__ import print_function
59 import tensorflow as tf
61 from libs.networks.slim_nets import resnet_utils
# Re-export the shared ResNet arg_scope so callers can write
# `resnet_v1.resnet_arg_scope()` without importing resnet_utils themselves.
resnet_arg_scope = resnet_utils.resnet_arg_scope
# Short alias for the TF-Slim layers library used throughout this module.
slim = tf.contrib.slim
@slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
               outputs_collections=None, scope=None):
  """Bottleneck residual unit variant with BN after convolutions.

  This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
  its definition. Note that we use here the bottleneck variant which has an
  extra bottleneck layer.

  When putting together two consecutive ResNet blocks that use this unit, one
  should use stride = 2 in the last unit of the first block.

  The function is registered with `slim.add_arg_scope` because resnet_v1()
  lists it in a `slim.arg_scope` to inject `outputs_collections`.

  Args:
    inputs: A tensor of size [batch, height, width, channels].
    depth: The depth of the ResNet unit output.
    depth_bottleneck: The depth of the bottleneck layers.
    stride: The ResNet unit's stride. Determines the amount of downsampling of
      the units output compared to its input.
    rate: An integer, rate for atrous convolution.
    outputs_collections: Collection to add the ResNet unit output.
    scope: Optional variable_scope.

  Returns:
    The ResNet unit's output.
  """
  with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc:
    depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4)
    if depth == depth_in:
      # Channel count is unchanged: no projection is needed, the input is only
      # subsampled so that it matches the residual branch spatially.
      shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
    else:
      # Channel count changes: project the input with a 1x1 convolution
      # (no activation) so it can be added to the residual branch.
      shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride,
                             activation_fn=None, scope='shortcut')

    # Residual branch: 1x1 reduce -> 3x3 (carries the stride / atrous rate)
    # -> 1x1 expand. The last convolution has no activation; the ReLU is
    # applied after the addition below.
    residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,
                           scope='conv1')
    residual = resnet_utils.conv2d_same(residual, depth_bottleneck, 3, stride,
                                        rate=rate, scope='conv2')
    residual = slim.conv2d(residual, depth, [1, 1], stride=1,
                           activation_fn=None, scope='conv3')

    output = tf.nn.relu(shortcut + residual)

    # Register the unit output under its name scope so that it shows up in
    # the end_points collection.
    return slim.utils.collect_named_outputs(outputs_collections,
                                            sc.original_name_scope,
                                            output)
# NOTE(review): the defaults of num_classes, is_training, global_pool,
# output_stride, reuse and scope were not visible in the damaged source and
# follow the usual TF-Slim signature -- confirm against this project's callers.
def resnet_v1(inputs,
              blocks,
              num_classes=None,
              is_training=True,
              global_pool=True,
              output_stride=None,
              include_root_block=True,
              spatial_squeeze=False,
              reuse=None,
              scope=None):
  """Generator for v1 ResNet models.

  This function generates a family of ResNet v1 models. See the resnet_v1_*()
  methods for specific model instantiations, obtained by selecting different
  block instantiations that produce ResNets of various depths.

  Training for image classification on Imagenet is usually done with [224, 224]
  inputs, resulting in [7, 7] feature maps at the output of the last ResNet
  block for the ResNets defined in [1] that have nominal stride equal to 32.
  However, for dense prediction tasks we advise that one uses inputs with
  spatial dimensions that are multiples of 32 plus 1, e.g., [321, 321]. In
  this case the feature maps at the ResNet output will have spatial shape
  [(height - 1) / output_stride + 1, (width - 1) / output_stride + 1]
  and corners exactly aligned with the input image corners, which greatly
  facilitates alignment of the features to the image. Using as input [225, 225]
  images results in [8, 8] feature maps at the output of the last ResNet block.

  For dense prediction tasks, the ResNet needs to run in fully-convolutional
  (FCN) mode and global_pool needs to be set to False. The ResNets in [1, 2] all
  have nominal stride equal to 32 and a good choice in FCN mode is to use
  output_stride=16 in order to increase the density of the computed features at
  small computational and memory overhead, cf. http://arxiv.org/abs/1606.00915.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    blocks: A list of length equal to the number of ResNet blocks. Each element
      is a resnet_utils.Block object describing the units in the block.
    num_classes: Number of predicted classes for classification tasks. If None
      we return the features before the logit layer.
    is_training: whether is training or not.
    global_pool: If True, we perform global average pooling before computing the
      logits. Set to True for image classification, False for dense prediction.
    output_stride: If None, then the output will be computed at the nominal
      network stride. If output_stride is not None, it specifies the requested
      ratio of input to output spatial resolution.
    include_root_block: If True, include the initial convolution followed by
      max-pooling, if False excludes it.
    spatial_squeeze: if True, logits is of shape [B, C], if false logits is
      of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
    reuse: whether or not the network and its variables should be reused. To be
      able to reuse 'scope' must be given.
    scope: Optional variable_scope.

  Returns:
    net: A rank-4 tensor of size [batch, height_out, width_out, channels_out].
      If global_pool is False, then height_out and width_out are reduced by a
      factor of output_stride compared to the respective height_in and width_in,
      else both height_out and width_out equal one. If num_classes is None, then
      net is the output of the last ResNet block, potentially after global
      average pooling. If num_classes is not None, net contains the pre-softmax
      activations.
    end_points: A dictionary from components of the network to the corresponding
      activation.

  Raises:
    ValueError: If the target output_stride is not valid.
  """
  with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc:
    end_points_collection = sc.name + '_end_points'
    # Every conv2d, bottleneck unit and block stack created below registers its
    # output in end_points_collection.
    with slim.arg_scope([slim.conv2d, bottleneck,
                         resnet_utils.stack_blocks_dense],
                        outputs_collections=end_points_collection):
      with slim.arg_scope([slim.batch_norm], is_training=is_training):
        net = inputs
        if include_root_block:
          if output_stride is not None:
            if output_stride % 4 != 0:
              raise ValueError('The output_stride needs to be a multiple of 4.')
            # The root block (stride-2 conv followed by stride-2 pooling)
            # already downsamples by 4, so the blocks only have to realise the
            # remaining factor. Integer division keeps the stride an int.
            output_stride //= 4
          net = resnet_utils.conv2d_same(net, 64, 7, stride=2, scope='conv1')
          net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')
        net = resnet_utils.stack_blocks_dense(net, blocks, output_stride)
        if global_pool:
          # Global average pooling.
          net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True)
        if num_classes is not None:
          # 1x1 convolution acting as the classifier; no activation and no
          # normalisation so that the output is the raw pre-softmax score.
          net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                            normalizer_fn=None, scope='logits')
        if spatial_squeeze:
          logits = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        else:
          logits = net
        # Convert end_points_collection into a dictionary of end_points.
        end_points = slim.utils.convert_collection_to_dict(
            end_points_collection)
        if num_classes is not None:
          end_points['predictions'] = slim.softmax(logits, scope='predictions')
        return logits, end_points
resnet_v1.default_image_size = 224
def resnet_v1_block(scope, base_depth, num_units, stride):
  """Helper function for creating a resnet_v1 bottleneck block.

  Args:
    scope: The scope of the block.
    base_depth: The depth of the bottleneck layer for each unit.
    num_units: The number of units in the block.
    stride: The stride of the block, implemented as a stride in the last unit.
      All other units have stride=1.

  Returns:
    A resnet_v1 bottleneck block.
  """
  # Each unit outputs four times the bottleneck depth.
  unit_depth = base_depth * 4
  # One independent dict per unit, so that no two units share the same
  # mutable argument mapping.
  unit_args = [
      {'depth': unit_depth, 'depth_bottleneck': base_depth, 'stride': 1}
      for _ in range(num_units - 1)
  ]
  # Only the final unit of the block applies the block's stride.
  unit_args.append({
      'depth': unit_depth,
      'depth_bottleneck': base_depth,
      'stride': stride,
  })
  return resnet_utils.Block(scope, bottleneck, unit_args)
# NOTE(review): the defaults of num_classes, is_training, global_pool,
# output_stride and reuse were not visible in the damaged source and follow
# the usual TF-Slim signature -- confirm against this project's callers.
def resnet_v1_50(inputs,
                 num_classes=None,
                 is_training=True,
                 global_pool=True,
                 output_stride=None,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='resnet_v1_50'):
  """ResNet-50 model of [1]. See resnet_v1() for arg and return description."""
  # Units per block: 3, 4, 6, 3. The first three blocks end with a stride-2
  # unit; the last block keeps the resolution.
  blocks = [
      resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
      resnet_v1_block('block3', base_depth=256, num_units=6, stride=2),
      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
  ]
  return resnet_v1(inputs, blocks, num_classes, is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, spatial_squeeze=spatial_squeeze,
                   reuse=reuse, scope=scope)
resnet_v1_50.default_image_size = resnet_v1.default_image_size
# NOTE(review): the defaults of num_classes, is_training, global_pool,
# output_stride and reuse were not visible in the damaged source and follow
# the usual TF-Slim signature -- confirm against this project's callers.
def resnet_v1_101(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v1_101'):
  """ResNet-101 model of [1]. See resnet_v1() for arg and return description."""
  # Units per block: 3, 4, 23, 3. The first three blocks end with a stride-2
  # unit; the last block keeps the resolution.
  blocks = [
      resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_block('block2', base_depth=128, num_units=4, stride=2),
      resnet_v1_block('block3', base_depth=256, num_units=23, stride=2),
      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
  ]
  return resnet_v1(inputs, blocks, num_classes, is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, spatial_squeeze=spatial_squeeze,
                   reuse=reuse, scope=scope)
resnet_v1_101.default_image_size = resnet_v1.default_image_size
# NOTE(review): the defaults of num_classes, is_training, global_pool,
# output_stride and reuse were not visible in the damaged source and follow
# the usual TF-Slim signature -- confirm against this project's callers.
def resnet_v1_152(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v1_152'):
  """ResNet-152 model of [1]. See resnet_v1() for arg and return description."""
  # Units per block: 3, 8, 36, 3. The first three blocks end with a stride-2
  # unit; the last block keeps the resolution.
  blocks = [
      resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_block('block2', base_depth=128, num_units=8, stride=2),
      resnet_v1_block('block3', base_depth=256, num_units=36, stride=2),
      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
  ]
  return resnet_v1(inputs, blocks, num_classes, is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, spatial_squeeze=spatial_squeeze,
                   reuse=reuse, scope=scope)
resnet_v1_152.default_image_size = resnet_v1.default_image_size
# NOTE(review): the defaults of num_classes, is_training, global_pool,
# output_stride and reuse were not visible in the damaged source and follow
# the usual TF-Slim signature -- confirm against this project's callers.
def resnet_v1_200(inputs,
                  num_classes=None,
                  is_training=True,
                  global_pool=True,
                  output_stride=None,
                  spatial_squeeze=True,
                  reuse=None,
                  scope='resnet_v1_200'):
  """ResNet-200 model of [2]. See resnet_v1() for arg and return description."""
  # Units per block: 3, 24, 36, 3. The first three blocks end with a stride-2
  # unit; the last block keeps the resolution.
  blocks = [
      resnet_v1_block('block1', base_depth=64, num_units=3, stride=2),
      resnet_v1_block('block2', base_depth=128, num_units=24, stride=2),
      resnet_v1_block('block3', base_depth=256, num_units=36, stride=2),
      resnet_v1_block('block4', base_depth=512, num_units=3, stride=1),
  ]
  return resnet_v1(inputs, blocks, num_classes, is_training,
                   global_pool=global_pool, output_stride=output_stride,
                   include_root_block=True, spatial_squeeze=spatial_squeeze,
                   reuse=reuse, scope=scope)
resnet_v1_200.default_image_size = resnet_v1.default_image_size