# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
15 """Convolution blocks for mobilenet."""
19 import tensorflow as tf
21 slim = tf.contrib.slim


def _fixed_padding(inputs, kernel_size, rate=1):
  """Pads the input along the spatial dimensions independently of input size.

  Pads the input such that if it was used in a convolution with 'VALID'
  padding, the output would have the same dimensions as if the unpadded input
  was used in a convolution with 'SAME' padding.

  Args:
    inputs: A tensor of size [batch, height_in, width_in, channels].
    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
    rate: An integer, rate for atrous convolution.

  Returns:
    output: A tensor of size [batch, height_out, width_out, channels] with the
      input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
  """
  kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
                           kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
  pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
  pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
  pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
  padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
                                  [pad_beg[1], pad_end[1]], [0, 0]])
  return padded_inputs
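
# Illustrative sketch (not part of the original library): with
# kernel_size=[3, 3] and rate=2 the effective kernel is 5x5, so pad_total is
# [4, 4] and two rows/columns of zeros are added on every spatial side:
#
#   x = tf.zeros([1, 32, 32, 8])
#   y = _fixed_padding(x, [3, 3], rate=2)  # y has shape [1, 36, 36, 8]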


def _make_divisible(v, divisor, min_value=None):
  """Rounds v to the nearest multiple of divisor, staying within 10% of v."""
  if min_value is None:
    min_value = divisor
  new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
  # Make sure that rounding down does not go down by more than 10%.
  if new_v < 0.9 * v:
    new_v += divisor
  return new_v
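
# Illustrative sketch: the helper rounds to the nearest multiple of divisor,
# then bumps up one notch if rounding lost more than 10% of v:
#
#   _make_divisible(96, 8)  # -> 96 (already divisible)
#   _make_divisible(33, 8)  # -> 32 (32 >= 0.9 * 33, so the round-down stands)
#   _make_divisible(11, 8)  # -> 16 (8 < 0.9 * 11, so one extra divisor added)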


def _split_divisible(num, num_ways, divisible_by=8):
  """Evenly splits num into num_ways pieces, each a multiple of divisible_by."""
  assert num % divisible_by == 0
  assert num / num_ways >= divisible_by
  # Note: we want to round down; each split is then adjusted to match the total.
  base = num // num_ways // divisible_by * divisible_by
  result = []
  accumulated = 0
  for i in range(num_ways):
    r = base
    while accumulated + r < num * (i + 1) / num_ways:
      r += divisible_by
    result.append(r)
    accumulated += r
  assert accumulated == num
  return result
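
# Illustrative sketch: splitting 32 channels three ways with divisible_by=8
# yields pieces that are each a multiple of 8 and sum back to 32:
#
#   _split_divisible(32, 3)  # -> [16, 8, 8]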


@contextlib.contextmanager
def _v1_compatible_scope_naming(scope):
  if scope is None:  # Create uniquified separable blocks.
    with tf.variable_scope(None, default_name='separable') as s, \
        tf.name_scope(s.original_name_scope):
      yield ''
  else:
    # We use scope_depthwise, scope_pointwise for compatibility with V1
    # checkpoints, which provide numbered scopes.
    scope += '_'
    yield scope
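
# Illustrative sketch: with scope=None the context yields an empty prefix and
# relies on a uniquified variable scope ('separable', 'separable_1', ...);
# with an explicit scope it yields 'scope_' so that inner ops are named
# 'scope_depthwise' / 'scope_pointwise', matching V1 checkpoint layouts:
#
#   with _v1_compatible_scope_naming('Conv2d_1') as prefix:
#     print(prefix)  # 'Conv2d_1_'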


def split_separable_conv2d(input_tensor,
                           num_outputs,
                           scope=None,
                           normalizer_fn=None,
                           stride=1,
                           rate=1,
                           endpoints=None,
                           use_explicit_padding=False):
  """Separable mobilenet V1 style convolution.

  Depthwise convolution, with default non-linearity, followed by 1x1
  pointwise convolution. This is similar to slim.separable_conv2d, but
  differs in that it applies batch normalization and non-linearity to the
  depthwise stage. This matches the basic building block of the Mobilenet
  paper (https://arxiv.org/abs/1704.04861).

  Args:
    input_tensor: input tensor.
    num_outputs: number of outputs.
    scope: optional name of the scope. Note if provided it will use
      scope_depthwise for depthwise, and scope_pointwise for pointwise.
    normalizer_fn: which normalizer function to use for depthwise/pointwise.
    stride: stride.
    rate: output rate (also known as dilation rate).
    endpoints: optional, if provided, will export additional tensors to it.
    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
      inputs so that the output dimensions are the same as if 'SAME' padding
      were used.

  Returns:
    output tensor.
  """
  with _v1_compatible_scope_naming(scope) as scope:
    dw_scope = scope + 'depthwise'
    endpoints = endpoints if endpoints is not None else {}
    kernel_size = [3, 3]
    padding = 'SAME'
    if use_explicit_padding:
      padding = 'VALID'
      input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
    # num_outputs=None makes slim.separable_conv2d depthwise-only.
    net = slim.separable_conv2d(
        input_tensor, None, kernel_size, depth_multiplier=1, stride=stride,
        rate=rate, normalizer_fn=normalizer_fn, padding=padding,
        scope=dw_scope)
    endpoints[dw_scope] = net

    pw_scope = scope + 'pointwise'
    net = slim.conv2d(net, num_outputs, [1, 1], stride=1,
                      normalizer_fn=normalizer_fn, scope=pw_scope)
    endpoints[pw_scope] = net
  return net
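
# Illustrative usage sketch (images is a hypothetical NHWC tensor):
#
#   images = tf.zeros([1, 224, 224, 32])
#   net = split_separable_conv2d(images, num_outputs=64, stride=2,
#                                normalizer_fn=slim.batch_norm)
#   # net has shape [1, 112, 112, 64]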


def expand_input_by_factor(n, divisible_by=8):
  return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by)
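
# Illustrative sketch: expand_input_by_factor returns a callable that
# expanded_conv evaluates against the actual input depth:
#
#   expand = expand_input_by_factor(6)
#   expand(num_inputs=16)  # -> 96 (16 * 6 is already divisible by 8)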


@slim.add_arg_scope
def expanded_conv(input_tensor,
                  num_outputs,
                  expansion_size=expand_input_by_factor(6),
                  stride=1,
                  rate=1,
                  kernel_size=(3, 3),
                  residual=True,
                  normalizer_fn=None,
                  split_projection=1,
                  split_expansion=1,
                  expansion_transform=None,
                  depthwise_location='expansion',
                  depthwise_channel_multiplier=1,
                  endpoints=None,
                  use_explicit_padding=False,
                  scope=None):
  """Depthwise Convolution Block with expansion.

  Builds a composite convolution that has the following structure:
  expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)

  Args:
    input_tensor: input tensor.
    num_outputs: number of outputs in the final layer.
    expansion_size: the size of expansion, could be a constant or a callable.
      If the latter, it will be provided 'num_inputs' as an input. For forward
      compatibility it should accept arbitrary keyword arguments.
      Default will expand the input by a factor of 6.
    stride: depthwise stride.
    rate: depthwise rate (dilation).
    kernel_size: depthwise kernel.
    residual: whether to include a residual connection between input
      and output.
    normalizer_fn: batchnorm or otherwise.
    split_projection: how many ways to split the projection operator
      (that is conv expansion->bottleneck).
    split_expansion: how many ways to split the expansion op
      (that is conv bottleneck->expansion); ops will keep depth divisible
      by this value.
    expansion_transform: Optional function that takes expansion
      as a single input and returns output.
    depthwise_location: where to put the depthwise convolution; supported
      values are None, 'input', 'output', 'expansion'.
    depthwise_channel_multiplier: depthwise channel multiplier:
      each input channel will be replicated (with different filters)
      that many times. So if the input had c channels,
      the output will have c * depthwise_channel_multiplier channels.
    endpoints: An optional dictionary into which intermediate endpoints are
      placed. The keys "expansion_output", "depthwise_output",
      "projection_output" and "expansion_transform" are always populated, even
      if the corresponding functions are not invoked.
    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
      inputs so that the output dimensions are the same as if 'SAME' padding
      were used.
    scope: optional scope.

  Returns:
    Tensor of depth num_outputs.

  Raises:
    TypeError: on an invalid depthwise_location.
  """
  with tf.variable_scope(scope, default_name='expanded_conv') as s, \
       tf.name_scope(s.original_name_scope):
    prev_depth = input_tensor.get_shape().as_list()[3]
    if depthwise_location not in [None, 'input', 'output', 'expansion']:
      raise TypeError('%r is unknown value for depthwise_location' %
                      depthwise_location)
    padding = 'SAME'
    if use_explicit_padding:
      padding = 'VALID'
    depthwise_func = functools.partial(
        slim.separable_conv2d,
        num_outputs=None,
        kernel_size=kernel_size,
        depth_multiplier=depthwise_channel_multiplier,
        stride=stride,
        rate=rate,
        normalizer_fn=normalizer_fn,
        padding=padding,
        scope='depthwise')
    # b1 -> b2 * r -> b2
    #   i -> (o * r) (bottleneck) -> o
    input_tensor = tf.identity(input_tensor, 'input')
    net = input_tensor

    if depthwise_location == 'input':
      if use_explicit_padding:
        net = _fixed_padding(net, kernel_size, rate)
      net = depthwise_func(net, activation_fn=None)

    if callable(expansion_size):
      inner_size = expansion_size(num_inputs=prev_depth)
    else:
      inner_size = expansion_size

    if inner_size > net.shape[3]:
      net = split_conv(
          net,
          inner_size,
          num_ways=split_expansion,
          scope='expand',
          stride=1,
          normalizer_fn=normalizer_fn)
      net = tf.identity(net, 'expansion_output')
    if endpoints is not None:
      endpoints['expansion_output'] = net

    if depthwise_location == 'expansion':
      if use_explicit_padding:
        net = _fixed_padding(net, kernel_size, rate)
      net = depthwise_func(net)

    net = tf.identity(net, name='depthwise_output')
    if endpoints is not None:
      endpoints['depthwise_output'] = net
    if expansion_transform:
      net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor)
    # Note: in contrast with expansion, we always have a
    # projection to produce the desired output size.
    net = split_conv(
        net,
        num_outputs,
        num_ways=split_projection,
        stride=1,
        scope='project',
        normalizer_fn=normalizer_fn,
        activation_fn=tf.identity)
    if endpoints is not None:
      endpoints['projection_output'] = net
    if depthwise_location == 'output':
      if use_explicit_padding:
        net = _fixed_padding(net, kernel_size, rate)
      net = depthwise_func(net, activation_fn=None)

    if callable(residual):  # custom residual
      net = residual(input_tensor=input_tensor, output_tensor=net)
    elif (residual and
          # stride check enforces that we don't add residuals when spatial
          # dimensions are None
          stride == 1 and
          # depth matches
          net.get_shape().as_list()[3] ==
          input_tensor.get_shape().as_list()[3]):
      net += input_tensor
    return tf.identity(net, name='output')
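
# Illustrative usage sketch: with the defaults this builds the inverted
# bottleneck block of MobileNet V2 (https://arxiv.org/abs/1801.04381): expand
# 16 channels to 96 via 1x1, 3x3 depthwise at stride 2, project to 24 via 1x1
# (no residual here, since stride != 1 and the depths differ):
#
#   images = tf.zeros([1, 112, 112, 16])
#   net = expanded_conv(images, num_outputs=24, stride=2,
#                       normalizer_fn=slim.batch_norm)
#   # net has shape [1, 56, 56, 24]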


def split_conv(input_tensor,
               num_outputs,
               num_ways,
               scope,
               divisible_by=8,
               **kwargs):
  """Creates a split convolution.

  Split convolution splits the input and output into
  'num_ways' blocks of approximately the same size each,
  and only connects the $i$-th input to the $i$-th output.
  It is compatible with a normal convolution when num_ways=1.

  Args:
    input_tensor: input tensor.
    num_outputs: number of output filters.
    num_ways: number of blocks to split by.
    scope: scope for all the operators.
    divisible_by: make sure that every part is divisible by this.
    **kwargs: will be passed directly into the conv2d operator.

  Returns:
    tensor
  """
  b = input_tensor.get_shape().as_list()[3]

  if num_ways == 1 or min(b // num_ways,
                          num_outputs // num_ways) < divisible_by:
    # Don't do any splitting if we would end up with fewer than divisible_by
    # filters on either side.
    return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs)

  outs = []
  input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by)
  output_splits = _split_divisible(
      num_outputs, num_ways, divisible_by=divisible_by)
  inputs = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope)
  base = scope
  for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)):
    scope = base + '_part_%d' % (i,)
    n = slim.conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs)
    n = tf.identity(n, scope + '_output')
    outs.append(n)
  return tf.concat(outs, 3, name=scope + '_concat')
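
# Illustrative sketch: with num_ways=2, a 32 -> 48 channel 1x1 convolution
# becomes two independent 1x1 convolutions (16 -> 24 each) whose outputs are
# concatenated along the channel axis; with num_ways=1 it degenerates to a
# single slim.conv2d:
#
#   x = tf.zeros([1, 14, 14, 32])
#   y = split_conv(x, 48, num_ways=2, scope='split_example')
#   # y has shape [1, 14, 14, 48]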