pcb defect detection application
diff --git a/example-apps/PDD/pcb-defect-detection/libs/networks/mobilenet/conv_blocks.py b/example-apps/PDD/pcb-defect-detection/libs/networks/mobilenet/conv_blocks.py
new file mode 100755 (executable)
index 0000000..029bd11
--- /dev/null
@@ -0,0 +1,352 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Convolution blocks for mobilenet."""
+import contextlib
+import functools
+
+import tensorflow as tf
+
+slim = tf.contrib.slim
+
+
+def _fixed_padding(inputs, kernel_size, rate=1):
+  """Pads the input along the spatial dimensions independently of input size.
+
+  Pads the input such that if it was used in a convolution with 'VALID' padding,
+  the output would have the same dimensions as if the unpadded input was used
+  in a convolution with 'SAME' padding.
+
+  Args:
+    inputs: A tensor of size [batch, height_in, width_in, channels].
+    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
+    rate: An integer, rate for atrous convolution.
+
+  Returns:
+    output: A tensor of size [batch, height_out, width_out, channels] with the
+      input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
+  """
+  kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
+                           kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
+  pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
+  pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
+  pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
+  padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
+                                  [pad_beg[1], pad_end[1]], [0, 0]])
+  return padded_inputs
+
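+# Example: for kernel_size=[3, 3] and rate=2 the effective kernel is 5x5,
+# so the input is padded by 2 on each spatial border:
+#   x = tf.placeholder(tf.float32, [1, 32, 32, 8])
+#   y = _fixed_padding(x, [3, 3], rate=2)  # y.shape == [1, 36, 36, 8]
+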
+
+def _make_divisible(v, divisor, min_value=None):
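+  """Rounds v to the nearest multiple of divisor, clamped to at least
+  min_value and never more than 10% below v."""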
+  if min_value is None:
+    min_value = divisor
+  new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+  # Make sure that round down does not go down by more than 10%.
+  if new_v < 0.9 * v:
+    new_v += divisor
+  return new_v
+
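+# Examples: _make_divisible(30, 8) == 32, _make_divisible(7, 8) == 8, and
+# _make_divisible(150, 64) == 192 (the 10% guard bumps 128 up by one divisor).
+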
+
+def _split_divisible(num, num_ways, divisible_by=8):
+  """Evenly splits num, num_ways so each piece is a multiple of divisible_by."""
+  assert num % divisible_by == 0
+  assert num / num_ways >= divisible_by
+  # Note: we want to round down; each split is adjusted up to match the total.
+  base = num // num_ways // divisible_by * divisible_by
+  result = []
+  accumulated = 0
+  for i in range(num_ways):
+    r = base
+    while accumulated + r < num * (i + 1) / num_ways:
+      r += divisible_by
+    result.append(r)
+    accumulated += r
+  assert accumulated == num
+  return result
+
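+# Example: _split_divisible(64, 3) == [24, 24, 16]; every part is a multiple
+# of 8 and the parts sum to 64.
+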
+
+@contextlib.contextmanager
+def _v1_compatible_scope_naming(scope):
+  if scope is None:  # Create uniquely named separable blocks.
+    with tf.variable_scope(None, default_name='separable') as s, \
+         tf.name_scope(s.original_name_scope):
+      yield ''
+  else:
+    # We use scope_depthwise, scope_pointwise for compatibility with V1
+    # checkpoints, which provide numbered scopes.
+    scope += '_'
+    yield scope
+
+
+@slim.add_arg_scope
+def split_separable_conv2d(input_tensor,
+                           num_outputs,
+                           scope=None,
+                           normalizer_fn=None,
+                           stride=1,
+                           rate=1,
+                           endpoints=None,
+                           use_explicit_padding=False):
+  """Separable mobilenet V1 style convolution.
+
+  Depthwise convolution, with default non-linearity,
+  followed by 1x1 depthwise convolution.  This is similar to
+  slim.separable_conv2d, but differs in tha it applies batch
+  normalization and non-linearity to depthwise. This  matches
+  the basic building of Mobilenet Paper
+  (https://arxiv.org/abs/1704.04861)
+
+  Args:
+    input_tensor: input
+    num_outputs: number of outputs
+    scope: optional name of the scope. Note if provided it will use
+      scope_depthwise for depthwise, and scope_pointwise for pointwise.
+    normalizer_fn: which normalizer function to use for depthwise/pointwise
+    stride: stride
+    rate: output rate (also known as dilation rate)
+    endpoints: optional, if provided, will export additional tensors to it.
+    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+      inputs so that the output dimensions are the same as if 'SAME' padding
+      were used.
+
+  Returns:
+    output tensor
+  """
+
+  with _v1_compatible_scope_naming(scope) as scope:
+    dw_scope = scope + 'depthwise'
+    endpoints = endpoints if endpoints is not None else {}
+    kernel_size = [3, 3]
+    padding = 'SAME'
+    if use_explicit_padding:
+      padding = 'VALID'
+      input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
+    net = slim.separable_conv2d(
+        input_tensor,
+        None,
+        kernel_size,
+        depth_multiplier=1,
+        stride=stride,
+        rate=rate,
+        normalizer_fn=normalizer_fn,
+        padding=padding,
+        scope=dw_scope)
+
+    endpoints[dw_scope] = net
+
+    pw_scope = scope + 'pointwise'
+    net = slim.conv2d(
+        net,
+        num_outputs, [1, 1],
+        stride=1,
+        normalizer_fn=normalizer_fn,
+        scope=pw_scope)
+    endpoints[pw_scope] = net
+  return net
+
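+# Minimal usage sketch (illustrative; assumes a TF1.x graph and that
+# `images` is a [batch, height, width, channels] float tensor):
+#   endpoints = {}
+#   net = split_separable_conv2d(images, 64, scope='sep_1', stride=2,
+#                                normalizer_fn=slim.batch_norm,
+#                                endpoints=endpoints)
+#   # endpoints now maps 'sep_1_depthwise' and 'sep_1_pointwise' to tensors.
+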
+
+def expand_input_by_factor(n, divisible_by=8):
+  return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by)
+
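+# Example: expand_input_by_factor(6) returns a callable such that
+# expand_input_by_factor(6)(num_inputs=24) == _make_divisible(24 * 6, 8) == 144.
+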
+
+@slim.add_arg_scope
+def expanded_conv(input_tensor,
+                  num_outputs,
+                  expansion_size=expand_input_by_factor(6),
+                  stride=1,
+                  rate=1,
+                  kernel_size=(3, 3),
+                  residual=True,
+                  normalizer_fn=None,
+                  split_projection=1,
+                  split_expansion=1,
+                  expansion_transform=None,
+                  depthwise_location='expansion',
+                  depthwise_channel_multiplier=1,
+                  endpoints=None,
+                  use_explicit_padding=False,
+                  scope=None):
+  """Depthwise Convolution Block with expansion.
+
+  Builds a composite convolution that has the following structure:
+  expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)
+
+  Args:
+    input_tensor: input
+    num_outputs: number of outputs in the final layer.
+    expansion_size: the size of expansion, could be a constant or a callable.
+      If the latter, it will be provided 'num_inputs' as an input. For forward
+      compatibility it should accept arbitrary keyword arguments.
+      Default will expand the input by a factor of 6.
+    stride: depthwise stride
+    rate: depthwise rate
+    kernel_size: depthwise kernel
+    residual: whether to include residual connection between input
+      and output.
+    normalizer_fn: batchnorm or otherwise
+    split_projection: how many ways to split the projection operator
+      (that is, conv expansion->bottleneck).
+    split_expansion: how many ways to split the expansion op
+      (that is, conv bottleneck->expansion). Ops will keep depth divisible
+      by this value.
+    expansion_transform: Optional function that takes expansion
+      as a single input and returns output.
+    depthwise_location: where to put the depthwise convolution; supported
+      values are None, 'input', 'output', 'expansion'.
+    depthwise_channel_multiplier: depthwise channel multiplier:
+      each input channel will be replicated (with different filters)
+      that many times. So if the input had c channels,
+      the output will have c * depthwise_channel_multiplier channels.
+    endpoints: An optional dictionary into which intermediate endpoints are
+      placed. The keys "expansion_output", "depthwise_output",
+      "projection_output" and "expansion_transform" are always populated, even
+      if the corresponding functions are not invoked.
+    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+      inputs so that the output dimensions are the same as if 'SAME' padding
+      were used.
+    scope: optional scope.
+
+  Returns:
+    Tensor of depth num_outputs
+
+  Raises:
+    TypeError: on invalid depthwise_location.
+  """
+  with tf.variable_scope(scope, default_name='expanded_conv') as s, \
+       tf.name_scope(s.original_name_scope):
+    prev_depth = input_tensor.get_shape().as_list()[3]
+    if depthwise_location not in [None, 'input', 'output', 'expansion']:
+      raise TypeError('%r is unknown value for depthwise_location' %
+                      depthwise_location)
+    padding = 'SAME'
+    if use_explicit_padding:
+      padding = 'VALID'
+    depthwise_func = functools.partial(
+        slim.separable_conv2d,
+        num_outputs=None,
+        kernel_size=kernel_size,
+        depth_multiplier=depthwise_channel_multiplier,
+        stride=stride,
+        rate=rate,
+        normalizer_fn=normalizer_fn,
+        padding=padding,
+        scope='depthwise')
+    # b1 -> b2 * r -> b2
+    #   i -> (o * r) (bottleneck) -> o
+    input_tensor = tf.identity(input_tensor, 'input')
+    net = input_tensor
+
+    if depthwise_location == 'input':
+      if use_explicit_padding:
+        net = _fixed_padding(net, kernel_size, rate)
+      net = depthwise_func(net, activation_fn=None)
+
+    if callable(expansion_size):
+      inner_size = expansion_size(num_inputs=prev_depth)
+    else:
+      inner_size = expansion_size
+
+    if inner_size > net.shape[3]:
+      net = split_conv(
+          net,
+          inner_size,
+          num_ways=split_expansion,
+          scope='expand',
+          stride=1,
+          normalizer_fn=normalizer_fn)
+      net = tf.identity(net, 'expansion_output')
+    if endpoints is not None:
+      endpoints['expansion_output'] = net
+
+    if depthwise_location == 'expansion':
+      if use_explicit_padding:
+        net = _fixed_padding(net, kernel_size, rate)
+      net = depthwise_func(net)
+
+    net = tf.identity(net, name='depthwise_output')
+    if endpoints is not None:
+      endpoints['depthwise_output'] = net
+    if expansion_transform:
+      net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor)
+    # Note that, in contrast with the expansion, the projection is always
+    # applied to produce the desired output size.
+    net = split_conv(
+        net,
+        num_outputs,
+        num_ways=split_projection,
+        stride=1,
+        scope='project',
+        normalizer_fn=normalizer_fn,
+        activation_fn=tf.identity)
+    if endpoints is not None:
+      endpoints['projection_output'] = net
+    if depthwise_location == 'output':
+      if use_explicit_padding:
+        net = _fixed_padding(net, kernel_size, rate)
+      net = depthwise_func(net, activation_fn=None)
+
+    if callable(residual):  # custom residual
+      net = residual(input_tensor=input_tensor, output_tensor=net)
+    elif (residual and
+          # the stride check enforces that we don't add residuals when the
+          # spatial dimensions would not match
+          stride == 1 and
+          # Depth matches
+          net.get_shape().as_list()[3] ==
+          input_tensor.get_shape().as_list()[3]):
+      net += input_tensor
+    return tf.identity(net, name='output')
+
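+# Minimal usage sketch of a MobileNetV2-style inverted-residual block
+# (illustrative; assumes a TF1.x graph and a [batch, 56, 56, 24] input `x`):
+#   y = expanded_conv(x, num_outputs=24,
+#                     expansion_size=expand_input_by_factor(6),
+#                     stride=1, normalizer_fn=slim.batch_norm)
+#   # Depth flow: 24 -> 144 (1x1 expand) -> 144 (3x3 depthwise) -> 24
+#   # (1x1 project), plus a residual add since stride == 1 and depths match.
+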
+
+def split_conv(input_tensor,
+               num_outputs,
+               num_ways,
+               scope,
+               divisible_by=8,
+               **kwargs):
+  """Creates a split convolution.
+
+  Split convolution splits the input and output into
+  'num_ways' blocks of approximately the same size each,
+  and only connects the $i$-th input to the $i$-th output.
+
+  Args:
+    input_tensor: input tensor
+    num_outputs: number of output filters
+    num_ways: number of blocks to split into.
+    scope: scope for all the operators.
+    divisible_by: make sure that every part is divisible by this.
+    **kwargs: will be passed directly into conv2d operator
+  Returns:
+    tensor
+  """
+  b = input_tensor.get_shape().as_list()[3]
+
+  if num_ways == 1 or min(b // num_ways,
+                          num_outputs // num_ways) < divisible_by:
+    # Don't do any splitting if we would end up with fewer than
+    # divisible_by (default 8) filters on either side.
+    return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs)
+
+  outs = []
+  input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by)
+  output_splits = _split_divisible(
+      num_outputs, num_ways, divisible_by=divisible_by)
+  inputs = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope)
+  base = scope
+  for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)):
+    scope = base + '_part_%d' % (i,)
+    n = slim.conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs)
+    n = tf.identity(n, scope + '_output')
+    outs.append(n)
+  return tf.concat(outs, 3, name=scope + '_concat')
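+
+
+# Example: with num_ways=2, an input of depth 64 and num_outputs=32 is split
+# into input blocks [32, 32] and output blocks [16, 16]; each input block gets
+# its own 1x1 convolution and the results are concatenated along channels.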