X-Git-Url: https://gerrit.akraino.org/r/gitweb?a=blobdiff_plain;f=example-apps%2FPDD%2Fpcb-defect-detection%2Flibs%2Fnetworks%2Fmobilenet%2Fconv_blocks.py;fp=example-apps%2FPDD%2Fpcb-defect-detection%2Flibs%2Fnetworks%2Fmobilenet%2Fconv_blocks.py;h=029bd117ecb8952e57d438e92792a5b25491f62d;hb=a785567fb9acfc68536767d20f60ba917ae85aa1;hp=0000000000000000000000000000000000000000;hpb=94a133e696b9b2a7f73544462c2714986fa7ab4a;p=ealt-edge.git
diff --git a/example-apps/PDD/pcb-defect-detection/libs/networks/mobilenet/conv_blocks.py b/example-apps/PDD/pcb-defect-detection/libs/networks/mobilenet/conv_blocks.py
new file mode 100755
index 0000000..029bd11
--- /dev/null
+++ b/example-apps/PDD/pcb-defect-detection/libs/networks/mobilenet/conv_blocks.py
@@ -0,0 +1,352 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Convolution blocks for mobilenet."""
+import contextlib
+import functools
+
+import tensorflow as tf
+
+slim = tf.contrib.slim
+
+
+def _fixed_padding(inputs, kernel_size, rate=1):
+  """Pads the input along the spatial dimensions independently of input size.
+
+  Pads the input such that if it was used in a convolution with 'VALID'
+  padding, the output would have the same dimensions as if the unpadded input
+  was used in a convolution with 'SAME' padding.
+
+  Args:
+    inputs: A tensor of size [batch, height_in, width_in, channels].
+    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
+    rate: An integer, rate for atrous convolution.
+
+  Returns:
+    output: A tensor of size [batch, height_out, width_out, channels] with the
+      input, either intact (if kernel_size == 1) or padded (if kernel_size > 1).
+  """
+  kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1),
+                           kernel_size[1] + (kernel_size[1] - 1) * (rate - 1)]
+  pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1]
+  pad_beg = [pad_total[0] // 2, pad_total[1] // 2]
+  pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]]
+  padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]],
+                                  [pad_beg[1], pad_end[1]], [0, 0]])
+  return padded_inputs
+
+
+def _make_divisible(v, divisor, min_value=None):
+  if min_value is None:
+    min_value = divisor
+  new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
+  # Make sure that rounding down does not go down by more than 10%.
+  if new_v < 0.9 * v:
+    new_v += divisor
+  return new_v
+
+
+def _split_divisible(num, num_ways, divisible_by=8):
+  """Evenly splits num into num_ways pieces, each a multiple of divisible_by."""
+  assert num % divisible_by == 0
+  assert num / num_ways >= divisible_by
+  # Note: we round each piece down, then adjust the splits so they sum to num.
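+  # For example, _split_divisible(104, 3) -> [40, 32, 32]: every piece starts
+  # at the floored multiple of divisible_by, and earlier pieces absorb the
+  # remainder so the running total keeps pace with num * (i + 1) / num_ways.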
+  base = num // num_ways // divisible_by * divisible_by
+  result = []
+  accumulated = 0
+  for i in range(num_ways):
+    r = base
+    while accumulated + r < num * (i + 1) / num_ways:
+      r += divisible_by
+    result.append(r)
+    accumulated += r
+  assert accumulated == num
+  return result
+
+
+@contextlib.contextmanager
+def _v1_compatible_scope_naming(scope):
+  if scope is None:  # Create uniquified separable blocks.
+    with tf.variable_scope(None, default_name='separable') as s, \
+        tf.name_scope(s.original_name_scope):
+      yield ''
+  else:
+    # We use scope_depthwise, scope_pointwise for compatibility with V1
+    # checkpoints, which provide numbered scopes.
+    scope += '_'
+    yield scope
+
+
+@slim.add_arg_scope
+def split_separable_conv2d(input_tensor,
+                           num_outputs,
+                           scope=None,
+                           normalizer_fn=None,
+                           stride=1,
+                           rate=1,
+                           endpoints=None,
+                           use_explicit_padding=False):
+  """Separable mobilenet V1 style convolution.
+
+  Depthwise convolution, with default non-linearity, followed by a 1x1
+  pointwise convolution. This is similar to slim.separable_conv2d, but differs
+  in that it applies batch normalization and non-linearity to the depthwise
+  stage as well. This matches the basic building block of the MobileNet paper
+  (https://arxiv.org/abs/1704.04861).
+
+  Args:
+    input_tensor: input
+    num_outputs: number of outputs
+    scope: optional name of the scope. Note that if provided, the layers will
+      use scope_depthwise for the depthwise and scope_pointwise for the
+      pointwise convolution.
+    normalizer_fn: which normalizer function to use for depthwise/pointwise.
+    stride: stride
+    rate: output rate (also known as dilation rate)
+    endpoints: optional, if provided, will export additional tensors to it.
+    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+      inputs so that the output dimensions are the same as if 'SAME' padding
+      were used.
+
+  Returns:
+    output tensor
+  """
+
+  with _v1_compatible_scope_naming(scope) as scope:
+    dw_scope = scope + 'depthwise'
+    endpoints = endpoints if endpoints is not None else {}
+    kernel_size = [3, 3]
+    padding = 'SAME'
+    if use_explicit_padding:
+      padding = 'VALID'
+      input_tensor = _fixed_padding(input_tensor, kernel_size, rate)
+    net = slim.separable_conv2d(
+        input_tensor,
+        None,
+        kernel_size,
+        depth_multiplier=1,
+        stride=stride,
+        rate=rate,
+        normalizer_fn=normalizer_fn,
+        padding=padding,
+        scope=dw_scope)
+
+    endpoints[dw_scope] = net
+
+    pw_scope = scope + 'pointwise'
+    net = slim.conv2d(
+        net,
+        num_outputs, [1, 1],
+        stride=1,
+        normalizer_fn=normalizer_fn,
+        scope=pw_scope)
+    endpoints[pw_scope] = net
+  return net
+
+
+def expand_input_by_factor(n, divisible_by=8):
+  return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by)
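+# For example, expand_input_by_factor(6) maps a 32-channel input to an
+# expansion size of _make_divisible(32 * 6, 8) == 192.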
+
+
+@slim.add_arg_scope
+def expanded_conv(input_tensor,
+                  num_outputs,
+                  expansion_size=expand_input_by_factor(6),
+                  stride=1,
+                  rate=1,
+                  kernel_size=(3, 3),
+                  residual=True,
+                  normalizer_fn=None,
+                  split_projection=1,
+                  split_expansion=1,
+                  expansion_transform=None,
+                  depthwise_location='expansion',
+                  depthwise_channel_multiplier=1,
+                  endpoints=None,
+                  use_explicit_padding=False,
+                  scope=None):
+  """Depthwise Convolution Block with expansion.
+
+  Builds a composite convolution that has the following structure:
+  expansion (1x1) -> depthwise (kernel_size) -> projection (1x1)
+
+  Args:
+    input_tensor: input
+    num_outputs: number of outputs in the final layer.
+    expansion_size: the size of the expansion, could be a constant or a
+      callable. If the latter, it will be provided 'num_inputs' as an input.
+      For forward compatibility it should accept arbitrary keyword arguments.
+      The default will expand the input by a factor of 6.
+    stride: depthwise stride
+    rate: depthwise rate
+    kernel_size: depthwise kernel
+    residual: whether to include a residual connection between the input
+      and the output.
+    normalizer_fn: batchnorm or otherwise
+    split_projection: how many ways to split the projection operator
+      (that is, the conv expansion->bottleneck).
+    split_expansion: how many ways to split the expansion op
+      (that is, the conv bottleneck->expansion); ops will keep depth
+      divisible by this value.
+    expansion_transform: Optional function that takes the expansion
+      as a single input and returns the output.
+    depthwise_location: where to put the depthwise convolution; supported
+      values are None, 'input', 'output', 'expansion'.
+    depthwise_channel_multiplier: depthwise channel multiplier: each input
+      channel will be replicated (with different filters) that many times.
+      So if the input had c channels, the depthwise output will have
+      c * depthwise_channel_multiplier channels.
+    endpoints: An optional dictionary into which intermediate endpoints are
+      placed. The keys "expansion_output", "depthwise_output" and
+      "projection_output" are populated, even when the corresponding stages
+      are skipped.
+    use_explicit_padding: Use 'VALID' padding for convolutions, but prepad
+      inputs so that the output dimensions are the same as if 'SAME' padding
+      were used.
+    scope: optional scope.
+
+  Returns:
+    Tensor of depth num_outputs
+
+  Raises:
+    TypeError: on an invalid depthwise_location.
+  """
+  with tf.variable_scope(scope, default_name='expanded_conv') as s, \
+      tf.name_scope(s.original_name_scope):
+    prev_depth = input_tensor.get_shape().as_list()[3]
+    if depthwise_location not in [None, 'input', 'output', 'expansion']:
+      raise TypeError('%r is unknown value for depthwise_location' %
+                      depthwise_location)
+    padding = 'SAME'
+    if use_explicit_padding:
+      padding = 'VALID'
+    depthwise_func = functools.partial(
+        slim.separable_conv2d,
+        num_outputs=None,
+        kernel_size=kernel_size,
+        depth_multiplier=depthwise_channel_multiplier,
+        stride=stride,
+        rate=rate,
+        normalizer_fn=normalizer_fn,
+        padding=padding,
+        scope='depthwise')
+    # b1 -> b2 * r -> b2
+    #   i -> (o * r) (bottleneck) -> o
+    input_tensor = tf.identity(input_tensor, 'input')
+    net = input_tensor
+
+    if depthwise_location == 'input':
+      if use_explicit_padding:
+        net = _fixed_padding(net, kernel_size, rate)
+      net = depthwise_func(net, activation_fn=None)
+
+    if callable(expansion_size):
+      inner_size = expansion_size(num_inputs=prev_depth)
+    else:
+      inner_size = expansion_size
+
+    if inner_size > net.shape[3]:
+      net = split_conv(
+          net,
+          inner_size,
+          num_ways=split_expansion,
+          scope='expand',
+          stride=1,
+          normalizer_fn=normalizer_fn)
+      net = tf.identity(net, 'expansion_output')
+    if endpoints is not None:
+      endpoints['expansion_output'] = net
+
+    if depthwise_location == 'expansion':
+      if use_explicit_padding:
+        net = _fixed_padding(net, kernel_size, rate)
+      net = depthwise_func(net)
+
+    net = tf.identity(net, name='depthwise_output')
+    if endpoints is not None:
+      endpoints['depthwise_output'] = net
+    if expansion_transform:
+      net = expansion_transform(expansion_tensor=net, input_tensor=input_tensor)
+    # Note: in contrast with the expansion, the projection is always applied,
+    # so that the output has the desired depth.
+    net = split_conv(
+        net,
+        num_outputs,
+        num_ways=split_projection,
+        stride=1,
+        scope='project',
+        normalizer_fn=normalizer_fn,
+        activation_fn=tf.identity)
+    if endpoints is not None:
+      endpoints['projection_output'] = net
+    if depthwise_location == 'output':
+      if use_explicit_padding:
+        net = _fixed_padding(net, kernel_size, rate)
+      net = depthwise_func(net, activation_fn=None)
+
+    if callable(residual):  # custom residual
+      net = residual(input_tensor=input_tensor, output_tensor=net)
+    elif (residual and
+          # stride check enforces that we don't add residuals when spatial
+          # dimensions are None
+          stride == 1 and
+          # depth matches
+          net.get_shape().as_list()[3] ==
+          input_tensor.get_shape().as_list()[3]):
+      net += input_tensor
+    return tf.identity(net, name='output')
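+
+
+# Illustrative usage sketch (an editorial addition; not part of the upstream
+# TensorFlow file). A MobileNet-V2 style inverted bottleneck on a 32-channel
+# feature map, assuming a TF 1.x runtime with tf.contrib.slim:
+#
+#   features = tf.placeholder(tf.float32, [1, 56, 56, 32])
+#   net = expanded_conv(features, num_outputs=32,
+#                       normalizer_fn=slim.batch_norm)
+#
+# Here the input is expanded to _make_divisible(32 * 6, 8) == 192 channels,
+# filtered with a 3x3 depthwise convolution, projected back to 32 channels,
+# and, because stride == 1 and the depths match, added to the residual input.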
+
+
+def split_conv(input_tensor,
+               num_outputs,
+               num_ways,
+               scope,
+               divisible_by=8,
+               **kwargs):
+  """Creates a split convolution.
+
+  Split convolution splits the input and output into
+  'num_ways' blocks of approximately the same size each,
+  and only connects the $i$-th input to the $i$-th output.
+
+  Args:
+    input_tensor: input tensor
+    num_outputs: number of output filters
+    num_ways: num blocks to split by.
+    scope: scope for all the operators.
+    divisible_by: make sure that every part is divisible by this.
+    **kwargs: will be passed directly into the conv2d operator.
+
+  Returns:
+    tensor
+  """
+  b = input_tensor.get_shape().as_list()[3]
+
+  if num_ways == 1 or min(b // num_ways,
+                          num_outputs // num_ways) < divisible_by:
+    # Don't do any splitting if we would end up with fewer than divisible_by
+    # filters on either side.
+    return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs)
+
+  outs = []
+  input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by)
+  output_splits = _split_divisible(
+      num_outputs, num_ways, divisible_by=divisible_by)
+  inputs = tf.split(input_tensor, input_splits, axis=3, name='split_' + scope)
+  base = scope
+  for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)):
+    scope = base + '_part_%d' % (i,)
+    n = slim.conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs)
+    n = tf.identity(n, scope + '_output')
+    outs.append(n)
+  return tf.concat(outs, 3, name=scope + '_concat')
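+
+
+if __name__ == '__main__':
+  # Illustrative smoke test (an editorial addition; not part of the upstream
+  # TensorFlow file). Chains the two public blocks on a dummy image batch,
+  # assuming a TF 1.x runtime where tf.contrib.slim is available.
+  images = tf.placeholder(tf.float32, [1, 224, 224, 3])
+  net = split_separable_conv2d(images, 32, normalizer_fn=slim.batch_norm)
+  net = expanded_conv(net, 16, normalizer_fn=slim.batch_norm)
+  print(net.shape)  # Expected: (1, 224, 224, 16)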