X-Git-Url: https://gerrit.akraino.org/r/gitweb?a=blobdiff_plain;f=example-apps%2FPDD%2Fpcb-defect-detection%2Flibs%2Fnetworks%2Fslim_nets%2Fmobilenet_v1.py;fp=example-apps%2FPDD%2Fpcb-defect-detection%2Flibs%2Fnetworks%2Fslim_nets%2Fmobilenet_v1.py;h=9b25145f8d48093cf5a92a3415699105353aa038;hb=a785567fb9acfc68536767d20f60ba917ae85aa1;hp=0000000000000000000000000000000000000000;hpb=94a133e696b9b2a7f73544462c2714986fa7ab4a;p=ealt-edge.git diff --git a/example-apps/PDD/pcb-defect-detection/libs/networks/slim_nets/mobilenet_v1.py b/example-apps/PDD/pcb-defect-detection/libs/networks/slim_nets/mobilenet_v1.py new file mode 100755 index 0000000..9b25145 --- /dev/null +++ b/example-apps/PDD/pcb-defect-detection/libs/networks/slim_nets/mobilenet_v1.py @@ -0,0 +1,397 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================= +"""MobileNet v1. + +MobileNet is a general architecture and can be used for multiple use cases. +Depending on the use case, it can use different input layer size and different +head (for example: embeddings, localization and classification). + +As described in https://arxiv.org/abs/1704.04861. + + MobileNets: Efficient Convolutional Neural Networks for + Mobile Vision Applications + Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, + Tobias Weyand, Marco Andreetto, Hartwig Adam + +100% Mobilenet V1 (base) with input size 224x224: + +Layer params macs +-------------------------------------------------------------------------------- +MobilenetV1/Conv2d_0/Conv2D: 864 10,838,016 +MobilenetV1/Conv2d_1_depthwise/depthwise: 288 3,612,672 +MobilenetV1/Conv2d_1_pointwise/Conv2D: 2,048 25,690,112 +MobilenetV1/Conv2d_2_depthwise/depthwise: 576 1,806,336 +MobilenetV1/Conv2d_2_pointwise/Conv2D: 8,192 25,690,112 +MobilenetV1/Conv2d_3_depthwise/depthwise: 1,152 3,612,672 +MobilenetV1/Conv2d_3_pointwise/Conv2D: 16,384 51,380,224 +MobilenetV1/Conv2d_4_depthwise/depthwise: 1,152 903,168 +MobilenetV1/Conv2d_4_pointwise/Conv2D: 32,768 25,690,112 +MobilenetV1/Conv2d_5_depthwise/depthwise: 2,304 1,806,336 +MobilenetV1/Conv2d_5_pointwise/Conv2D: 65,536 51,380,224 +MobilenetV1/Conv2d_6_depthwise/depthwise: 2,304 451,584 +MobilenetV1/Conv2d_6_pointwise/Conv2D: 131,072 25,690,112 +MobilenetV1/Conv2d_7_depthwise/depthwise: 4,608 903,168 +MobilenetV1/Conv2d_7_pointwise/Conv2D: 262,144 51,380,224 +MobilenetV1/Conv2d_8_depthwise/depthwise: 4,608 903,168 +MobilenetV1/Conv2d_8_pointwise/Conv2D: 262,144 51,380,224 +MobilenetV1/Conv2d_9_depthwise/depthwise: 4,608 903,168 +MobilenetV1/Conv2d_9_pointwise/Conv2D: 262,144 51,380,224 +MobilenetV1/Conv2d_10_depthwise/depthwise: 4,608 903,168 +MobilenetV1/Conv2d_10_pointwise/Conv2D: 262,144 51,380,224 +MobilenetV1/Conv2d_11_depthwise/depthwise: 4,608 903,168 +MobilenetV1/Conv2d_11_pointwise/Conv2D: 262,144 51,380,224 +MobilenetV1/Conv2d_12_depthwise/depthwise: 4,608 225,792 +MobilenetV1/Conv2d_12_pointwise/Conv2D: 524,288 25,690,112 +MobilenetV1/Conv2d_13_depthwise/depthwise: 9,216 451,584 +MobilenetV1/Conv2d_13_pointwise/Conv2D: 1,048,576 51,380,224 +-------------------------------------------------------------------------------- +Total: 3,185,088 567,716,352 + + +75% Mobilenet V1 (base) with input size 128x128: + +Layer params macs +-------------------------------------------------------------------------------- +MobilenetV1/Conv2d_0/Conv2D: 648 2,654,208 +MobilenetV1/Conv2d_1_depthwise/depthwise: 216 884,736 +MobilenetV1/Conv2d_1_pointwise/Conv2D: 1,152 4,718,592 +MobilenetV1/Conv2d_2_depthwise/depthwise: 432 442,368 +MobilenetV1/Conv2d_2_pointwise/Conv2D: 4,608 4,718,592 +MobilenetV1/Conv2d_3_depthwise/depthwise: 864 884,736 +MobilenetV1/Conv2d_3_pointwise/Conv2D: 9,216 9,437,184 +MobilenetV1/Conv2d_4_depthwise/depthwise: 864 221,184 +MobilenetV1/Conv2d_4_pointwise/Conv2D: 18,432 4,718,592 +MobilenetV1/Conv2d_5_depthwise/depthwise: 1,728 442,368 +MobilenetV1/Conv2d_5_pointwise/Conv2D: 36,864 9,437,184 +MobilenetV1/Conv2d_6_depthwise/depthwise: 1,728 110,592 +MobilenetV1/Conv2d_6_pointwise/Conv2D: 73,728 4,718,592 +MobilenetV1/Conv2d_7_depthwise/depthwise: 3,456 221,184 +MobilenetV1/Conv2d_7_pointwise/Conv2D: 147,456 9,437,184 +MobilenetV1/Conv2d_8_depthwise/depthwise: 3,456 221,184 +MobilenetV1/Conv2d_8_pointwise/Conv2D: 147,456 9,437,184 +MobilenetV1/Conv2d_9_depthwise/depthwise: 3,456 221,184 +MobilenetV1/Conv2d_9_pointwise/Conv2D: 147,456 9,437,184 +MobilenetV1/Conv2d_10_depthwise/depthwise: 3,456 221,184 +MobilenetV1/Conv2d_10_pointwise/Conv2D: 147,456 9,437,184 +MobilenetV1/Conv2d_11_depthwise/depthwise: 3,456 221,184 +MobilenetV1/Conv2d_11_pointwise/Conv2D: 147,456 9,437,184 +MobilenetV1/Conv2d_12_depthwise/depthwise: 3,456 55,296 +MobilenetV1/Conv2d_12_pointwise/Conv2D: 294,912 4,718,592 +MobilenetV1/Conv2d_13_depthwise/depthwise: 6,912 110,592 +MobilenetV1/Conv2d_13_pointwise/Conv2D: 589,824 9,437,184 +-------------------------------------------------------------------------------- +Total: 1,800,144 106,002,432 + +""" + +# Tensorflow mandates these. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import namedtuple + +import tensorflow as tf + +slim = tf.contrib.slim + +# Conv and DepthSepConv namedtuple define layers of the MobileNet architecture +# Conv defines 3x3 convolution layers +# DepthSepConv defines 3x3 depthwise convolution followed by 1x1 convolution. +# stride is the stride of the convolution +# depth is the number of channels or filters in a layer +Conv = namedtuple('Conv', ['kernel', 'stride', 'depth']) +DepthSepConv = namedtuple('DepthSepConv', ['kernel', 'stride', 'depth']) + +# _CONV_DEFS specifies the MobileNet body +_CONV_DEFS = [ + Conv(kernel=[3, 3], stride=2, depth=32), + DepthSepConv(kernel=[3, 3], stride=1, depth=64), + DepthSepConv(kernel=[3, 3], stride=2, depth=128), + DepthSepConv(kernel=[3, 3], stride=1, depth=128), + DepthSepConv(kernel=[3, 3], stride=2, depth=256), + DepthSepConv(kernel=[3, 3], stride=1, depth=256), + DepthSepConv(kernel=[3, 3], stride=2, depth=512), + DepthSepConv(kernel=[3, 3], stride=1, depth=512), + DepthSepConv(kernel=[3, 3], stride=1, depth=512), + DepthSepConv(kernel=[3, 3], stride=1, depth=512), + DepthSepConv(kernel=[3, 3], stride=1, depth=512), + DepthSepConv(kernel=[3, 3], stride=1, depth=512), + DepthSepConv(kernel=[3, 3], stride=2, depth=1024), + DepthSepConv(kernel=[3, 3], stride=1, depth=1024) +] + + +def mobilenet_v1_base(inputs, + final_endpoint='Conv2d_13_pointwise', + min_depth=8, + depth_multiplier=1.0, + conv_defs=None, + output_stride=None, + scope=None): + """Mobilenet v1. + + Constructs a Mobilenet v1 network from inputs to the given final endpoint. + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + final_endpoint: specifies the endpoint to construct the network up to. It + can be one of ['Conv2d_0', 'Conv2d_1_pointwise', 'Conv2d_2_pointwise', + 'Conv2d_3_pointwise', 'Conv2d_4_pointwise', 'Conv2d_5'_pointwise, + 'Conv2d_6_pointwise', 'Conv2d_7_pointwise', 'Conv2d_8_pointwise', + 'Conv2d_9_pointwise', 'Conv2d_10_pointwise', 'Conv2d_11_pointwise', + 'Conv2d_12_pointwise', 'Conv2d_13_pointwise']. + min_depth: Minimum depth value (number of channels) for all convolution ops. + Enforced when depth_multiplier < 1, and not an active constraint when + depth_multiplier >= 1. + depth_multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + conv_defs: A list of ConvDef namedtuples specifying the net architecture. + output_stride: An integer that specifies the requested ratio of input to + output spatial resolution. If not None, then we invoke atrous convolution + if necessary to prevent the network from reducing the spatial resolution + of the activation maps. Allowed values are 8 (accurate fully convolutional + mode), 16 (fast fully convolutional mode), 32 (classification mode). + scope: Optional variable_scope. + + Returns: + tensor_out: output tensor corresponding to the final_endpoint. + end_points: a set of activations for external use, for example summaries or + losses. + + Raises: + ValueError: if final_endpoint is not set to one of the predefined values, + or depth_multiplier <= 0, or the target output_stride is not + allowed. + """ + depth = lambda d: max(int(d * depth_multiplier), min_depth) + end_points = {} + + # Used to find thinned depths for each layer. + if depth_multiplier <= 0: + raise ValueError('depth_multiplier is not greater than zero.') + + if conv_defs is None: + conv_defs = _CONV_DEFS + + if output_stride is not None and output_stride not in [8, 16, 32]: + raise ValueError('Only allowed output_stride values are 8, 16, 32.') + + with tf.variable_scope(scope, 'MobilenetV1', [inputs]): + with slim.arg_scope([slim.conv2d, slim.separable_conv2d], padding='SAME'): + # The current_stride variable keeps track of the output stride of the + # activations, i.e., the running product of convolution strides up to the + # current network layer. This allows us to invoke atrous convolution + # whenever applying the next convolution would result in the activations + # having output stride larger than the target output_stride. + current_stride = 1 + + # The atrous convolution rate parameter. + rate = 1 + + net = inputs + for i, conv_def in enumerate(conv_defs): + end_point_base = 'Conv2d_%d' % i + + if output_stride is not None and current_stride == output_stride: + # If we have reached the target output_stride, then we need to employ + # atrous convolution with stride=1 and multiply the atrous rate by the + # current unit's stride for use in subsequent layers. + layer_stride = 1 + layer_rate = rate + rate *= conv_def.stride + else: + layer_stride = conv_def.stride + layer_rate = 1 + current_stride *= conv_def.stride + + if isinstance(conv_def, Conv): + end_point = end_point_base + net = slim.conv2d(net, depth(conv_def.depth), conv_def.kernel, + stride=conv_def.stride, + normalizer_fn=slim.batch_norm, + scope=end_point) + end_points[end_point] = net + if end_point == final_endpoint: + return net, end_points + + elif isinstance(conv_def, DepthSepConv): + end_point = end_point_base + '_depthwise' + + # By passing filters=None + # separable_conv2d produces only a depthwise convolution layer + net = slim.separable_conv2d(net, None, conv_def.kernel, + depth_multiplier=1, + stride=layer_stride, + rate=layer_rate, + normalizer_fn=slim.batch_norm, + scope=end_point) + + end_points[end_point] = net + if end_point == final_endpoint: + return net, end_points + + end_point = end_point_base + '_pointwise' + + net = slim.conv2d(net, depth(conv_def.depth), [1, 1], + stride=1, + normalizer_fn=slim.batch_norm, + scope=end_point) + + end_points[end_point] = net + if end_point == final_endpoint: + return net, end_points + else: + raise ValueError('Unknown convolution type %s for layer %d' + % (conv_def.ltype, i)) + raise ValueError('Unknown final endpoint %s' % final_endpoint) + + +def mobilenet_v1(inputs, + num_classes=1000, + dropout_keep_prob=0.999, + is_training=True, + min_depth=8, + depth_multiplier=1.0, + conv_defs=None, + prediction_fn=tf.contrib.layers.softmax, + spatial_squeeze=True, + reuse=None, + scope='MobilenetV1'): + """Mobilenet v1 model for classification. + + Args: + inputs: a tensor of shape [batch_size, height, width, channels]. + num_classes: number of predicted classes. + dropout_keep_prob: the percentage of activation values that are retained. + is_training: whether is training or not. + min_depth: Minimum depth value (number of channels) for all convolution ops. + Enforced when depth_multiplier < 1, and not an active constraint when + depth_multiplier >= 1. + depth_multiplier: Float multiplier for the depth (number of channels) + for all convolution ops. The value must be greater than zero. Typical + usage will be to set this value in (0, 1) to reduce the number of + parameters or computation cost of the model. + conv_defs: A list of ConvDef namedtuples specifying the net architecture. + prediction_fn: a function to get predictions out of logits. + spatial_squeeze: if True, logits is of shape is [B, C], if false logits is + of shape [B, 1, 1, C], where B is batch_size and C is number of classes. + reuse: whether or not the network and its variables should be reused. To be + able to reuse 'scope' must be given. + scope: Optional variable_scope. + + Returns: + logits: the pre-softmax activations, a tensor of size + [batch_size, num_classes] + end_points: a dictionary from components of the network to the corresponding + activation. + + Raises: + ValueError: Input rank is invalid. + """ + input_shape = inputs.get_shape().as_list() + if len(input_shape) != 4: + raise ValueError('Invalid input tensor rank, expected 4, was: %d' % + len(input_shape)) + + with tf.variable_scope(scope, 'MobilenetV1', [inputs, num_classes], + reuse=reuse) as scope: + with slim.arg_scope([slim.batch_norm, slim.dropout], + is_training=is_training): + net, end_points = mobilenet_v1_base(inputs, scope=scope, + min_depth=min_depth, + depth_multiplier=depth_multiplier, + conv_defs=conv_defs) + with tf.variable_scope('Logits'): + kernel_size = _reduced_kernel_size_for_small_input(net, [7, 7]) + net = slim.avg_pool2d(net, kernel_size, padding='VALID', + scope='AvgPool_1a') + end_points['AvgPool_1a'] = net + # 1 x 1 x 1024 + net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b') + logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, + normalizer_fn=None, scope='Conv2d_1c_1x1') + if spatial_squeeze: + logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') + end_points['Logits'] = logits + if prediction_fn: + end_points['Predictions'] = prediction_fn(logits, scope='Predictions') + return logits, end_points + +mobilenet_v1.default_image_size = 224 + + +def _reduced_kernel_size_for_small_input(input_tensor, kernel_size): + """Define kernel size which is automatically reduced for small input. + + If the shape of the input images is unknown at graph construction time this + function assumes that the input images are large enough. + + Args: + input_tensor: input tensor of size [batch_size, height, width, channels]. + kernel_size: desired kernel size of length 2: [kernel_height, kernel_width] + + Returns: + a tensor with the kernel size. + """ + shape = input_tensor.get_shape().as_list() + if shape[1] is None or shape[2] is None: + kernel_size_out = kernel_size + else: + kernel_size_out = [min(shape[1], kernel_size[0]), + min(shape[2], kernel_size[1])] + return kernel_size_out + + +def mobilenet_v1_arg_scope(is_training=True, + weight_decay=0.00004, + stddev=0.09, + regularize_depthwise=False): + """Defines the default MobilenetV1 arg scope. + + Args: + is_training: Whether or not we're training the model. + weight_decay: The weight decay to use for regularizing the model. + stddev: The standard deviation of the trunctated normal weight initializer. + regularize_depthwise: Whether or not apply regularization on depthwise. + + Returns: + An `arg_scope` to use for the mobilenet v1 model. + """ + batch_norm_params = { + 'is_training': is_training, + 'center': True, + 'scale': True, + 'decay': 0.9997, + 'epsilon': 0.001, + } + + # Set weight_decay for weights in Conv and DepthSepConv layers. + weights_init = tf.truncated_normal_initializer(stddev=stddev) + regularizer = tf.contrib.layers.l2_regularizer(weight_decay) + if regularize_depthwise: + depthwise_regularizer = regularizer + else: + depthwise_regularizer = None + with slim.arg_scope([slim.conv2d, slim.separable_conv2d], + weights_initializer=weights_init, + activation_fn=tf.nn.relu6, normalizer_fn=slim.batch_norm): + with slim.arg_scope([slim.batch_norm], **batch_norm_params): + with slim.arg_scope([slim.conv2d], weights_regularizer=regularizer): + with slim.arg_scope([slim.separable_conv2d], + weights_regularizer=depthwise_regularizer) as sc: + return sc