inception_v1.py

   1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
   2 #
   3 # Licensed under the Apache License, Version 2.0 (the "License");
   4 # you may not use this file except in compliance with the License.
   5 # You may obtain a copy of the License at
   6 #
   7 # http://www.apache.org/licenses/LICENSE-2.0
   8 #
   9 # Unless required by applicable law or agreed to in writing, software
  10 # distributed under the License is distributed on an "AS IS" BASIS,
  11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12 # See the License for the specific language governing permissions and
  13 # limitations under the License.
  14 # ==============================================================================
  15 """Contains the definition for inception v1 classification network."""
  16
  17 from __future__ import absolute_import
  18 from __future__ import division
  19 from __future__ import print_function
  20
  21 import tensorflow as tf
  22
  23 from nets import inception_utils
  24
  25 slim = tf.contrib.slim
  26 trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
  27
  28
  29 def inception_v1_base(inputs,
  30                       final_endpoint='Mixed_5c',
  31                       scope='InceptionV1'):
  32   """Defines the Inception V1 base architecture.
  33
  34   This architecture is defined in:
  35     Going deeper with convolutions
  36     Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
  37     Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
  38     http://arxiv.org/pdf/1409.4842v1.pdf.
  39
  40   Args:
  41     inputs: a tensor of size [batch_size, height, width, channels].
  42     final_endpoint: specifies the endpoint to construct the network up to. It
  43       can be one of ['Conv2d_1a_7x7', 'MaxPool_2a_3x3', 'Conv2d_2b_1x1',
  44       'Conv2d_2c_3x3', 'MaxPool_3a_3x3', 'Mixed_3b', 'Mixed_3c',
  45       'MaxPool_4a_3x3', 'Mixed_4b', 'Mixed_4c', 'Mixed_4d', 'Mixed_4e',
  46       'Mixed_4f', 'MaxPool_5a_2x2', 'Mixed_5b', 'Mixed_5c']
  47     scope: Optional variable_scope.
  48
  49   Returns:
  50     A dictionary from components of the network to the corresponding activation.
  51
  52   Raises:
  53     ValueError: if final_endpoint is not set to one of the predefined values.
  54   """
  55   end_points = {}
  56   with tf.variable_scope(scope, 'InceptionV1', [inputs]):
  57     with slim.arg_scope(
  58         [slim.conv2d, slim.fully_connected],
  59         weights_initializer=trunc_normal(0.01)):
  60       with slim.arg_scope([slim.conv2d, slim.max_pool2d],
  61                           stride=1, padding='SAME'):
  62         end_point = 'Conv2d_1a_7x7'
  63         net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope=end_point)
  64         end_points[end_point] = net
  65         if final_endpoint == end_point: return net, end_points
  66         end_point = 'MaxPool_2a_3x3'
  67         net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
  68         end_points[end_point] = net
  69         if final_endpoint == end_point: return net, end_points
  70         end_point = 'Conv2d_2b_1x1'
  71         net = slim.conv2d(net, 64, [1, 1], scope=end_point)
  72         end_points[end_point] = net
  73         if final_endpoint == end_point: return net, end_points
  74         end_point = 'Conv2d_2c_3x3'
  75         net = slim.conv2d(net, 192, [3, 3], scope=end_point)
  76         end_points[end_point] = net
  77         if final_endpoint == end_point: return net, end_points
  78         end_point = 'MaxPool_3a_3x3'
  79         net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
  80         end_points[end_point] = net
  81         if final_endpoint == end_point: return net, end_points
  82
  83         end_point = 'Mixed_3b'
  84         with tf.variable_scope(end_point):
  85           with tf.variable_scope('Branch_0'):
  86             branch_0 = slim.conv2d(net, 64, [1, 1], scope='Conv2d_0a_1x1')
  87           with tf.variable_scope('Branch_1'):
  88             branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
  89             branch_1 = slim.conv2d(branch_1, 128, [3, 3], scope='Conv2d_0b_3x3')
  90           with tf.variable_scope('Branch_2'):
  91             branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
  92             branch_2 = slim.conv2d(branch_2, 32, [3, 3], scope='Conv2d_0b_3x3')
  93           with tf.variable_scope('Branch_3'):
  94             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
  95             branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
  96           net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
  97         end_points[end_point] = net
  98         if final_endpoint == end_point: return net, end_points
  99
 100         end_point = 'Mixed_3c'
 101         with tf.variable_scope(end_point):
 102           with tf.variable_scope('Branch_0'):
 103             branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
 104           with tf.variable_scope('Branch_1'):
 105             branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
 106             branch_1 = slim.conv2d(branch_1, 192, [3, 3], scope='Conv2d_0b_3x3')
 107           with tf.variable_scope('Branch_2'):
 108             branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
 109             branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='Conv2d_0b_3x3')
 110           with tf.variable_scope('Branch_3'):
 111             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
 112             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
 113           net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 114         end_points[end_point] = net
 115         if final_endpoint == end_point: return net, end_points
 116
 117         end_point = 'MaxPool_4a_3x3'
 118         net = slim.max_pool2d(net, [3, 3], stride=2, scope=end_point)
 119         end_points[end_point] = net
 120         if final_endpoint == end_point: return net, end_points
 121
 122         end_point = 'Mixed_4b'
 123         with tf.variable_scope(end_point):
 124           with tf.variable_scope('Branch_0'):
 125             branch_0 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
 126           with tf.variable_scope('Branch_1'):
 127             branch_1 = slim.conv2d(net, 96, [1, 1], scope='Conv2d_0a_1x1')
 128             branch_1 = slim.conv2d(branch_1, 208, [3, 3], scope='Conv2d_0b_3x3')
 129           with tf.variable_scope('Branch_2'):
 130             branch_2 = slim.conv2d(net, 16, [1, 1], scope='Conv2d_0a_1x1')
 131             branch_2 = slim.conv2d(branch_2, 48, [3, 3], scope='Conv2d_0b_3x3')
 132           with tf.variable_scope('Branch_3'):
 133             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
 134             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
 135           net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 136         end_points[end_point] = net
 137         if final_endpoint == end_point: return net, end_points
 138
 139         end_point = 'Mixed_4c'
 140         with tf.variable_scope(end_point):
 141           with tf.variable_scope('Branch_0'):
 142             branch_0 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
 143           with tf.variable_scope('Branch_1'):
 144             branch_1 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
 145             branch_1 = slim.conv2d(branch_1, 224, [3, 3], scope='Conv2d_0b_3x3')
 146           with tf.variable_scope('Branch_2'):
 147             branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
 148             branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
 149           with tf.variable_scope('Branch_3'):
 150             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
 151             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
 152           net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 153         end_points[end_point] = net
 154         if final_endpoint == end_point: return net, end_points
 155
 156         end_point = 'Mixed_4d'
 157         with tf.variable_scope(end_point):
 158           with tf.variable_scope('Branch_0'):
 159             branch_0 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
 160           with tf.variable_scope('Branch_1'):
 161             branch_1 = slim.conv2d(net, 128, [1, 1], scope='Conv2d_0a_1x1')
 162             branch_1 = slim.conv2d(branch_1, 256, [3, 3], scope='Conv2d_0b_3x3')
 163           with tf.variable_scope('Branch_2'):
 164             branch_2 = slim.conv2d(net, 24, [1, 1], scope='Conv2d_0a_1x1')
 165             branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
 166           with tf.variable_scope('Branch_3'):
 167             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
 168             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
 169           net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 170         end_points[end_point] = net
 171         if final_endpoint == end_point: return net, end_points
 172
 173         end_point = 'Mixed_4e'
 174         with tf.variable_scope(end_point):
 175           with tf.variable_scope('Branch_0'):
 176             branch_0 = slim.conv2d(net, 112, [1, 1], scope='Conv2d_0a_1x1')
 177           with tf.variable_scope('Branch_1'):
 178             branch_1 = slim.conv2d(net, 144, [1, 1], scope='Conv2d_0a_1x1')
 179             branch_1 = slim.conv2d(branch_1, 288, [3, 3], scope='Conv2d_0b_3x3')
 180           with tf.variable_scope('Branch_2'):
 181             branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
 182             branch_2 = slim.conv2d(branch_2, 64, [3, 3], scope='Conv2d_0b_3x3')
 183           with tf.variable_scope('Branch_3'):
 184             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
 185             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
 186           net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 187         end_points[end_point] = net
 188         if final_endpoint == end_point: return net, end_points
 189
 190         end_point = 'Mixed_4f'
 191         with tf.variable_scope(end_point):
 192           with tf.variable_scope('Branch_0'):
 193             branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
 194           with tf.variable_scope('Branch_1'):
 195             branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
 196             branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
 197           with tf.variable_scope('Branch_2'):
 198             branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
 199             branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
 200           with tf.variable_scope('Branch_3'):
 201             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
 202             branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
 203           net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 204         end_points[end_point] = net
 205         if final_endpoint == end_point: return net, end_points
 206
 207         end_point = 'MaxPool_5a_2x2'
 208         net = slim.max_pool2d(net, [2, 2], stride=2, scope=end_point)
 209         end_points[end_point] = net
 210         if final_endpoint == end_point: return net, end_points
 211
 212         end_point = 'Mixed_5b'
 213         with tf.variable_scope(end_point):
 214           with tf.variable_scope('Branch_0'):
 215             branch_0 = slim.conv2d(net, 256, [1, 1], scope='Conv2d_0a_1x1')
 216           with tf.variable_scope('Branch_1'):
 217             branch_1 = slim.conv2d(net, 160, [1, 1], scope='Conv2d_0a_1x1')
 218             branch_1 = slim.conv2d(branch_1, 320, [3, 3], scope='Conv2d_0b_3x3')
 219           with tf.variable_scope('Branch_2'):
 220             branch_2 = slim.conv2d(net, 32, [1, 1], scope='Conv2d_0a_1x1')
 221             branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0a_3x3')
 222           with tf.variable_scope('Branch_3'):
 223             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
 224             branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
 225           net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 226         end_points[end_point] = net
 227         if final_endpoint == end_point: return net, end_points
 228
 229         end_point = 'Mixed_5c'
 230         with tf.variable_scope(end_point):
 231           with tf.variable_scope('Branch_0'):
 232             branch_0 = slim.conv2d(net, 384, [1, 1], scope='Conv2d_0a_1x1')
 233           with tf.variable_scope('Branch_1'):
 234             branch_1 = slim.conv2d(net, 192, [1, 1], scope='Conv2d_0a_1x1')
 235             branch_1 = slim.conv2d(branch_1, 384, [3, 3], scope='Conv2d_0b_3x3')
 236           with tf.variable_scope('Branch_2'):
 237             branch_2 = slim.conv2d(net, 48, [1, 1], scope='Conv2d_0a_1x1')
 238             branch_2 = slim.conv2d(branch_2, 128, [3, 3], scope='Conv2d_0b_3x3')
 239           with tf.variable_scope('Branch_3'):
 240             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
 241             branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
 242           net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 243         end_points[end_point] = net
 244         if final_endpoint == end_point: return net, end_points
 245     raise ValueError('Unknown final endpoint %s' % final_endpoint)
 246
 247
 248 def inception_v1(inputs,
 249                  num_classes=1000,
 250                  is_training=True,
 251                  dropout_keep_prob=0.8,
 252                  prediction_fn=slim.softmax,
 253                  spatial_squeeze=True,
 254                  reuse=None,
 255                  scope='InceptionV1'):
 256   """Defines the Inception V1 architecture.
 257
 258   This architecture is defined in:
 259
 260     Going deeper with convolutions
 261     Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
 262     Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich.
 263     http://arxiv.org/pdf/1409.4842v1.pdf.
 264
 265   The default image size used to train this network is 224x224.
 266
 267   Args:
 268     inputs: a tensor of size [batch_size, height, width, channels].
 269     num_classes: number of predicted classes.
 270     is_training: whether is training or not.
 271     dropout_keep_prob: the percentage of activation values that are retained.
 272     prediction_fn: a function to get predictions out of logits.
 273     spatial_squeeze: if True, logits is of shape [B, C], if false logits is
 274         of shape [B, 1, 1, C], where B is batch_size and C is number of classes.
 275     reuse: whether or not the network and its variables should be reused. To be
 276       able to reuse 'scope' must be given.
 277     scope: Optional variable_scope.
 278
 279   Returns:
 280     logits: the pre-softmax activations, a tensor of size
 281       [batch_size, num_classes]
 282     end_points: a dictionary from components of the network to the corresponding
 283       activation.
 284   """
 285   # Final pooling and prediction
 286   with tf.variable_scope(scope, 'InceptionV1', [inputs, num_classes],
 287                          reuse=reuse) as scope:
 288     with slim.arg_scope([slim.batch_norm, slim.dropout],
 289                         is_training=is_training):
 290       net, end_points = inception_v1_base(inputs, scope=scope)
 291       with tf.variable_scope('Logits'):
 292         net = slim.avg_pool2d(net, [7, 7], stride=1, scope='AvgPool_0a_7x7')
 293         net = slim.dropout(net,
 294                            dropout_keep_prob, scope='Dropout_0b')
 295         logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
 296                              normalizer_fn=None, scope='Conv2d_0c_1x1')
 297         if spatial_squeeze:
 298           logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
 299
 300         end_points['Logits'] = logits
 301         end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
 302   return logits, end_points
# Default input image size, exposed as an attribute of the model function;
# the inception_v1 docstring gives 224x224 as the size used for training.
inception_v1.default_image_size = 224

# Module-level alias for the arg scope provided by nets.inception_utils.
inception_v1_arg_scope = inception_utils.inception_arg_scope