1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
3 # Licensed under the Apache License, Version 2.0 (the "License");
4 # you may not use this file except in compliance with the License.
5 # You may obtain a copy of the License at
7 # http://www.apache.org/licenses/LICENSE-2.0
9 # Unless required by applicable law or agreed to in writing, software
10 # distributed under the License is distributed on an "AS IS" BASIS,
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 # See the License for the specific language governing permissions and
13 # limitations under the License.
14 # ==============================================================================
15 """Contains model definitions for versions of the Oxford VGG network.
17 These model definitions were introduced in the following technical report:
19 Very Deep Convolutional Networks For Large-Scale Image Recognition
20 Karen Simonyan and Andrew Zisserman
21 arXiv technical report, 2015
22 PDF: http://arxiv.org/pdf/1409.1556.pdf
23 ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
26 More information can be obtained from the VGG website:
27 www.robots.ox.ac.uk/~vgg/research/very_deep/
Usage:
  with slim.arg_scope(vgg.vgg_arg_scope()):
    outputs, end_points = vgg.vgg_a(inputs)

  with slim.arg_scope(vgg.vgg_arg_scope()):
    outputs, end_points = vgg.vgg_16(inputs)
40 from __future__ import absolute_import
41 from __future__ import division
42 from __future__ import print_function
44 import tensorflow as tf
46 slim = tf.contrib.slim
def vgg_arg_scope(weight_decay=0.0005):
  """Defines the VGG arg scope.

  Args:
    weight_decay: The l2 regularization coefficient.

  Returns:
    An arg_scope.
  """
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      weights_regularizer=slim.l2_regularizer(weight_decay),
                      biases_initializer=tf.zeros_initializer()):
    # All convolutions use SAME padding; the missing `return arg_sc` made this
    # function return None, breaking every `with slim.arg_scope(...)` caller.
    with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
      return arg_sc
def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a',
          fc_conv_padding='VALID'):
  """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected layer
      that is implemented as a convolutional layer. Use 'SAME' padding if you
      are applying the network in a fully convolutional manner and want to
      get a prediction map downsampled by a factor of 32 as an output.
      Otherwise, the output prediction map will be (input / 32) - 6 in case of
      'VALID' padding.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_a', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      # VGG-A: 1-1-2-2-2 conv layers per stage, each followed by 2x2 max-pool.
      net = slim.repeat(inputs, 1, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 1, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 2, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout7')
      # Final logits layer: no activation/normalization so raw scores come out.
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        # Drop the 1x1 spatial dims so classification outputs are [batch, classes].
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
vgg_a.default_image_size = 224
def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16',
           fc_conv_padding='VALID'):
  """Oxford Net VGG 16-Layers version D Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected layer
      that is implemented as a convolutional layer. Use 'SAME' padding if you
      are applying the network in a fully convolutional manner and want to
      get a prediction map downsampled by a factor of 32 as an output.
      Otherwise, the output prediction map will be (input / 32) - 6 in case of
      'VALID' padding.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      # VGG-D (16 layers): 2-2-3-3-3 conv layers per stage with 2x2 max-pools.
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout7')
      # Final logits layer: no activation/normalization so raw scores come out.
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        # Drop the 1x1 spatial dims so classification outputs are [batch, classes].
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      # Local modification vs. upstream slim: also expose class probabilities.
      end_points['predictions'] = slim.softmax(net, scope='predictions')
      return net, end_points
vgg_16.default_image_size = 224
def vgg_19(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19',
           fc_conv_padding='VALID'):
  """Oxford Net VGG 19-Layers version E Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.
    fc_conv_padding: the type of padding to use for the fully connected layer
      that is implemented as a convolutional layer. Use 'SAME' padding if you
      are applying the network in a fully convolutional manner and want to
      get a prediction map downsampled by a factor of 32 as an output.
      Otherwise, the output prediction map will be (input / 32) - 6 in case of
      'VALID' padding.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with tf.variable_scope(scope, 'vgg_19', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=end_points_collection):
      # VGG-E (19 layers): 2-2-4-4-4 conv layers per stage with 2x2 max-pools.
      net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
      net = slim.max_pool2d(net, [2, 2], scope='pool1')
      net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
      net = slim.max_pool2d(net, [2, 2], scope='pool2')
      net = slim.repeat(net, 4, slim.conv2d, 256, [3, 3], scope='conv3')
      net = slim.max_pool2d(net, [2, 2], scope='pool3')
      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv4')
      net = slim.max_pool2d(net, [2, 2], scope='pool4')
      net = slim.repeat(net, 4, slim.conv2d, 512, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = slim.conv2d(net, 4096, [7, 7], padding=fc_conv_padding, scope='fc6')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout6')
      net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
      net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                         scope='dropout7')
      # Final logits layer: no activation/normalization so raw scores come out.
      net = slim.conv2d(net, num_classes, [1, 1],
                        activation_fn=None,
                        normalizer_fn=None,
                        scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        # Drop the 1x1 spatial dims so classification outputs are [batch, classes].
        net = tf.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points
vgg_19.default_image_size = 224