3 # Interface for accessing the Microsoft COCO dataset.
5 # Microsoft COCO is a large image dataset designed for object detection,
6 # segmentation, and caption generation. pycocotools is a Python API that
7 # assists in loading, parsing and visualizing the annotations in COCO.
8 # Please visit http://mscoco.org/ for more information on COCO, including
9 # for the data, paper, and tutorials. The exact format of the annotations
10 # is also described on the COCO website. For example usage of the pycocotools
11 # please see pycocotools_demo.ipynb. In addition to this API, please download both
12 # the COCO images and annotations in order to run the demo.
# An alternative to using the API is to load the annotations directly
# into a Python dictionary.
16 # Using the API provides additional utility functions. Note that this API
17 # supports both *instance* and *caption* annotations. In the case of
18 # captions not all functions are defined (e.g. categories are undefined).
20 # The following API functions are defined:
# COCO - COCO api class that loads a COCO annotation file and prepares data structures.
22 # decodeMask - Decode binary mask M encoded via run-length encoding.
23 # encodeMask - Encode binary mask M using run-length encoding.
24 # getAnnIds - Get ann ids that satisfy given filter conditions.
25 # getCatIds - Get cat ids that satisfy given filter conditions.
26 # getImgIds - Get img ids that satisfy given filter conditions.
27 # loadAnns - Load anns with the specified ids.
28 # loadCats - Load cats with the specified ids.
29 # loadImgs - Load imgs with the specified ids.
30 # annToMask - Convert segmentation in an annotation to binary mask.
31 # showAnns - Display the specified annotations.
32 # loadRes - Load algorithm results and create API for accessing them.
33 # download - Download COCO images from mscoco.org server.
34 # Throughout the API "ann"=annotation, "cat"=category, and "img"=image.
# Help on each function can be accessed by: "help COCO>function".
37 # See also COCO>decodeMask,
38 # COCO>encodeMask, COCO>getAnnIds, COCO>getCatIds,
39 # COCO>getImgIds, COCO>loadAnns, COCO>loadCats,
40 # COCO>loadImgs, COCO>annToMask, COCO>showAnns
42 # Microsoft COCO Toolbox. version 2.0
43 # Data, paper, and tutorials available at: http://mscoco.org/
44 # Code written by Piotr Dollar and Tsung-Yi Lin, 2014.
45 # Licensed under the Simplified BSD License [see bsd.txt]
import copy
import itertools
import json
import os
import sys
import time
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon

from . import mask as maskUtils

PYTHON_VERSION = sys.version_info[0]
if PYTHON_VERSION == 2:
    from urllib import urlretrieve
elif PYTHON_VERSION == 3:
    from urllib.request import urlretrieve
66 def _isArrayLike(obj):
67 return hasattr(obj, '__iter__') and hasattr(obj, '__len__')
71 def __init__(self, annotation_file=None):
73 Constructor of Microsoft COCO helper class for reading and visualizing annotations.
74 :param annotation_file (str): location of annotation file
75 :param image_folder (str): location to the folder that hosts images.
79 self.dataset,self.anns,self.cats,self.imgs = dict(),dict(),dict(),dict()
80 self.imgToAnns, self.catToImgs = defaultdict(list), defaultdict(list)
81 if not annotation_file == None:
82 print('loading annotations into memory...')
84 dataset = json.load(open(annotation_file, 'r'))
85 assert type(dataset)==dict, 'annotation file format {} not supported'.format(type(dataset))
86 print('Done (t={:0.2f}s)'.format(time.time()- tic))
87 self.dataset = dataset
def createIndex(self):
    """
    Build the lookup tables used by the query methods from self.dataset.
    Fills self.anns, self.imgs and self.cats (id -> record), self.imgToAnns
    (image id -> list of annotations) and self.catToImgs (category id -> list
    of image ids, one entry per annotation). Sections that are missing from
    self.dataset leave the matching table empty.
    :return: None
    """
    print('creating index...')
    anns, cats, imgs = {}, {}, {}
    imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
    # catToImgs is only filled when the dataset also lists categories
    hasCats = 'categories' in self.dataset
    if 'annotations' in self.dataset:
        for ann in self.dataset['annotations']:
            imgToAnns[ann['image_id']].append(ann)
            anns[ann['id']] = ann
            if hasCats:
                catToImgs[ann['category_id']].append(ann['image_id'])

    if 'images' in self.dataset:
        for img in self.dataset['images']:
            imgs[img['id']] = img

    if hasCats:
        for cat in self.dataset['categories']:
            cats[cat['id']] = cat

    print('index created!')

    # create class members
    self.anns = anns
    self.imgToAnns = imgToAnns
    self.catToImgs = catToImgs
    self.imgs = imgs
    self.cats = cats
def info(self):
    """
    Print information about the annotation file.
    Every entry of the dataset's 'info' section is printed as "key: value";
    nothing is printed when the dataset has no 'info' section.
    :return: None
    """
    for key, value in self.dataset.get('info', {}).items():
        print('{}: {}'.format(key, value))
def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
    """
    Get ann ids that satisfy given filter conditions. default skips that filter
    :param imgIds  (int array)   : get anns for given imgs
           catIds  (int array)   : get anns for given cats
           areaRng (float array) : get anns for given area range (e.g. [0 inf]); both bounds are exclusive
           iscrowd (boolean)     : get anns for given crowd label (False or True)
    :return: ids (int array)     : integer array of ann ids
    """
    imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
    catIds = catIds if _isArrayLike(catIds) else [catIds]

    if len(imgIds) > 0:
        # use the per-image index instead of scanning every annotation
        lists = [self.imgToAnns[imgId] for imgId in imgIds if imgId in self.imgToAnns]
        anns = list(itertools.chain.from_iterable(lists))
    else:
        anns = self.dataset['annotations']
    if len(catIds) > 0:
        # build the lookup once; membership is then O(1) per annotation
        wanted = set(catIds)
        anns = [ann for ann in anns if ann['category_id'] in wanted]
    if len(areaRng) > 0:
        lo, hi = areaRng[0], areaRng[1]
        anns = [ann for ann in anns if lo < ann['area'] < hi]
    if iscrowd is not None:
        return [ann['id'] for ann in anns if ann['iscrowd'] == iscrowd]
    return [ann['id'] for ann in anns]
def getCatIds(self, catNms=[], supNms=[], catIds=[]):
    """
    Get cat ids that satisfy given filter conditions. default skips that filter.
    :param catNms (str array) : get cats for given cat names
    :param supNms (str array) : get cats for given supercategory names
    :param catIds (int array) : get cats for given cat ids
    :return: ids (int array)  : integer array of cat ids
    """
    catNms = catNms if _isArrayLike(catNms) else [catNms]
    supNms = supNms if _isArrayLike(supNms) else [supNms]
    catIds = catIds if _isArrayLike(catIds) else [catIds]

    # every filter narrows the full category list; empty filters are skipped
    cats = self.dataset['categories']
    if len(catNms) > 0:
        cats = [cat for cat in cats if cat['name'] in catNms]
    if len(supNms) > 0:
        cats = [cat for cat in cats if cat['supercategory'] in supNms]
    if len(catIds) > 0:
        cats = [cat for cat in cats if cat['id'] in catIds]
    return [cat['id'] for cat in cats]
def getImgIds(self, imgIds=[], catIds=[]):
    """
    Get img ids that satisfy given filter conditions.
    :param imgIds (int array) : get imgs for given ids
    :param catIds (int array) : get imgs with all given cats
    :return: ids (int array)  : integer array of img ids
    """
    imgIds = imgIds if _isArrayLike(imgIds) else [imgIds]
    catIds = catIds if _isArrayLike(catIds) else [catIds]

    if len(imgIds) == len(catIds) == 0:
        ids = self.imgs.keys()
    else:
        ids = set(imgIds)
        for i, catId in enumerate(catIds):
            # .get() keeps an unknown category id from being inserted into
            # the index, which plain indexing of a defaultdict would do
            imgsWithCat = set(self.catToImgs.get(catId, []))
            if i == 0 and len(ids) == 0:
                # no explicit image ids: the first category seeds the result
                ids = imgsWithCat
            else:
                ids &= imgsWithCat
    return list(ids)
def loadAnns(self, ids=[]):
    """
    Load anns with the specified ids.
    :param ids (int array)       : integer ids specifying anns; a single integer id is also accepted
    :return: anns (object array) : loaded ann objects, or None when ids is neither a collection nor an integer
    :raises KeyError: if an id is not present in the annotation index
    """
    if _isArrayLike(ids):
        return [self.anns[annId] for annId in ids]
    # numpy integer scalars (e.g. an element taken from an id array) are
    # accepted alongside plain ints
    if isinstance(ids, (int, np.integer)):
        return [self.anns[ids]]
    return None
def loadCats(self, ids=[]):
    """
    Load cats with the specified ids.
    :param ids (int array)       : integer ids specifying cats; a single integer id is also accepted
    :return: cats (object array) : loaded cat objects, or None when ids is neither a collection nor an integer
    :raises KeyError: if an id is not present in the category index
    """
    if _isArrayLike(ids):
        return [self.cats[catId] for catId in ids]
    # numpy integer scalars are accepted alongside plain ints
    if isinstance(ids, (int, np.integer)):
        return [self.cats[ids]]
    return None
def loadImgs(self, ids=[]):
    """
    Load imgs with the specified ids.
    :param ids (int array)       : integer ids specifying img; a single integer id is also accepted
    :return: imgs (object array) : loaded img objects, or None when ids is neither a collection nor an integer
    :raises KeyError: if an id is not present in the image index
    """
    if _isArrayLike(ids):
        return [self.imgs[imgId] for imgId in ids]
    # numpy integer scalars are accepted alongside plain ints
    if isinstance(ids, (int, np.integer)):
        return [self.imgs[ids]]
    return None
def showAnns(self, anns):
    """
    Display the specified annotations.
    Instance annotations (segmentation and/or keypoints) are drawn on the
    current matplotlib axes; caption annotations are printed.
    :param anns (array of object): annotations to display
    :return: 0 when anns is empty, otherwise None
    :raises Exception: if the first annotation has none of 'segmentation', 'keypoints' or 'caption'
    """
    if len(anns) == 0:
        return 0
    # the first annotation decides how the whole list is rendered
    if 'segmentation' in anns[0] or 'keypoints' in anns[0]:
        datasetType = 'instances'
    elif 'caption' in anns[0]:
        datasetType = 'captions'
    else:
        raise Exception('datasetType not supported')
    if datasetType == 'instances':
        ax = plt.gca()
        ax.set_autoscale_on(False)
        polygons = []
        color = []
        for ann in anns:
            # one random colour per annotation, kept in the 0.4-1.0 range
            c = (np.random.random((1, 3))*0.6+0.4).tolist()[0]
            if 'segmentation' in ann:
                if isinstance(ann['segmentation'], list):
                    # polygon: each part is a flat [x0, y0, x1, y1, ...] list
                    for seg in ann['segmentation']:
                        poly = np.array(seg).reshape((len(seg) // 2, 2))
                        polygons.append(Polygon(poly))
                        color.append(c)
                else:
                    # mask
                    t = self.imgs[ann['image_id']]
                    if isinstance(ann['segmentation']['counts'], list):
                        rle = maskUtils.frPyObjects([ann['segmentation']], t['height'], t['width'])
                    else:
                        rle = [ann['segmentation']]
                    m = maskUtils.decode(rle)
                    img = np.ones((m.shape[0], m.shape[1], 3))
                    # crowd regions get a fixed colour; everything else gets a
                    # random one, so color_mask is always bound
                    if ann['iscrowd'] == 1:
                        color_mask = np.array([2.0, 166.0, 101.0])/255
                    else:
                        color_mask = np.random.random((1, 3)).tolist()[0]
                    for i in range(3):
                        img[:, :, i] = color_mask[i]
                    # the mask, scaled by 0.5, is stacked on as a fourth channel
                    ax.imshow(np.dstack((img, m*0.5)))
            if 'keypoints' in ann and isinstance(ann['keypoints'], list):
                # turn skeleton into zero-based index
                sks = np.array(self.loadCats(ann['category_id'])[0]['skeleton'])-1
                kp = np.array(ann['keypoints'])
                # keypoints are stored flat as (x, y, v) triplets
                x = kp[0::3]
                y = kp[1::3]
                v = kp[2::3]
                for sk in sks:
                    # draw a limb only when both of its end points have v > 0
                    if np.all(v[sk] > 0):
                        plt.plot(x[sk], y[sk], linewidth=3, color=c)
                plt.plot(x[v>0], y[v>0], 'o', markersize=8, markerfacecolor=c, markeredgecolor='k', markeredgewidth=2)
                plt.plot(x[v>1], y[v>1], 'o', markersize=8, markerfacecolor=c, markeredgecolor=c, markeredgewidth=2)
        # translucent fill first, then an opaque outline in the same colours
        p = PatchCollection(polygons, facecolor=color, linewidths=0, alpha=0.4)
        ax.add_collection(p)
        p = PatchCollection(polygons, facecolor='none', edgecolors=color, linewidths=2)
        ax.add_collection(p)
    elif datasetType == 'captions':
        for ann in anns:
            print(ann['caption'])
def loadRes(self, resFile):
    """
    Load result file and return a result api object.
    The result objects are updated in place with the derived fields
    ('id', plus 'area', 'bbox', 'segmentation' or 'iscrowd' depending on the
    kind of result).
    :param resFile (str) : file name of result file; a numpy array (converted
           with loadNumpyAnnotations) or an already loaded list of result
           objects is also accepted
    :return: res (obj)   : result api object
    :raises AssertionError: if the results are not a list, or refer to image ids that are not in this dataset
    """
    res = COCO()
    res.dataset['images'] = list(self.dataset['images'])

    print('Loading and preparing results...')
    tic = time.time()
    # `unicode` only exists on Python 2, so it must never be evaluated on Python 3
    if isinstance(resFile, str) or (PYTHON_VERSION == 2 and isinstance(resFile, unicode)):
        # the context manager closes the file even when parsing fails
        with open(resFile) as f:
            anns = json.load(f)
    elif isinstance(resFile, np.ndarray):
        anns = self.loadNumpyAnnotations(resFile)
    else:
        anns = resFile
    # raised explicitly (not via assert) so the checks survive "python -O"
    if not isinstance(anns, list):
        raise AssertionError('results in not an array of objects')
    annsImgIds = [ann['image_id'] for ann in anns]
    if not set(annsImgIds) <= set(self.getImgIds()):
        raise AssertionError('Results do not correspond to current coco set')
    # the first result decides which fields are derived; with no results at
    # all there is nothing to derive and every branch below is skipped
    first = anns[0] if anns else {}
    if 'caption' in first:
        # keep only the images that actually received a caption
        imgIds = set(img['id'] for img in res.dataset['images']) & set(annsImgIds)
        res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds]
        for idx, ann in enumerate(anns):
            ann['id'] = idx + 1
    elif 'bbox' in first and not first['bbox'] == []:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            bb = ann['bbox']
            x1, x2, y1, y2 = bb[0], bb[0]+bb[2], bb[1], bb[1]+bb[3]
            if 'segmentation' not in ann:
                # fall back to the box outline as a four-corner polygon
                ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
            ann['area'] = bb[2]*bb[3]
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'segmentation' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # now only support compressed RLE format as segmentation results
            ann['area'] = maskUtils.area(ann['segmentation'])
            if 'bbox' not in ann:
                ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
            ann['id'] = idx + 1
            ann['iscrowd'] = 0
    elif 'keypoints' in first:
        res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
        for idx, ann in enumerate(anns):
            # keypoints are stored flat as (x, y, v) triplets
            s = ann['keypoints']
            x = s[0::3]
            y = s[1::3]
            x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
            ann['area'] = (x1-x0)*(y1-y0)
            ann['id'] = idx + 1
            ann['bbox'] = [x0, y0, x1-x0, y1-y0]
    print('DONE (t={:0.2f}s)'.format(time.time() - tic))

    res.dataset['annotations'] = anns
    res.createIndex()
    return res
def download(self, tarDir=None, imgIds=[]):
    """
    Download COCO images from mscoco.org server.
    Images whose target file already exists are not fetched again.
    :param tarDir (str): COCO results directory name; created when missing
           imgIds (list): images to be downloaded; empty means every image in the index
    :return: -1 when tarDir is not given, otherwise None
    """
    if tarDir is None:
        print('Please specify target directory')
        return -1
    if len(imgIds) == 0:
        imgs = self.imgs.values()
    else:
        imgs = self.loadImgs(imgIds)
    N = len(imgs)
    if not os.path.exists(tarDir):
        os.makedirs(tarDir)
    # count from 1 so the progress line reads "1/N" ... "N/N"
    for i, img in enumerate(imgs, 1):
        tic = time.time()
        fname = os.path.join(tarDir, img['file_name'])
        if not os.path.exists(fname):
            urlretrieve(img['coco_url'], fname)
        print('downloaded {}/{} images (t={:0.1f}s)'.format(i, N, time.time() - tic))
def loadNumpyAnnotations(self, data):
    """
    Convert result data from a numpy array [Nx7] where each row contains {imageID,x1,y1,w,h,score,class}
    :param data (numpy.ndarray)
    :return: annotations (python nested list)
    :raises AssertionError: if data is not a numpy array with exactly 7 columns
    """
    print('Converting ndarray to lists...')
    # raised explicitly (not via assert) so the checks survive "python -O"
    if not isinstance(data, np.ndarray):
        raise AssertionError('expected a numpy.ndarray, got {}'.format(type(data)))
    if data.ndim != 2 or data.shape[1] != 7:
        raise AssertionError('expected an array of shape [Nx7], got {}'.format(data.shape))
    N = data.shape[0]
    anns = []
    for i in range(N):
        # occasional progress line for very large result sets
        if i % 1000000 == 0:
            print('{}/{}'.format(i, N))
        anns.append({
            'image_id': int(data[i, 0]),
            'bbox': [data[i, 1], data[i, 2], data[i, 3], data[i, 4]],
            'score': data[i, 5],
            'category_id': int(data[i, 6]),
        })
    return anns
def annToRLE(self, ann):
    """
    Convert annotation which can be polygons, uncompressed RLE to RLE.
    :param ann (dict) : annotation with 'image_id' and 'segmentation' entries
    :return: rle      : run-length encoding of the annotation's segmentation
    """
    t = self.imgs[ann['image_id']]
    h, w = t['height'], t['width']
    segm = ann['segmentation']
    if isinstance(segm, list):
        # polygon -- a single object might consist of multiple parts
        # we merge all parts into one mask rle code
        rles = maskUtils.frPyObjects(segm, h, w)
        return maskUtils.merge(rles)
    if isinstance(segm['counts'], list):
        # uncompressed RLE
        return maskUtils.frPyObjects(segm, h, w)
    # anything else is passed through unchanged as an existing RLE
    return segm
def annToMask(self, ann):
    """
    Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
    :param ann (dict) : annotation with 'image_id' and 'segmentation' entries
    :return: binary mask (numpy 2D array)
    """
    rle = self.annToRLE(ann)
    return maskUtils.decode(rle)