__author__ = 'tsungyi' import pycocotools._mask as _mask # Interface for manipulating masks stored in RLE format. # # RLE is a simple yet efficient format for storing binary masks. RLE # first divides a vector (or vectorized image) into a series of piecewise # constant regions and then for each piece simply stores the length of # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] # (note that the odd counts are always the numbers of zeros). Instead of # storing the counts directly, additional compression is achieved with a # variable bitrate representation based on a common scheme called LEB128. # # Compression is greatest given large piecewise constant regions. # Specifically, the size of the RLE is proportional to the number of # *boundaries* in M (or for an image the number of boundaries in the y # direction). Assuming fairly simple shapes, the RLE representation is # O(sqrt(n)) where n is number of pixels in the object. Hence space usage # is substantially lower, especially for large simple objects (large n). # # Many common operations on masks can be computed directly using the RLE # (without need for decoding). This includes computations such as area, # union, intersection, etc. All of these operations are linear in the # size of the RLE, in other words they are O(sqrt(n)) where n is the area # of the object. Computing these operations on the original mask is O(n). # Thus, using the RLE can result in substantial computational savings. # # The following API functions are defined: # encode - Encode binary masks using RLE. # decode - Decode binary masks encoded via RLE. # merge - Compute union or intersection of encoded masks. # iou - Compute intersection over union between masks. # area - Compute area of encoded masks. # toBbox - Get bounding boxes surrounding encoded masks. # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. # # Usage: # Rs = encode( masks ) # masks = decode( Rs ) # R = merge( Rs, intersect=false ) # o = iou( dt, gt, iscrowd ) # a = area( Rs ) # bbs = toBbox( Rs ) # Rs = frPyObjects( [pyObjects], h, w ) # # In the API the following formats are used: # Rs - [dict] Run-length encoding of binary masks # R - dict Run-length encoding of binary mask # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore # bbs - [nx4] Bounding box(es) stored as [x y w h] # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) # dt,gt - May be either bounding boxes or encoded masks # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). # # Finally, a note about the intersection over union (iou) computation. # The standard iou of a ground truth (gt) and detected (dt) object is # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) # For "crowd" regions, we use a modified criteria. If a gt object is # marked as "iscrowd", we allow a dt to match any subregion of the gt. # Choosing gt' in the crowd gt that best matches the dt can be done using # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) # For crowd gt regions we use this modified criteria above for the iou. # # To compile run "python setup.py build_ext --inplace" # Please do not contact us for help with compiling. # # Microsoft COCO Toolbox. version 2.0 # Data, paper, and tutorials available at: http://mscoco.org/ # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. # Licensed under the Simplified BSD License [see coco/license.txt] iou = _mask.iou merge = _mask.merge frPyObjects = _mask.frPyObjects def encode(bimask): if len(bimask.shape) == 3: return _mask.encode(bimask) elif len(bimask.shape) == 2: h, w = bimask.shape return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] def decode(rleObjs): if type(rleObjs) == list: return _mask.decode(rleObjs) else: return _mask.decode([rleObjs])[:,:,0] def area(rleObjs): if type(rleObjs) == list: return _mask.area(rleObjs) else: return _mask.area([rleObjs])[0] def toBbox(rleObjs): if type(rleObjs) == list: return _mask.toBbox(rleObjs) else: return _mask.toBbox([rleObjs])[0]