#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""COCO-style evaluation metrics.
Implements the interface of COCO API and metric_fn in tf.TPUEstimator.
COCO API: github.com/cocodataset/cocoapi/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import atexit
import copy
import tempfile

import cv2
import numpy as np
import tensorflow as tf
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import pycocotools.mask as maskUtils

from mask_rcnn.utils.logging_formatter import logging


class MaskCOCO(COCO):
  """COCO object for mask evaluation."""

  def reset(self, dataset):
    """Resets the dataset and groundtruth data index in this object.

    Args:
      dataset: dict of groundtruth data. It should have a similar structure
        to the COCO groundtruth JSON file and must contain three keys:
        {'images', 'annotations', 'categories'}.
        'images': list of image information dictionaries. Required keys:
          'id', 'width' and 'height'.
        'annotations': list of dicts with bounding-box and segmentation
          information. Required keys: {'id', 'image_id', 'category_id',
          'bbox', 'iscrowd', 'area', 'segmentation'}.
        'categories': list of dicts with category information. Required
          key: 'id'.
        Refer to http://cocodataset.org/#format-data for more details.

    Raises:
      AssertionError: If the dataset is empty or not a dict.
    """
assert dataset, 'Groundtruth should not be empty.'
assert isinstance(dataset,
dict), 'annotation file format {} not supported'.format(
type(dataset))
self.anns, self.cats, self.imgs = dict(), dict(), dict()
self.dataset = copy.deepcopy(dataset)
self.createIndex()

  def loadRes(self, detection_results, include_mask, is_image_mask=False):
    """Loads result data and returns a result api object.

    Args:
      detection_results: a dictionary containing prediction results.
      include_mask: a boolean, whether to include mask in detection results.
      is_image_mask: a boolean, whether the predicted mask is a whole-image
        mask.

    Returns:
      res: result MaskCOCO api object
    """
res = MaskCOCO()
res.dataset['images'] = [img for img in self.dataset['images']]
logging.info('Loading and preparing results...')
predictions = self.load_predictions(
detection_results,
include_mask=include_mask,
is_image_mask=is_image_mask)
    assert isinstance(predictions, list), 'results is not an array of objects'
if predictions:
image_ids = [pred['image_id'] for pred in predictions]
assert set(image_ids) == (set(image_ids) & set(self.getImgIds())), \
'Results do not correspond to current coco set'
if (predictions and 'bbox' in predictions[0] and predictions[0]['bbox']):
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for idx, pred in enumerate(predictions):
bb = pred['bbox']
x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
if 'segmentation' not in pred:
pred['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
pred['area'] = bb[2] * bb[3]
pred['id'] = idx + 1
pred['iscrowd'] = 0
elif 'segmentation' in predictions[0]:
res.dataset['categories'] = copy.deepcopy(self.dataset['categories'])
for idx, pred in enumerate(predictions):
        # Only compressed RLE format is currently supported for segmentation
        # results.
pred['area'] = maskUtils.area(pred['segmentation'])
if 'bbox' not in pred:
pred['bbox'] = maskUtils.toBbox(pred['segmentation'])
pred['id'] = idx + 1
pred['iscrowd'] = 0
res.dataset['annotations'] = predictions
res.createIndex()
return res

  def load_predictions(self,
                       detection_results,
                       include_mask,
                       is_image_mask=False):
    """Creates prediction dictionaries from detection and mask results.

    Args:
      detection_results: a dictionary containing numpy arrays that correspond
        to prediction results.
      include_mask: a boolean, whether to include mask in detection results.
      is_image_mask: a boolean, whether the predicted mask is a whole-image
        mask.

    Returns:
      a list of dictionaries containing the model's prediction results in
      numpy form.
    """
predictions = []
num_detections = detection_results['detection_scores'].size
current_index = 0
for i, image_id in enumerate(detection_results['source_id']):
if include_mask:
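        # image_info columns 3 and 4 carry the height and width used as the
        # full-image paste canvas for the box-level masks below (assumed to
        # be the original, pre-resize image size).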
        box_coordinates_in_image = detection_results['detection_boxes'][i]
        segments = generate_segmentation_from_masks(
            detection_results['detection_masks'][i],
            box_coordinates_in_image,
            int(detection_results['image_info'][i][3]),
            int(detection_results['image_info'][i][4]),
            is_image_mask=is_image_mask
        )
# Convert the mask to uint8 and then to fortranarray for RLE encoder.
encoded_masks = [
maskUtils.encode(np.asfortranarray(instance_mask.astype(np.uint8)))
for instance_mask in segments
]
for box_index in range(int(detection_results['num_detections'][i])):
if current_index % 1000 == 0:
logging.info('{}/{}'.format(current_index, num_detections))
current_index += 1
prediction = {
'image_id': int(image_id),
'bbox': detection_results['detection_boxes'][i][box_index].tolist(),
'score': detection_results['detection_scores'][i][box_index],
'category_id': int(
detection_results['detection_classes'][i][box_index]),
}
if include_mask:
prediction['segmentation'] = encoded_masks[box_index]
predictions.append(prediction)
return predictions
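

# Illustrative sketch (not part of the original module): a minimal
# groundtruth `dataset` dict of the shape `MaskCOCO.reset` expects. All ids
# and geometry below are made-up placeholder values.
def _example_groundtruth_dataset():
  return {
      'images': [{'id': 1, 'width': 640, 'height': 480}],
      'annotations': [{
          'id': 1,
          'image_id': 1,
          'category_id': 1,
          'bbox': [10.0, 20.0, 100.0, 50.0],  # [x, y, width, height]
          'iscrowd': 0,
          'area': 5000.0,
          'segmentation': [[10.0, 20.0, 10.0, 70.0, 110.0, 70.0, 110.0,
                            20.0]],
      }],
      'categories': [{'id': 1}],
  }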


def generate_segmentation_from_masks(masks,
                                     detected_boxes,
                                     image_height,
                                     image_width,
                                     is_image_mask=False):
  """Generates segmentation results from instance masks.

  Args:
    masks: a numpy array of shape [N, mask_height, mask_width] representing
      the instance masks w.r.t. the `detected_boxes`.
    detected_boxes: a numpy array of shape [N, 4] representing the reference
      bounding boxes.
    image_height: an integer representing the height of the image.
    image_width: an integer representing the width of the image.
    is_image_mask: a boolean, whether the input masks are whole-image masks
      rather than bounding-box level masks.

  Returns:
    segms: a numpy array of shape [N, image_height, image_width] representing
      the instance masks *pasted* on the image canvas.
  """
def expand_boxes(boxes, scale):
"""Expands an array of boxes by a given scale."""
# Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/utils/boxes.py#L227
# The `boxes` in the reference implementation is in [x1, y1, x2, y2] form,
# whereas `boxes` here is in [x1, y1, w, h] form
w_half = boxes[:, 2] * .5
h_half = boxes[:, 3] * .5
x_c = boxes[:, 0] + w_half
y_c = boxes[:, 1] + h_half
w_half *= scale
h_half *= scale
boxes_exp = np.zeros(boxes.shape)
boxes_exp[:, 0] = x_c - w_half
boxes_exp[:, 2] = x_c + w_half
boxes_exp[:, 1] = y_c - h_half
boxes_exp[:, 3] = y_c + h_half
return boxes_exp
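
  # For example, a `boxes` row [x, y, w, h] = [3, 3, 4, 4] expanded with
  # scale 2.0 yields [x1, y1, x2, y2] = [1, 1, 9, 9].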
# Reference: https://github.com/facebookresearch/Detectron/blob/master/detectron/core/test.py#L812
# To work around an issue with cv2.resize (it seems to automatically pad
# with repeated border values), we manually zero-pad the masks by 1 pixel
# prior to resizing back to the original image resolution. This prevents
# "top hat" artifacts. We therefore need to expand the reference boxes by an
# appropriate factor.
_, mask_height, mask_width = masks.shape
scale = max((mask_width + 2.0) / mask_width,
(mask_height + 2.0) / mask_height)
ref_boxes = expand_boxes(detected_boxes, scale)
ref_boxes = ref_boxes.astype(np.int32)
padded_mask = np.zeros((mask_height + 2, mask_width + 2), dtype=np.float32)
segms = []
for mask_ind, mask in enumerate(masks):
im_mask = np.zeros((image_height, image_width), dtype=np.uint8)
if is_image_mask:
# Process whole-image masks.
im_mask[:, :] = mask[:, :]
else:
# Process mask inside bounding boxes.
padded_mask[1:-1, 1:-1] = mask[:, :]
ref_box = ref_boxes[mask_ind, :]
w = ref_box[2] - ref_box[0] + 1
h = ref_box[3] - ref_box[1] + 1
w = np.maximum(w, 1)
h = np.maximum(h, 1)
mask = cv2.resize(padded_mask, (w, h))
mask = np.array(mask > 0.5, dtype=np.uint8)
x_0 = max(ref_box[0], 0)
x_1 = min(ref_box[2] + 1, image_width)
y_0 = max(ref_box[1], 0)
y_1 = min(ref_box[3] + 1, image_height)
im_mask[y_0:y_1, x_0:x_1] = mask[(y_0 - ref_box[1]):(y_1 - ref_box[1]), (
x_0 - ref_box[0]):(x_1 - ref_box[0])]
segms.append(im_mask)
segms = np.array(segms)
assert masks.shape[0] == segms.shape[0]
return segms
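

# Illustrative usage sketch (not part of the original module): paste one 2x2
# box-level mask onto a 10x10 canvas. The dummy arrays follow the shapes
# documented above; the values are made up for demonstration.
def _example_paste_single_mask():
  dummy_masks = np.ones((1, 2, 2), dtype=np.float32)  # [N, mask_h, mask_w]
  dummy_boxes = np.array([[3.0, 3.0, 4.0, 4.0]])  # [N, 4] in [x, y, w, h]
  segms = generate_segmentation_from_masks(
      dummy_masks, dummy_boxes, image_height=10, image_width=10)
  assert segms.shape == (1, 10, 10)
  return segms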


class EvaluationMetric(object):
  """COCO evaluation metric class."""

  def __init__(self, filename, include_mask):
    """Constructs COCO evaluation class.

    The class provides the interface to metrics_fn in TPUEstimator. The
    _evaluate() loads a JSON file in COCO annotation format as the
    groundtruths and runs COCO evaluation.

    Args:
      filename: Ground truth JSON file name. If filename is None, use
        groundtruth data passed from the dataloader for evaluation.
      include_mask: boolean to indicate whether or not to include mask eval.
    """
if filename:
if filename.startswith('gs://'):
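        # pycocotools needs a local file, so stage the GCS-hosted JSON in a
        # temporary path and delete it at interpreter exit.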
_, local_val_json = tempfile.mkstemp(suffix='.json')
tf.io.gfile.remove(local_val_json)
tf.io.gfile.copy(filename, local_val_json)
atexit.register(tf.io.gfile.remove, local_val_json)
else:
local_val_json = filename
self.coco_gt = MaskCOCO(local_val_json)
self.filename = filename
self.metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']
self._include_mask = include_mask
if self._include_mask:
mask_metric_names = ['mask_' + x for x in self.metric_names]
self.metric_names.extend(mask_metric_names)
self._reset()

  def _reset(self):
"""Reset COCO API object."""
if self.filename is None and not hasattr(self, 'coco_gt'):
self.coco_gt = MaskCOCO()

  def predict_metric_fn(self,
predictions,
is_predict_image_mask=False,
groundtruth_data=None):
"""Generates COCO metrics."""
image_ids = list(set(predictions['source_id']))
if groundtruth_data is not None:
self.coco_gt.reset(groundtruth_data)
coco_dt = self.coco_gt.loadRes(
predictions, self._include_mask, is_image_mask=is_predict_image_mask)
coco_eval = COCOeval(self.coco_gt, coco_dt, iouType='bbox')
coco_eval.params.imgIds = image_ids
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
coco_metrics = coco_eval.stats
    if self._include_mask:
      # Create another COCOeval object for instance segmentation evaluation.
      mcoco_eval = COCOeval(self.coco_gt, coco_dt, iouType='segm')
      mcoco_eval.params.imgIds = image_ids
      mcoco_eval.evaluate()
      mcoco_eval.accumulate()
      mcoco_eval.summarize()
      metrics = np.hstack((coco_metrics, mcoco_eval.stats))
    else:
      metrics = coco_metrics
    # Clean up after evaluation is done.
self._reset()
metrics = metrics.astype(np.float32)
metrics_dict = {}
for i, name in enumerate(self.metric_names):
metrics_dict[name] = metrics[i]
return metrics_dict
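

# Illustrative usage sketch (not part of the original module). Assumptions:
# `annotations_path` points to a COCO-format groundtruth JSON on disk, and
# `predictions` is a dict of numpy arrays batched over images with the keys
# consumed by `MaskCOCO.load_predictions` above ('source_id',
# 'num_detections', 'detection_boxes', 'detection_scores' and
# 'detection_classes', plus 'image_info' and 'detection_masks' when masks
# are evaluated).
def _example_evaluate(annotations_path, predictions):
  metric = EvaluationMetric(annotations_path, include_mask=False)
  # Returns a dict mapping metric names ('AP', 'AP50', ...) to float values.
  return metric.predict_metric_fn(predictions)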