#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Functions to perform COCO evaluation."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import copy
import operator
import pprint
import six
import time

import io
from PIL import Image

import numpy as np
import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging

from mask_rcnn import coco_metric
from mask_rcnn.utils import coco_utils

from mask_rcnn.object_detection import visualization_utils

import dllogger
from dllogger import Verbosity
|
def process_prediction_for_eval(prediction):
    """Convert detection boxes to COCO format, rescaled to the original image.

    Args:
        prediction: dict containing at least:
            - 'image_info': array of shape [batch, >=3]; index 2 holds the
              factor that maps the network input back to original resolution.
            - 'detection_boxes': array of shape [batch, num_boxes, 4] with
              boxes in [y1, x1, y2, x2] order.

    Returns:
        The same dict, with 'detection_boxes' replaced by COCO-style
        [x1, y1, width, height] boxes multiplied by the per-image scale.
    """
    image_info = prediction['image_info']
    box_coordinates = prediction['detection_boxes']
    processed_box_coordinates = np.zeros_like(box_coordinates)

    for image_id in range(box_coordinates.shape[0]):
        scale = image_info[image_id][2]

        # Map [y1, x1, y2, x2] -> [x1, y1, w, h] for all boxes of this image
        # at once, then multiply by the image scale.
        boxes = box_coordinates[image_id]
        y1, x1, y2, x2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
        processed_box_coordinates[image_id] = scale * np.stack(
            [x1, y1, x2 - x1, y2 - y1], axis=-1)

    prediction['detection_boxes'] = processed_box_coordinates
    return prediction
|
def compute_coco_eval_metric(predictor,
                             num_batches=-1,
                             include_mask=True,
                             annotation_json_file="",
                             eval_batch_size=-1,
                             report_frequency=None):
    """Compute COCO eval metric given a prediction generator.

    Args:
        predictor: a generator that iteratively pops a dictionary of predictions
            with the format compatible with COCO eval tool.
        num_batches: the number of batches to be aggregated in eval. This is how
            many times that the predictor gets pulled. A negative value means
            "iterate until the predictor raises StopIteration".
        include_mask: a boolean that indicates whether we include the mask eval.
        annotation_json_file: the annotation json file of the eval dataset.
            Empty string or None means groundtruth is extracted from the
            predictions themselves instead of read from a JSON file.
        eval_batch_size: number of images per predictor batch; used only for
            the imgs/sec throughput report.
        report_frequency: if set, run a full COCO evaluation over the
            predictions accumulated so far every `report_frequency` batches.

    Returns:
        A tuple (eval_results, predictions): the aggregated COCO metric eval
        results, and the dict mapping prediction keys to per-batch array lists.
    """

    if annotation_json_file == "":
        annotation_json_file = None

    use_groundtruth_from_json = (annotation_json_file is not None)

    predictions = dict()
    batch_idx = 0

    if use_groundtruth_from_json:
        eval_metric = coco_metric.EvaluationMetric(annotation_json_file, include_mask=include_mask)

    else:
        eval_metric = coco_metric.EvaluationMetric(filename=None, include_mask=include_mask)

    def evaluation_preds(preds):
        # Run a full COCO evaluation over the accumulated predictions.

        # Essential to avoid modifying the source dict
        _preds = copy.deepcopy(preds)

        # Each value is a list of per-batch arrays; merge along the batch axis.
        for k, v in six.iteritems(_preds):
            _preds[k] = np.concatenate(_preds[k], axis=0)

        if 'orig_images' in _preds and _preds['orig_images'].shape[0] > 10:
            # Only samples a few images for visualization.
            _preds['orig_images'] = _preds['orig_images'][:10]

        if use_groundtruth_from_json:
            eval_results = eval_metric.predict_metric_fn(_preds)

        else:
            # No annotation file: build a COCO-format groundtruth dataset on
            # the fly from the groundtruth carried inside the predictions.
            images, annotations = coco_utils.extract_coco_groundtruth(_preds, include_mask)
            coco_dataset = coco_utils.create_coco_format_dataset(images, annotations)
            eval_results = eval_metric.predict_metric_fn(_preds, groundtruth_data=coco_dataset)

        return eval_results

    # Take into account cuDNN & Tensorflow warmup
    # Drop N first steps for avg throughput calculation
    BURNIN_STEPS = 100
    model_throughput_list = list()
    inference_time_list = list()

    while num_batches < 0 or batch_idx < num_batches:

        try:
            step_t0 = time.time()
            step_predictions = six.next(predictor)
            batch_time = time.time() - step_t0

            throughput = eval_batch_size / batch_time
            model_throughput_list.append(throughput)
            inference_time_list.append(batch_time)

            logging.info('Running inference on batch %03d/%03d... - Step Time: %.4fs - Throughput: %.1f imgs/s' % (
                batch_idx + 1,
                num_batches,
                batch_time,
                throughput
            ))

        except StopIteration:
            logging.info('Get StopIteration at %d batch.' % (batch_idx + 1))
            break

        step_predictions = process_prediction_for_eval(step_predictions)

        # Accumulate each prediction key as a list of per-batch arrays.
        for k, v in step_predictions.items():

            if k not in predictions:
                predictions[k] = [v]

            else:
                predictions[k].append(v)

        batch_idx = batch_idx + 1

        # Optionally report intermediate results every `report_frequency`
        # batches; each report covers eval_batch_size * report_frequency images.
        if report_frequency and batch_idx % report_frequency == 0:
            eval_results = evaluation_preds(preds=predictions)
            logging.info('Eval results: %s' % pprint.pformat(eval_results, indent=4))

    inference_time_list.sort()
    eval_results = evaluation_preds(preds=predictions)

    average_time = np.mean(inference_time_list)
    # Latency percentiles: max of the sorted lower p-fraction of step times.
    # NOTE(review): with fewer than 2 measured batches the 50% slice is empty
    # and max() raises ValueError — assumes several batches ran; confirm.
    latency_50 = max(inference_time_list[:int(len(inference_time_list) * 0.5)])
    latency_90 = max(inference_time_list[:int(len(inference_time_list) * 0.90)])
    latency_95 = max(inference_time_list[:int(len(inference_time_list) * 0.95)])
    latency_99 = max(inference_time_list[:int(len(inference_time_list) * 0.99)])
    latency_100 = max(inference_time_list[:int(len(inference_time_list) * 1)])

    print()  # Visual Spacing
    logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")
    logging.info(" Evaluation Performance Summary ")
    logging.info("# @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ #")

    # NOTE(review): this sums *throughputs* (imgs/s), not step times, before
    # splitting into h/m/s — looks like it should be np.sum(inference_time_list);
    # confirm against the intended "total processing time" semantics.
    total_processing_hours, rem = divmod(np.sum(model_throughput_list), 3600)
    total_processing_minutes, total_processing_seconds = divmod(rem, 60)

    if len(model_throughput_list) > BURNIN_STEPS:
        # Take into account cuDNN & Tensorflow warmup
        # Drop N first steps for avg throughput calculation
        # Also drop last step which may have a different batch size
        avg_throughput = np.mean(model_throughput_list[BURNIN_STEPS:-1])
    else:
        # Not enough steps to exclude warmup; report a sentinel value.
        avg_throughput = -1.

    print()  # Visual Spacing
    logging.info("Average throughput: {throughput:.1f} samples/sec".format(throughput=avg_throughput))
    logging.info("Inference Latency Average (s) = {avg:.4f}".format(avg=average_time))
    logging.info("Inference Latency 50% (s) = {cf_50:.4f}".format(cf_50=latency_50))
    logging.info("Inference Latency 90% (s) = {cf_90:.4f}".format(cf_90=latency_90))
    logging.info("Inference Latency 95% (s) = {cf_95:.4f}".format(cf_95=latency_95))
    logging.info("Inference Latency 99% (s) = {cf_99:.4f}".format(cf_99=latency_99))
    logging.info("Inference Latency 100% (s) = {cf_100:.4f}".format(cf_100=latency_100))
    logging.info("Total processed steps: {total_steps}".format(total_steps=len(model_throughput_list)))
    logging.info(
        "Total processing time: {hours}h {minutes:02d}m {seconds:02d}s".format(
            hours=total_processing_hours,
            minutes=int(total_processing_minutes),
            seconds=int(total_processing_seconds)
        )
    )
    dllogger.log(step=(), data={"avg_inference_throughput": avg_throughput}, verbosity=Verbosity.DEFAULT)
    avg_inference_time = float(total_processing_hours * 3600 + int(total_processing_minutes) * 60 +
                               int(total_processing_seconds))
    dllogger.log(step=(), data={"avg_inference_time": avg_inference_time}, verbosity=Verbosity.DEFAULT)
    logging.info("==================== Metrics ====================")

    # logging.info('Eval Epoch results: %s' % pprint.pformat(eval_results, indent=4))
    # Log metrics sorted by name for a stable, diffable report.
    for key, value in sorted(eval_results.items(), key=operator.itemgetter(0)):
        logging.info("%s: %.9f" % (key, value))
    print()  # Visual Spacing

    return eval_results, predictions
|
def evaluate(eval_estimator,
             input_fn,
             num_eval_samples,
             eval_batch_size,
             include_mask=True,
             validation_json_file="",
             report_frequency=None):
    """Runs COCO evaluation once."""
    prediction_generator = eval_estimator.predict(
        input_fn=input_fn,
        yield_single_examples=False
    )

    # Each pull from the generator yields one batch of predictions (a dict),
    # so the number of pulls is the number of whole batches in the eval set.
    batch_count = num_eval_samples // eval_batch_size
    assert batch_count > 0, 'num_eval_samples must be >= eval_batch_size!'

    return compute_coco_eval_metric(
        prediction_generator,
        batch_count,
        include_mask,
        validation_json_file,
        eval_batch_size=eval_batch_size,
        report_frequency=report_frequency
    )
|
def write_summary(eval_results, summary_dir, current_step, predictions=None):
    """Write out eval results for the checkpoint.

    Emits scalar summaries for every metric in `eval_results` (plus optional
    image previews from `predictions`) to `summary_dir`, and logs the metrics
    through dllogger. Supports both TF1 and TF2 summary APIs by catching
    AttributeError when the tf.compat.v1 path is unavailable.

    Args:
        eval_results: mapping of metric name -> numeric value.
        summary_dir: directory for the TensorBoard event files.
        current_step: global step to attach to the summaries.
        predictions: optional dict of accumulated predictions used to build
            image summaries via get_image_summary().
    """
    with tf.Graph().as_default():
        summaries = []

        # Summary writer writes out eval metrics.
        try:
            # Tensorflow 1.x
            summary_writer = tf.compat.v1.summary.FileWriter(summary_dir)
        except AttributeError:
            # Tensorflow 2.x
            summary_writer = tf.summary.create_file_writer(summary_dir)
            # NOTE(review): as_default() returns a context manager that is
            # never entered here, so TF2 scalar writes below may not be bound
            # to this writer — confirm on a TF2-only runtime.
            summary_writer.as_default()

        eval_results_dict = {}
        for metric in eval_results:
            try:
                # TF1 path: accumulate Summary.Value protos, flushed below.
                summaries.append(tf.compat.v1.Summary.Value(tag=metric, simple_value=eval_results[metric]))
                eval_results_dict[metric] = float(eval_results[metric])

            except AttributeError:
                # TF2 path: write each scalar eagerly.
                tf.summary.scalar(name=metric, data=eval_results[metric], step=current_step)
                eval_results_dict[metric] = float(eval_results[metric])
        dllogger.log(step=(), data=eval_results_dict, verbosity=Verbosity.DEFAULT)

        if isinstance(predictions, dict) and predictions:
            images_summary = get_image_summary(predictions, current_step)

            # TF1 returns a list of Summary.Value (extend); TF2 returns a
            # single op/tensor (append) — TypeError distinguishes the two.
            try:
                summaries += images_summary
            except TypeError:
                summaries.append(images_summary)

        try:
            # tf_summaries = tf.compat.v1.Summary(value=list(summaries))
            tf_summaries = tf.compat.v1.Summary(value=summaries)
            summary_writer.add_summary(tf_summaries, current_step)
            summary_writer.flush()

        except AttributeError:
            # TF2 writer has no add_summary(); just flush pending writes.
            tf.summary.flush(summary_writer)
|
def generate_image_preview(image, boxes, scores, classes, gt_boxes=None, segmentations=None):
    """Render detections (and optional groundtruth boxes) onto an image array.

    Args:
        image: the image array to draw on.
        boxes: detection boxes in absolute pixel coordinates.
        scores: per-box confidence scores.
        classes: per-box class ids.
        gt_boxes: optional groundtruth boxes, overlaid in black.
        segmentations: optional instance masks to draw with the detections.

    Returns:
        The image array with visualizations drawn on it.
    """
    max_boxes_to_draw = 100
    min_score_thresh = 0.1

    # Draw the model predictions first.
    preview = visualization_utils.visualize_boxes_and_labels_on_image_array(
        image,
        boxes,
        classes=classes,
        scores=scores,
        category_index={},
        instance_masks=segmentations,
        use_normalized_coordinates=False,
        max_boxes_to_draw=max_boxes_to_draw,
        min_score_thresh=min_score_thresh,
        agnostic_mode=False
    )

    if gt_boxes is None:
        return preview

    # Overlay the groundtruth boxes on top. They are in black by default.
    return visualization_utils.visualize_boxes_and_labels_on_image_array(
        preview,
        gt_boxes,
        classes=None,
        scores=None,
        category_index={},
        use_normalized_coordinates=False,
        max_boxes_to_draw=max_boxes_to_draw,
        agnostic_mode=True
    )
|
def generate_image_buffer(input_image):
    """Encode an image array as PNG bytes, downscaled toward 1024 pixels.

    NOTE(review): ``shape[:2]`` of a numpy image is (height, width), so the
    scale factor here is derived from the *first* axis — confirm that the
    1024-pixel target is meant for that dimension.
    """
    dim0, dim1 = input_image.shape[:2]
    scale_factor = 1024 / dim0
    target_size = [int(dim0 * scale_factor), int(dim1 * scale_factor)]

    pil_image = Image.fromarray(input_image.astype(np.uint8))
    # thumbnail() resizes in place, never upscaling beyond aspect-fit.
    pil_image.thumbnail(target_size)

    buffer = io.BytesIO()
    pil_image.save(buffer, format='png')
    return buffer.getvalue()
|
def get_image_summary(predictions, current_step, max_images=10):
    """Write out image and prediction for summary.

    Builds TensorBoard image summaries showing up to `max_images` eval images
    with their detections (and groundtruth boxes/masks when available).

    Args:
        predictions: dict of per-batch prediction arrays; must include
            'orig_images' plus detection boxes/scores/classes, image_info and
            num_detections. 'detection_masks' and 'groundtruth_boxes' are
            optional.
        current_step: global step attached to the TF2 image summary.
        max_images: maximum number of images to visualize.

    Returns:
        TF1: a list of tf.compat.v1.Summary.Value image protos.
        TF2: the result of tf.summary.image().
        None when 'orig_images' is missing from `predictions`.
    """

    if 'orig_images' not in predictions:
        logging.info('Missing orig_images in predictions: %s', predictions.keys())
        return

    # Cap by the number of images actually available (batches * batch size).
    max_images = min(
        len(predictions['orig_images']) * predictions['orig_images'][0].shape[0],
        max_images
    )

    # Merge the per-batch lists along the batch axis.
    _detection_boxes = np.concatenate(predictions['detection_boxes'], axis=0)
    _detection_scores = np.concatenate(predictions['detection_scores'], axis=0)
    _detection_classes = np.concatenate(predictions['detection_classes'], axis=0)
    _image_info = np.concatenate(predictions['image_info'], axis=0)
    _num_detections = np.concatenate(predictions['num_detections'], axis=0)
    _orig_images = np.concatenate(predictions['orig_images'], axis=0)

    if 'detection_masks' in predictions:
        _detection_masks = np.concatenate(predictions['detection_masks'], axis=0)
    else:
        _detection_masks = None

    if 'groundtruth_boxes' in predictions:
        _groundtruth_boxes = np.concatenate(predictions['groundtruth_boxes'], axis=0)
    else:
        _groundtruth_boxes = None

    # assumes orig_images values are in [0, 1] — TODO confirm against producer
    _orig_images = _orig_images * 255
    _orig_images = _orig_images.astype(np.uint8)

    image_previews = []

    for i in range(max_images):
        # Only the first num_detections rows of each array are valid.
        num_detections = min(len(_detection_boxes[i]), int(_num_detections[i]))

        detection_boxes = _detection_boxes[i][:num_detections]
        detection_scores = _detection_scores[i][:num_detections]
        detection_classes = _detection_classes[i][:num_detections]

        image = _orig_images[i]
        image_height = image.shape[0]
        image_width = image.shape[1]

        # Rescale the box to fit the visualization image.
        # presumably image_info[3:5] holds the original (height, width); verify
        h, w = _image_info[i][3:5]
        detection_boxes = detection_boxes / np.array([w, h, w, h])
        detection_boxes = detection_boxes * np.array([image_width, image_height, image_width, image_height])

        if _groundtruth_boxes is not None:
            # Groundtruth boxes are normalized [y, x, y, x]; scale to pixels.
            gt_boxes = _groundtruth_boxes[i]
            gt_boxes = gt_boxes * np.array([image_height, image_width, image_height, image_width])
        else:
            gt_boxes = None

        if _detection_masks is not None:
            # Paste per-box masks into full-image segmentation maps.
            instance_masks = _detection_masks[i][0:num_detections]
            segmentations = coco_metric.generate_segmentation_from_masks(
                instance_masks,
                detection_boxes,
                image_height,
                image_width
            )
        else:
            segmentations = None

        # process_prediction_for_eval() produced COCO [x, y, w, h] boxes;
        # convert to the [y1, x1, y2, x2] corner format the visualizer expects.
        xmin, ymin, w, h = np.split(detection_boxes, 4, axis=-1)
        xmax = xmin + w
        ymax = ymin + h

        boxes_to_visualize = np.concatenate([ymin, xmin, ymax, xmax], axis=-1)

        image_preview = generate_image_preview(
            image,
            boxes=boxes_to_visualize,
            scores=detection_scores,
            classes=detection_classes.astype(np.int32),
            gt_boxes=gt_boxes,
            segmentations=segmentations
        )
        image_previews.append(image_preview)

    try:
        # TF1 path: encode each preview as PNG and wrap it in Summary protos.
        summaries = []

        for i, image_preview in enumerate(image_previews):
            image_buffer = generate_image_buffer(image_preview)
            image_summary = tf.compat.v1.Summary.Image(encoded_image_string=image_buffer)
            image_value = tf.compat.v1.Summary.Value(tag='%d_input' % i, image=image_summary)

            summaries.append(image_value)

    except AttributeError:
        # TF2 path: write a batched image summary directly.
        image_previews = np.array(image_previews)
        summaries = tf.summary.image(
            name='image_summary',
            data=image_previews,
            step=current_step,
            max_outputs=max_images
        )

    return summaries