[UNetmed/TF] Fix registered metric names
This commit is contained in:
parent
8d31298c56
commit
03a93a1cb2
|
@ -50,5 +50,6 @@ class ProfilingHook(tf.estimator.SessionRunHook):
|
||||||
def end(self, session):
    """Report aggregated performance stats once, when the session closes.

    Gated on Horovod rank 0 so a multi-worker job logs the stats exactly
    once instead of once per worker.

    :param session: The session being closed (unused here).
    """
    if hvd.rank() == 0:
        # NOTE(review): assumes self._timestamps holds per-step timings in
        # seconds and self.mode names the estimator execution phase — confirm
        # against the hook's constructor.
        stats = process_performance_stats(np.array(self._timestamps),
                                          self._global_batch_size,
                                          self.mode)
        # stats is a flat {metric_name: value} dict, logged as one entry.
        self.logger.log(step=(), data=stats)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
@ -17,18 +17,21 @@ import numpy as np
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
def process_performance_stats(timestamps, batch_size, mode):
    """Compute throughput and latency statistics from per-step timings.

    Metric names are tagged with the execution ``mode`` so that metrics
    registered for different phases (e.g. train vs. predict) do not collide.

    :param timestamps: Collection of per-step timestamps, in seconds
        (numpy array or array-like of floats).
    :param batch_size: Number of samples processed per step.
    :param mode: Estimator's execution mode, embedded in each metric name.
    :return: Dict mapping metric name to value: ``throughput_<mode>``
        (mean images/sec), ``latency_<mode>_mean`` and
        ``latency_<mode>_{90,95,99}`` percentiles, all latencies in ms.
    """
    timestamps_ms = 1000 * timestamps
    # Per-step images/sec, averaged over steps (not total-time throughput).
    throughput_imgps = (1000.0 * batch_size / timestamps_ms).mean()
    stats = {f"throughput_{mode}": throughput_imgps,
             f"latency_{mode}_mean": timestamps_ms.mean()}
    # Tail latencies at the standard reporting percentiles.
    for level in [90, 95, 99]:
        stats.update({f"latency_{mode}_{level}": np.percentile(timestamps_ms, level)})
    return stats
|
||||||
|
|
||||||
|
|
||||||
|
@ -77,4 +80,3 @@ if __name__ == '__main__':
|
||||||
parse_convergence_results(path=args.model_dir, environment=args.env)
|
parse_convergence_results(path=args.model_dir, environment=args.env)
|
||||||
elif args.exec_mode == 'benchmark':
|
elif args.exec_mode == 'benchmark':
|
||||||
pass
|
pass
|
||||||
print()
|
|
||||||
|
|
Loading…
Reference in a new issue