[FastPitch/PyT] Small README fixes
This commit is contained in:
parent
66ed01d1ac
commit
23cc1cd5bb
|
@ -643,10 +643,8 @@ To benchmark the inference performance on a specific batch size, run:
|
|||
```
|
||||
|
||||
The output log files will contain performance numbers for the FastPitch model
|
||||
(number of output mel-spectrogram frames per second, reported as `generator_frames/s w
|
||||
`)
|
||||
and for WaveGlow (number of output samples per second, reported as ` waveglow_samples/s
|
||||
`).
|
||||
(number of output mel-spectrogram frames per second, reported as `generator_frames/s`)
|
||||
and for WaveGlow (number of output samples per second, reported as `waveglow_samples/s`).
|
||||
The `inference.py` script will run a few warm-up iterations before running the benchmark. The reported performance is averaged over 100 runs, as set by the `--repeats` flag.
|
||||
|
||||
### Results
|
||||
|
@ -669,10 +667,10 @@ All of the results were produced using the `train.py` script as described in the
|
|||
| FastPitch FP32 | 35.108 | 0.254 | 0.216 | 0.200 | 0.194 | 0.188 | 0.184 |
|
||||
|
||||
|
||||
<div style="text-align:center" align="center">
|
||||
<p align="center">
|
||||
<img src="./img/loss_fp16.png" alt="AMP loss curve" />
|
||||
<img src="./img/loss_fp32.png" alt="FP32 loss curve" />
|
||||
</div>
|
||||
</p>
|
||||
|
||||
#### Training performance results
|
||||
|
||||
|
|
|
@ -81,7 +81,7 @@ The examples are organized first by framework, such as TensorFlow, PyTorch, etc.
|
|||
| [Transformer](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Translation/Transformer) |PyTorch | N/A | Yes | Yes | - | - | - | - | - |
|
||||
| [ResNet-50 v1.5](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Classification/RN50v1.5) |TensorFlow | Yes | Yes | Yes | - | - | - | - | - |
|
||||
| [SSD320 v1.2](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Detection/SSD) | TensorFlow | N/A | Yes | Yes | - | - | - | - | - |
|
||||
| [BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT) |TensorFlow | N/A | Yes | Yes | Yes | Yes | - | [Yes](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT/trtis) | Yes |
|
||||
| [BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT) |TensorFlow | N/A | Yes | Yes | Yes | Yes | - | [Yes](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT/triton) | Yes |
|
||||
| [BioBert](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT/biobert) | TensorFlow | N/A | Yes | Yes | - | - | - | - | - |
|
||||
| [Transformer-XL](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/Transformer-XL) |TensorFlow | N/A | Yes | Yes | - | - | - | - | - |
|
||||
| [Neural Collaborative Filtering](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Recommendation/NCF) |TensorFlow | N/A | Yes | Yes | - | - | - | - | - |
|
||||
|
|
|
@ -488,6 +488,7 @@ def main(_):
|
|||
training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))
|
||||
if FLAGS.use_xla:
|
||||
config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
|
||||
tf.enable_resource_variables()
|
||||
|
||||
run_config = tf.estimator.RunConfig(
|
||||
model_dir=FLAGS.output_dir if master_process else None,
|
||||
|
|
|
@ -668,6 +668,7 @@ def main(_):
|
|||
|
||||
if FLAGS.use_xla:
|
||||
config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
|
||||
tf.enable_resource_variables()
|
||||
run_config = tf.estimator.RunConfig(
|
||||
model_dir=FLAGS.output_dir if master_process else None,
|
||||
session_config=config,
|
||||
|
|
|
@ -214,14 +214,14 @@ class _LogSessionRunHook(tf.estimator.SessionRunHook):
|
|||
if self.hvd_rank >= 0 and FLAGS.report_loss:
|
||||
if FLAGS.manual_fp16 or FLAGS.use_fp16:
|
||||
self.dllogging.logger.log(step=(print_step),
|
||||
data={"Rank": int(rank), "throughput_train": float(sent_per_sec),
|
||||
data={"Rank": int(self.hvd_rank), "throughput_train": float(sent_per_sec),
|
||||
"mlm_loss":float(mlm_loss), "nsp_loss":float(nsp_loss),
|
||||
"total_loss":float(total_loss), "avg_loss_step":float(avg_loss_step),
|
||||
"learning_rate": str(lr), "loss_scaler":int(loss_scaler)},
|
||||
verbosity=Verbosity.DEFAULT)
|
||||
else:
|
||||
self.dllogging.logger.log(step=int(print_step),
|
||||
data={"Rank": int(rank), "throughput_train": float(sent_per_sec),
|
||||
data={"Rank": int(self.hvd_rank), "throughput_train": float(sent_per_sec),
|
||||
"mlm_loss":float(mlm_loss), "nsp_loss":float(nsp_loss),
|
||||
"total_loss":float(total_loss), "avg_loss_step":float(avg_loss_step),
|
||||
"learning_rate": str(lr)},
|
||||
|
@ -584,6 +584,7 @@ def main(_):
|
|||
if FLAGS.use_xla:
|
||||
config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
|
||||
config.graph_options.rewrite_options.memory_optimization = rewriter_config_pb2.RewriterConfig.NO_MEM_OPT
|
||||
tf.enable_resource_variables()
|
||||
|
||||
run_config = tf.estimator.RunConfig(
|
||||
model_dir=FLAGS.output_dir,
|
||||
|
|
|
@ -779,6 +779,7 @@ def main(_):
|
|||
|
||||
if FLAGS.use_xla:
|
||||
config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
|
||||
tf.enable_resource_variables()
|
||||
run_config = tf.estimator.RunConfig(
|
||||
model_dir=FLAGS.output_dir if master_process else None,
|
||||
session_config=config,
|
||||
|
|
|
@ -965,6 +965,7 @@ def main(_):
|
|||
training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))
|
||||
if FLAGS.use_xla:
|
||||
config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
|
||||
tf.enable_resource_variables()
|
||||
run_config = tf.estimator.RunConfig(
|
||||
model_dir=FLAGS.output_dir if master_process else None,
|
||||
session_config=config,
|
||||
|
|
Loading…
Reference in a new issue