[FastPitch/PyT] Small README fixes

Przemek Strzelczyk 2020-06-12 12:13:05 +02:00
parent 66ed01d1ac
commit 23cc1cd5bb
7 changed files with 12 additions and 9 deletions

@@ -643,10 +643,8 @@ To benchmark the inference performance on a specific batch size, run:
 ```
 The output log files will contain performance numbers for the FastPitch model
-(number of output mel-spectrogram frames per second, reported as `generator_frames/s w
-`)
-and for WaveGlow (number of output samples per second, reported as ` waveglow_samples/s
-`).
+(number of output mel-spectrogram frames per second, reported as `generator_frames/s`)
+and for WaveGlow (number of output samples per second, reported as ` waveglow_samples/s`).
 
 The `inference.py` script will run a few warm-up iterations before running the benchmark. Inference will be averaged over 100 runs, as set by the `--repeats` flag.
 
 ### Results
@@ -669,10 +667,10 @@ All of the results were produced using the `train.py` script as described in the
 | FastPitch FP32 | 35.108 | 0.254 | 0.216 | 0.200 | 0.194 | 0.188 | 0.184 |
 
-<div style="text-align:center" align="center">
+<p align="center">
   <img src="./img/loss_fp16.png" alt="AMP loss curve" />
   <img src="./img/loss_fp32.png" alt="FP32 loss curve" />
-</div>
+</p>
 
 #### Training performance results

@@ -81,7 +81,7 @@ The examples are organized first by framework, such as TensorFlow, PyTorch, etc.
 | [Transformer](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Translation/Transformer) |PyTorch | N/A | Yes | Yes | - | - | - | - | - |
 | [ResNet-50 v1.5](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Classification/RN50v1.5) |TensorFlow | Yes | Yes | Yes | - | - | - | - | - |
 | [SSD320 v1.2](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Detection/SSD) | TensorFlow | N/A | Yes | Yes | - | - | - | - | - |
-| [BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT) |TensorFlow | N/A | Yes | Yes | Yes | Yes | - | [Yes](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT/trtis) | Yes |
+| [BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT) |TensorFlow | N/A | Yes | Yes | Yes | Yes | - | [Yes](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT/triton) | Yes |
 | [BioBert](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/BERT/biobert) | TensorFlow | N/A | Yes | Yes | - | - | - | - | - |
 | [Transformer-XL](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/LanguageModeling/Transformer-XL) |TensorFlow | N/A | Yes | Yes | - | - | - | - | - |
 | [Neural Collaborative Filtering](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow/Recommendation/NCF) |TensorFlow | N/A | Yes | Yes | - | - | - | - | - |

@@ -488,6 +488,7 @@ def main(_):
     training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))
   if FLAGS.use_xla:
     config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
+  tf.enable_resource_variables()
 
   run_config = tf.estimator.RunConfig(
       model_dir=FLAGS.output_dir if master_process else None,

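The `tf.enable_resource_variables()` call added above is the same one-line change this commit applies in each of the remaining scripts below. The likely motivation: under XLA auto-clustering (`--use_xla`), legacy reference variables have loosely defined read/write ordering, whereas resource variables give every read a well-defined point in the program. A minimal sketch of the resulting setup, assuming the TF 1.15-era API these scripts target; the `model_dir` value here is a placeholder for the scripts' `FLAGS.output_dir`:

```python
import tensorflow as tf

config = tf.compat.v1.ConfigProto()
# Auto-cluster the whole graph for XLA JIT compilation, as the scripts
# do when --use_xla is set.
config.graph_options.optimizer_options.global_jit_level = (
    tf.compat.v1.OptimizerOptions.ON_1)

# Switch variable creation to resource variables globally. This must run
# before any tf.Variable is created; the scripts call it once in main,
# before the estimator is built.
tf.compat.v1.enable_resource_variables()

run_config = tf.estimator.RunConfig(
    model_dir="/tmp/bert_output",  # placeholder for FLAGS.output_dir
    session_config=config)
```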
@@ -668,6 +668,7 @@ def main(_):
   if FLAGS.use_xla:
     config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
+  tf.enable_resource_variables()
 
   run_config = tf.estimator.RunConfig(
       model_dir=FLAGS.output_dir if master_process else None,
       session_config=config,

@@ -214,14 +214,14 @@ class _LogSessionRunHook(tf.estimator.SessionRunHook):
       if self.hvd_rank >= 0 and FLAGS.report_loss:
         if FLAGS.manual_fp16 or FLAGS.use_fp16:
           self.dllogging.logger.log(step=(print_step),
-                                    data={"Rank": int(rank), "throughput_train": float(sent_per_sec),
+                                    data={"Rank": int(self.hvd_rank), "throughput_train": float(sent_per_sec),
                                           "mlm_loss":float(mlm_loss), "nsp_loss":float(nsp_loss),
                                           "total_loss":float(total_loss), "avg_loss_step":float(avg_loss_step),
                                           "learning_rate": str(lr), "loss_scaler":int(loss_scaler)},
                                           verbosity=Verbosity.DEFAULT)
         else:
           self.dllogging.logger.log(step=int(print_step),
-                                    data={"Rank": int(rank), "throughput_train": float(sent_per_sec),
+                                    data={"Rank": int(self.hvd_rank), "throughput_train": float(sent_per_sec),
                                           "mlm_loss":float(mlm_loss), "nsp_loss":float(nsp_loss),
                                           "total_loss":float(total_loss), "avg_loss_step":float(avg_loss_step),
                                           "learning_rate": str(lr)},
@@ -584,6 +584,7 @@ def main(_):
   if FLAGS.use_xla:
     config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
     config.graph_options.rewrite_options.memory_optimization = rewriter_config_pb2.RewriterConfig.NO_MEM_OPT
+  tf.enable_resource_variables()
 
   run_config = tf.estimator.RunConfig(
       model_dir=FLAGS.output_dir,
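The two `Rank` fixes in the logging-hook hunk above replace a bare `rank`, which is not defined in the hook's scope and would raise a `NameError` (or silently bind to an unrelated module-level name), with the Horovod rank stored on the hook when it is constructed. A minimal sketch of the corrected pattern, with a simplified constructor and log payload (hypothetical; the real hook takes more arguments, and the `Verbosity` import mirrors what these scripts pull from `dllogger`):

```python
import tensorflow as tf
from dllogger import Verbosity  # assumed, matching the scripts' logger

class _LogSessionRunHook(tf.estimator.SessionRunHook):
    def __init__(self, dllogging, hvd_rank=-1):
        self.dllogging = dllogging
        self.hvd_rank = hvd_rank  # -1 means single-process / no Horovod

    def log_step(self, print_step, sent_per_sec, total_loss, lr):
        # The fix: read the rank from the instance attribute set in
        # __init__, not from a free variable `rank`.
        self.dllogging.logger.log(
            step=int(print_step),
            data={"Rank": int(self.hvd_rank),
                  "throughput_train": float(sent_per_sec),
                  "total_loss": float(total_loss),
                  "learning_rate": str(lr)},
            verbosity=Verbosity.DEFAULT)
```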

@@ -779,6 +779,7 @@ def main(_):
   if FLAGS.use_xla:
     config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
+  tf.enable_resource_variables()
 
   run_config = tf.estimator.RunConfig(
       model_dir=FLAGS.output_dir if master_process else None,
       session_config=config,

@@ -965,6 +965,7 @@ def main(_):
     training_hooks.append(hvd.BroadcastGlobalVariablesHook(0))
   if FLAGS.use_xla:
     config.graph_options.optimizer_options.global_jit_level = tf.compat.v1.OptimizerOptions.ON_1
+  tf.enable_resource_variables()
   run_config = tf.estimator.RunConfig(
       model_dir=FLAGS.output_dir if master_process else None,
       session_config=config,