DeepLearningExamples/TensorFlow/Recommendation/NCF/inference.py
Commit a644350589 by Przemek Strzelczyk (2019-07-16): Updating models and adding BERT/PyT
Tacotron2+Waveglow/PyT
* AMP support
* Data preprocessing for Tacotron 2 training
* Fixed dropouts on LSTMCells

SSD/PyT
* script and notebook for inference
* AMP support
* README update
* updates to examples/*

BERT/PyT
* initial release

GNMT/PyT
* Default container updated to NGC PyTorch 19.05-py3
* Mixed precision training implemented using APEX AMP (see the sketch after this list)
* Added inference throughput and latency results on NVIDIA Tesla V100 16G
* Added option to run inference on user-provided raw input text from command line
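
A minimal sketch of the APEX AMP pattern the note above refers to (not the repository's actual training code; the model, optimizer, and loss below are placeholders and a CUDA device is assumed):

    import torch
    from apex import amp

    model = torch.nn.Linear(1024, 1024).cuda()          # placeholder model
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    # Wrap model and optimizer; "O1" patches whitelisted ops to run in FP16.
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    inputs = torch.randn(32, 1024, device="cuda")
    loss = model(inputs).float().pow(2).mean()          # placeholder loss

    optimizer.zero_grad()
    # Scale the loss so FP16 gradients do not underflow.
    with amp.scale_loss(loss, optimizer) as scaled_loss:
        scaled_loss.backward()
    optimizer.step()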

NCF/PyT
* Updated performance tables
* Default container changed to PyTorch 19.06-py3
* Added caching of validation negatives between runs

Transformer/PyT
* new README
* jit support added

UNet Medical/TF
* inference example scripts added
* inference benchmark measuring latency added
* TRT/TF-TRT support added (see the sketch after this list)
* README updated
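
A minimal sketch of TF-TRT conversion under TF 1.x using tf.contrib.tensorrt (the frozen-graph path and output node name below are hypothetical, not taken from the UNet Medical scripts):

    import tensorflow as tf
    import tensorflow.contrib.tensorrt as trt

    # Load a frozen inference graph (hypothetical path).
    with tf.gfile.GFile('unet_frozen.pb', 'rb') as f:
        frozen_graph = tf.GraphDef()
        frozen_graph.ParseFromString(f.read())

    # Replace TensorRT-compatible subgraphs with TRTEngineOp nodes.
    trt_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph,
        outputs=['logits'],                 # hypothetical output node name
        max_batch_size=8,
        max_workspace_size_bytes=1 << 30,
        precision_mode='FP16')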

GNMT/TF
* Performance improvements

Small updates (mostly README) for other models.

#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
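"""Benchmark inference latency and throughput of the NCF TensorFlow model.

Example invocation (the checkpoint path and values are illustrative; all
flags are defined in parse_args below):

    python inference.py --load_checkpoint_path /checkpoints/model.ckpt --batch_size 1024 --num_batches 100

Results are printed to stdout and written as JSON to the file given by
--log_path (default: nvlog.json).
"""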
import time
import os
import json
import argparse
import numpy as np
import tensorflow as tf
from neumf import ncf_model_ops


def parse_args():
    parser = argparse.ArgumentParser(description="Benchmark inference performance of the NCF model")
    parser.add_argument('--load_checkpoint_path', default=None, type=str,
                        help='Path to the checkpoint file to be loaded. If None, random weights are used')
    parser.add_argument('--n_users', default=138493, type=int,
                        help='Number of users. Defaults to the number of users in the ml-20m dataset after preprocessing')
    parser.add_argument('--n_items', default=26744, type=int,
                        help='Number of items. Defaults to the number of items in the ml-20m dataset after preprocessing')
    parser.add_argument('-f', '--factors', type=int, default=64,
                        help='Number of predictive factors')
    parser.add_argument('--layers', nargs='+', type=int,
                        default=[256, 256, 128, 64],
                        help='Sizes of hidden layers for MLP')
    parser.add_argument('--batch_size', default=1, type=int,
                        help='Batch size for inference')
    parser.add_argument('--num_batches', default=20, type=int,
                        help='Number of batches for which to measure latency and throughput')
    parser.add_argument('--no_amp', dest='amp', action='store_false', default=True,
                        help='Disable mixed precision')
    parser.add_argument('--xla', dest='xla', action='store_true', default=False,
                        help='Enable XLA')
    parser.add_argument('--log_path', default='nvlog.json', type=str,
                        help='Path of the file in which to store benchmark results')
    return parser.parse_args()


def main():
    args = parse_args()

    if args.amp:
        # Enable TensorFlow's automatic mixed precision graph rewrite.
        os.environ["TF_ENABLE_AUTO_MIXED_PRECISION"] = "1"

    # Input tensors
    users = tf.placeholder(tf.int32, shape=(None,))
    items = tf.placeholder(tf.int32, shape=(None,))
    dropout = tf.placeholder_with_default(0.0, shape=())

    # Model ops and saver
    logits_op = ncf_model_ops(
        users=users,
        items=items,
        labels=None,
        dup_mask=None,
        params={
            'fp16': False,
            'val_batch_size': args.batch_size,
            'num_users': args.n_users,
            'num_items': args.n_items,
            'num_factors': args.factors,
            'mf_reg': 0,
            'layer_sizes': args.layers,
            'layer_regs': [0. for i in args.layers],
            'dropout': 0.0,
            'sigmoid': True,
            'top_k': None,
            'learning_rate': None,
            'beta_1': None,
            'beta_2': None,
            'epsilon': None,
            'loss_scale': None,
        },
        mode='INFERENCE'
    )

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    if args.xla:
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    sess = tf.Session(config=config)
    saver = tf.train.Saver()

    if args.load_checkpoint_path:
        saver.restore(sess, args.load_checkpoint_path)
    else:
        # Manually initialize the weights with random values
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

    # Random (user, item) pairs are sufficient for a performance benchmark
    users_batch = np.random.randint(size=args.batch_size, low=0, high=args.n_users)
    items_batch = np.random.randint(size=args.batch_size, low=0, high=args.n_items)

    latencies = []
    for _ in range(args.num_batches):
        start = time.time()
        logits = sess.run(logits_op, feed_dict={users: users_batch, items: items_batch, dropout: 0.0})
        latencies.append(time.time() - start)

    results = {
        'args': vars(args),
        'best_inference_throughput': args.batch_size / min(latencies),
        'best_inference_latency': min(latencies),
        'inference_latencies': latencies
    }
    print('RESULTS: ', json.dumps(results, indent=4))
    if args.log_path is not None:
        with open(args.log_path, 'w') as f:
            json.dump(results, f, indent=4)


if __name__ == '__main__':
    main()
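
The JSON log written by the script can be post-processed for summary statistics; a minimal sketch (assuming the default nvlog.json produced by a previous run; the percentile choice is illustrative, not from the repository):

    import json

    import numpy as np

    with open('nvlog.json') as f:
        results = json.load(f)

    latencies = np.array(results['inference_latencies'])
    print('mean latency: {:.6f} s'.format(latencies.mean()))
    print('p95 latency:  {:.6f} s'.format(np.percentile(latencies, 95)))
    print('best throughput: {:.1f} samples/s'.format(results['best_inference_throughput']))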