DeepLearningExamples/TensorFlow/Recommendation/NCF/inference.py

#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import time
import os
import json
import argparse

import numpy as np
import tensorflow as tf
from neumf import ncf_model_ops

def parse_args(argv=None):
    """Parse the command-line options of the NCF inference benchmark.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which is
            the original behaviour of this function.

    Returns:
        argparse.Namespace with the attributes ``load_checkpoint_path``,
        ``n_users``, ``n_items``, ``factors``, ``layers``, ``batch_size``,
        ``num_batches``, ``amp``, ``xla`` and ``log_path``.
    """
    parser = argparse.ArgumentParser(description="Benchmark inference performance of the NCF model")
    parser.add_argument('--load_checkpoint_path', default=None, type=str,
                        help='Path to the checkpoint file to be loaded. If None will use random weights')
    parser.add_argument('--n_users', default=138493, type=int,
                        help='Number of users. Defaults to the number of users in the ml-20m dataset after preprocessing')
    parser.add_argument('--n_items', default=26744, type=int,
                        help='Number of items. Defaults to the number of items in the ml-20m dataset after preprocessing')
    parser.add_argument('-f', '--factors', type=int, default=64,
                        help='Number of predictive factors')
    parser.add_argument('--layers', nargs='+', type=int,
                        default=[256, 256, 128, 64],
                        help='Sizes of hidden layers for MLP')
    parser.add_argument('--batch_size', default=1, type=int, help='Batch size for inference')
    parser.add_argument('--num_batches', default=20, type=int,
                        help='Number of batches for which to measure latency and throughput')
    # --no_amp stores False into ``amp``; mixed precision is on by default.
    parser.add_argument('--no_amp', dest='amp', action='store_false', default=True,
                        help='Disable mixed precision')
    parser.add_argument('--xla', dest='xla', action='store_true', default=False,
                        help='Enable XLA')
    parser.add_argument('--log_path', default='nvlog.json', type=str,
                        help='Path to the file in which to store benchmark results')

    # parse_args(None) falls back to sys.argv[1:], so existing callers that
    # pass nothing keep working unchanged.
    return parser.parse_args(argv)


def main():
    """Measure inference latency and throughput of the NCF model.

    Builds the inference graph with ``ncf_model_ops``, restores weights from
    ``--load_checkpoint_path`` (or runs the global variable initializer when no
    checkpoint is given), then times ``--num_batches`` executions of a single
    randomly generated batch of user/item ids. The collected results are
    printed and, unless the log path is ``None``, written as JSON to
    ``--log_path``.

    Raises:
        ValueError: If ``--num_batches`` is smaller than 1, because no latency
            could be measured.
    """
    args = parse_args()

    # Fail before building the graph instead of hitting an opaque
    # "min() arg is an empty sequence" ValueError after the benchmark loop.
    if args.num_batches < 1:
        raise ValueError(
            '--num_batches must be at least 1, got {}'.format(args.num_batches))

    if args.amp:
        os.environ["TF_ENABLE_AUTO_MIXED_PRECISION"] = "1"

    # Input tensors
    users = tf.placeholder(tf.int32, shape=(None,))
    items = tf.placeholder(tf.int32, shape=(None,))
    # NOTE(review): this placeholder is fed below but is not passed to
    # ncf_model_ops here; the model receives the constant 'dropout': 0.0.
    dropout = tf.placeholder_with_default(0.0, shape=())

    # Model ops and saver
    logits_op = ncf_model_ops(
        users=users,
        items=items,
        labels=None,
        dup_mask=None,
        params={
            'fp16': False,
            'val_batch_size': args.batch_size,
            'num_users': args.n_users,
            'num_items': args.n_items,
            'num_factors': args.factors,
            'mf_reg': 0,
            'layer_sizes': args.layers,
            # One zero regularization coefficient per hidden layer.
            'layer_regs': [0.] * len(args.layers),
            'dropout': 0.0,
            'sigmoid': True,
            # Training-only settings are left unset.
            'top_k': None,
            'learning_rate': None,
            'beta_1': None,
            'beta_2': None,
            'epsilon': None,
            'loss_scale': None,
        },
        mode='INFERENCE'
    )

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    if args.xla:
        config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1

    latencies = []
    # The context manager closes the session once the benchmark is done,
    # even if restoring the checkpoint or a run raises.
    with tf.Session(config=config) as sess:
        saver = tf.train.Saver()
        if args.load_checkpoint_path:
            saver.restore(sess, args.load_checkpoint_path)
        else:
            # Manual initialize weights
            sess.run(tf.global_variables_initializer())

        sess.run(tf.local_variables_initializer())

        # The same random batch is reused for every timed run.
        users_batch = np.random.randint(size=args.batch_size, low=0, high=args.n_users)
        items_batch = np.random.randint(size=args.batch_size, low=0, high=args.n_items)
        feed_dict = {users: users_batch, items: items_batch, dropout: 0.0}

        for _ in range(args.num_batches):
            start = time.time()
            # Only the wall-clock time matters; the predictions are discarded.
            sess.run(logits_op, feed_dict=feed_dict)
            latencies.append(time.time() - start)

    best_latency = min(latencies)
    results = {
        'args' : vars(args),
        'best_inference_throughput' : args.batch_size / best_latency,
        'best_inference_latency' : best_latency,
        'inference_latencies' : latencies
    }
    print('RESULTS: ', json.dumps(results, indent=4))
    if args.log_path is not None:
        # Close the log file deterministically so the JSON is flushed to disk.
        with open(args.log_path, 'w') as log_file:
            json.dump(results, log_file, indent=4)

# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Updating models and adding BERT/PyT Tacotron2+Waveglow/PyT * AMP support * Data preprocessing for Tacotron 2 training * Fixed dropouts on LSTMCells SSD/PyT * script and notebook for inference * AMP support * README update * updates to examples/* BERT/PyT * initial release GNMT/PyT * Default container updated to NGC PyTorch 19.05-py3 * Mixed precision training implemented using APEX AMP * Added inference throughput and latency results on NVIDIA Tesla V100 16G * Added option to run inference on user-provided raw input text from command line NCF/PyT * Updated performance tables. * Default container changed to PyTorch 19.06-py3. * Caching validation negatives between runs Transformer/PyT * new README * jit support added UNet Medical/TF * inference example scripts added * inference benchmark measuring latency added * TRT/TF-TRT support added * README updated GNMT/TF * Performance improvements Small updates (mostly README) for other models. 2019-07-16 21:13:08 +02:00			`#`
			`# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`


			`import time`
			`import os`
			`import json`
			`import argparse`

			`import numpy as np`
			`import tensorflow as tf`
			`from neumf import ncf_model_ops`

			`def parse_args():`
			`parser = argparse.ArgumentParser(description="Benchmark inference performance of the NCF model")`
			`parser.add_argument('--load_checkpoint_path', default=None, type=str,`
			`help='Path to the checkpoint file to be loaded. If None will use random weights')`
			`parser.add_argument('--n_users', default=138493, type=int,`
			`help='Number of users. Defaults to the number of users in the ml-20m dataset after preprocessing')`
			`parser.add_argument('--n_items', default=26744, type=int,`
			`help='Number of items. Defaults to the number of users in the ml-20m dataset after preprocessing')`
			`parser.add_argument('-f', '--factors', type=int, default=64,`
			`help='Number of predictive factors')`
			`parser.add_argument('--layers', nargs='+', type=int,`
			`default=[256, 256, 128, 64],`
			`help='Sizes of hidden layers for MLP')`
			`parser.add_argument('--batch_size', default=1, type=int, help='Batch size for inference')`
			`parser.add_argument('--num_batches', default=20, type=int,`
			`help='Number of batches for which to measure latency and throughput')`
			`parser.add_argument('--no_amp', dest='amp', action='store_false', default=True,`
			`help='Disable mixed precision')`
			`parser.add_argument('--xla', dest='xla', action='store_true', default=False,`
			`help='Enable XLA')`
			`parser.add_argument('--log_path', default='nvlog.json', type=str,`
			`help='Path to the path to store benchmark results')`

			`return parser.parse_args()`


			`def main():`
			`args = parse_args()`

			`if args.amp:`
			`os.environ["TF_ENABLE_AUTO_MIXED_PRECISION"] = "1"`

			`# Input tensors`
			`users = tf.placeholder(tf.int32, shape=(None,))`
			`items = tf.placeholder(tf.int32, shape=(None,))`
			`dropout = tf.placeholder_with_default(0.0, shape=())`

			`# Model ops and saver`
			`logits_op = ncf_model_ops(`
			`users=users,`
			`items=items,`
			`labels=None,`
			`dup_mask=None,`
			`params={`
			`'fp16': False,`
			`'val_batch_size': args.batch_size,`
			`'num_users': args.n_users,`
			`'num_items': args.n_items,`
			`'num_factors': args.factors,`
			`'mf_reg': 0,`
			`'layer_sizes': args.layers,`
			`'layer_regs': [0. for i in args.layers],`
			`'dropout': 0.0,`
			`'sigmoid': True,`
			`'top_k': None,`
			`'learning_rate': None,`
			`'beta_1': None,`
			`'beta_2': None,`
			`'epsilon': None,`
			`'loss_scale': None,`
			`},`
			`mode='INFERENCE'`
			`)`

			`config = tf.ConfigProto()`
			`config.gpu_options.allow_growth = True`
			`if args.xla:`
			`config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1`

			`sess = tf.Session(config=config)`

			`saver = tf.train.Saver()`
			`if args.load_checkpoint_path:`
			`saver.restore(sess, args.load_checkpoint_path)`
			`else:`
			`# Manual initialize weights`
			`sess.run(tf.global_variables_initializer())`

			`sess.run(tf.local_variables_initializer())`


			`users_batch = np.random.randint(size=args.batch_size, low=0, high=args.n_users)`
			`items_batch = np.random.randint(size=args.batch_size, low=0, high=args.n_items)`

			`latencies = []`
			`for _ in range(args.num_batches):`
			`start = time.time()`
			`logits = sess.run(logits_op, feed_dict={users: users_batch, items: items_batch, dropout: 0.0 })`
			`latencies.append(time.time() - start)`

			`results = {`
			`'args' : vars(args),`
			`'best_inference_throughput' : args.batch_size / min(latencies),`
			`'best_inference_latency' : min(latencies),`
			`'inference_latencies' : latencies`
			`}`
			`print('RESULTS: ', json.dumps(results, indent=4))`
			`if args.log_path is not None:`
			`json.dump(results, open(args.log_path, 'w'), indent=4)`

			`if __name__ == '__main__':`
			`main()`