# Copyright 2018 MLBenchmark Group. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== # # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
# ------------------------------------------------------------------------------
# Tag names used when emitting benchmark log entries.
# Every constant below is a plain string; the constant name is what code should
# reference, and the string value is what ends up in the log.
# ------------------------------------------------------------------------------

# Names of commonly reported values.
VALUE_EPOCH = "epoch"
VALUE_ITERATION = "iteration"
VALUE_ACCURACY = "accuracy"
VALUE_BLEU = "bleu"
VALUE_TOP1 = "top1"
VALUE_TOP5 = "top5"
VALUE_BBOX_MAP = "bbox_map"
VALUE_MASK_MAP = "mask_map"
VALUE_BCE = "binary_cross_entropy"

# Timed blocks (used with timed_function & timed_block).
# Each block is expected to have matching *_start and *_stop tags defined.
# NOTE(review): within this section no *_start/*_stop tags exist for
# TRAIN_BLOCK or EVAL_ITER_BLOCK, and TRAIN_ITER_BLOCK ("train_iteration") does
# not prefix TRAIN_ITER_START/TRAIN_ITER_STOP ("train_iter_*") -- confirm how
# the timing helpers derive their tag names.
RUN_BLOCK = "run"
SETUP_BLOCK = "setup"
PREPROC_BLOCK = "preproc"
TRAIN_BLOCK = "train"
TRAIN_PREPROC_BLOCK = "train_preproc"
TRAIN_EPOCH_BLOCK = "train_epoch"
TRAIN_EPOCH_PREPROC_BLOCK = "train_epoch_preproc"
TRAIN_CHECKPOINT_BLOCK = "train_checkpoint"
TRAIN_ITER_BLOCK = "train_iteration"
EVAL_BLOCK = "eval"
EVAL_ITER_BLOCK = "eval_iteration"

# Collection of every timed-block name declared above (a mutable set).
TIMED_BLOCKS = set([
    RUN_BLOCK,
    SETUP_BLOCK,
    PREPROC_BLOCK,
    TRAIN_BLOCK,
    TRAIN_PREPROC_BLOCK,
    TRAIN_EPOCH_BLOCK,
    TRAIN_EPOCH_PREPROC_BLOCK,
    TRAIN_CHECKPOINT_BLOCK,
    TRAIN_ITER_BLOCK,
    EVAL_BLOCK,
    EVAL_ITER_BLOCK,
])

# Events, grouped by the phase they bracket.
# -- run
RUN_INIT = "run_init"
RUN_START = "run_start"
RUN_STOP = "run_stop"
RUN_FINAL = "run_final"
# -- setup
SETUP_START = "setup_start"
SETUP_STOP = "setup_stop"
# -- preprocessing
PREPROC_START = "preproc_start"
PREPROC_STOP = "preproc_stop"
# -- training
TRAIN_PREPROC_START = "train_preproc_start"
TRAIN_PREPROC_STOP = "train_preproc_stop"
TRAIN_EPOCH_PREPROC_START = "train_epoch_preproc_start"
TRAIN_EPOCH_PREPROC_STOP = "train_epoch_preproc_stop"
TRAIN_EPOCH_START = "train_epoch_start"
TRAIN_EPOCH_STOP = "train_epoch_stop"
TRAIN_ITER_START = "train_iter_start"
TRAIN_ITER_STOP = "train_iter_stop"
TRAIN_CHECKPOINT_START = "train_checkpoint_start"
TRAIN_CHECKPOINT_STOP = "train_checkpoint_stop"

# MLPerf specific tags: run control, preprocessing and input pipeline.
RUN_CLEAR_CACHES = "run_clear_caches"
RUN_SET_RANDOM_SEED = "run_set_random_seed"

PREPROC_NUM_TRAIN_EXAMPLES = "preproc_num_train_examples"
PREPROC_NUM_EVAL_EXAMPLES = "preproc_num_eval_examples"
PREPROC_TOKENIZE_TRAINING = "preproc_tokenize_training"
PREPROC_TOKENIZE_EVAL = "preproc_tokenize_eval"
PREPROC_VOCAB_SIZE = "preproc_vocab_size"

INPUT_SIZE = "input_size"
INPUT_BATCH_SIZE = "input_batch_size"
INPUT_ORDER = "input_order"
INPUT_SHARD = "input_shard"
INPUT_BN_SPAN = "input_bn_span"
INPUT_CENTRAL_CROP = "input_central_crop"
INPUT_CROP_USES_BBOXES = "input_crop_uses_bboxes"
INPUT_DISTORTED_CROP_MIN_OBJ_COV = "input_distorted_crop_min_object_covered"
INPUT_DISTORTED_CROP_RATIO_RANGE = "input_distorted_crop_aspect_ratio_range"
INPUT_DISTORTED_CROP_AREA_RANGE = "input_distorted_crop_area_range"
INPUT_DISTORTED_CROP_MAX_ATTEMPTS = "input_distorted_crop_max_attempts"
INPUT_MEAN_SUBTRACTION = "input_mean_subtraction"
INPUT_RANDOM_FLIP = "input_random_flip"
INPUT_RESIZE = "input_resize"
INPUT_RESIZE_ASPECT_PRESERVING = "input_resize_aspect_preserving"

# Optimizer tags.
OPT_NAME = "opt_name"
OPT_LR = "opt_learning_rate"
OPT_LR_WARMUP_STEPS = "opt_learning_rate_warmup_steps"
OPT_MOMENTUM = "opt_momentum"
OPT_WEIGHT_DECAY = "opt_weight_decay"
OPT_HP_ADAM_BETA1 = "opt_hp_Adam_beta1"
OPT_HP_ADAM_BETA2 = "opt_hp_Adam_beta2"
OPT_HP_ADAM_EPSILON = "opt_hp_Adam_epsilon"

# Training tags.
TRAIN_LOOP = "train_loop"
TRAIN_EPOCH = "train_epoch"
TRAIN_CHECKPOINT = "train_checkpoint"
TRAIN_LOSS = "train_loss"
TRAIN_ITERATION_LOSS = "train_iteration_loss"

# Evaluation tags.
EVAL_START = "eval_start"
EVAL_STOP = "eval_stop"
EVAL_SIZE = "eval_size"
EVAL_TARGET = "eval_target"
EVAL_ACCURACY = "eval_accuracy"

# Performance tags (EVAL_ITERATION_ACCURACY is kept in this group, as before).
PERF_IT_PER_SEC = "perf_it_per_sec"
PERF_TIME_TO_TRAIN = "time_to_train"
EVAL_ITERATION_ACCURACY = "eval_iteration_accuracy"

# Model tags shared across benchmarks.
MODEL_HP_LOSS_FN = "model_hp_loss_fn"
MODEL_HP_INITIAL_SHAPE = "model_hp_initial_shape"
MODEL_HP_FINAL_SHAPE = "model_hp_final_shape"
MODEL_L2_REGULARIZATION = "model_l2_regularization"
MODEL_EXCLUDE_BN_FROM_L2 = "model_exclude_bn_from_l2"
MODEL_HP_RELU = "model_hp_relu"
MODEL_HP_CONV2D_FIXED_PADDING = "model_hp_conv2d_fixed_padding"
MODEL_HP_BATCH_NORM = "model_hp_batch_norm"
MODEL_HP_DENSE = "model_hp_dense"

# GNMT specific tags.
MODEL_HP_LOSS_SMOOTHING = "model_hp_loss_smoothing"
MODEL_HP_NUM_LAYERS = "model_hp_num_layers"
MODEL_HP_HIDDEN_SIZE = "model_hp_hidden_size"
MODEL_HP_DROPOUT = "model_hp_dropout"
TRAIN_HP_MAX_SEQ_LEN = "train_hp_max_sequence_length"
EVAL_HP_BEAM_SIZE = "eval_hp_beam_size"
EVAL_HP_MAX_SEQ_LEN = "eval_hp_max_sequence_length"
EVAL_HP_LEN_NORM_CONST = "eval_hp_length_normalization_constant"
EVAL_HP_LEN_NORM_FACTOR = "eval_hp_length_normalization_factor"
EVAL_HP_COV_PENALTY_FACTOR = "eval_hp_coverage_penalty_factor"

# NCF specific tags.
PREPROC_HP_MIN_RATINGS = "preproc_hp_min_ratings"
PREPROC_HP_NUM_EVAL = "preproc_hp_num_eval"
PREPROC_HP_SAMPLE_EVAL_REPLACEMENT = "preproc_hp_sample_eval_replacement"
INPUT_HP_NUM_NEG = "input_hp_num_neg"
INPUT_HP_SAMPLE_TRAIN_REPLACEMENT = "input_hp_sample_train_replacement"
INPUT_STEP_TRAIN_NEG_GEN = "input_step_train_neg_gen"
INPUT_STEP_EVAL_NEG_GEN = "input_step_eval_neg_gen"
EVAL_HP_NUM_USERS = "eval_hp_num_users"
EVAL_HP_NUM_NEG = "eval_hp_num_neg"
MODEL_HP_MF_DIM = "model_hp_mf_dim"
MODEL_HP_MLP_LAYER_SIZES = "model_hp_mlp_layer_sizes"

# RESNET specific tags.
EVAL_EPOCH_OFFSET = "eval_offset"
MODEL_HP_INITIAL_MAX_POOL = "model_hp_initial_max_pool"
MODEL_HP_BEGIN_BLOCK = "model_hp_begin_block"
MODEL_HP_END_BLOCK = "model_hp_end_block"
MODEL_HP_BLOCK_TYPE = "model_hp_block_type"
MODEL_HP_PROJECTION_SHORTCUT = "model_hp_projection_shortcut"
# NOTE(review): the value below is spelled "shorcut"; it is kept verbatim
# because log consumers may match on the emitted string -- confirm before fixing.
MODEL_HP_SHORTCUT_ADD = "model_hp_shorcut_add"
MODEL_HP_RESNET_TOPOLOGY = "model_hp_resnet_topology"

# Transformer specific tags.
INPUT_MAX_LENGTH = "input_max_length"
MODEL_HP_INITIALIZER_GAIN = "model_hp_initializer_gain"
MODEL_HP_VOCAB_SIZE = "model_hp_vocab_size"
# NOTE(review): the constant name says NUM_HIDDEN_LAYERS but the value has no
# "num_" -- kept verbatim; confirm against whatever parses these logs.
MODEL_HP_NUM_HIDDEN_LAYERS = "model_hp_hidden_layers"
MODEL_HP_EMBEDDING_SHARED_WEIGHTS = "model_hp_embedding_shared_weights"
MODEL_HP_ATTENTION_DENSE = "model_hp_attention_dense"
MODEL_HP_ATTENTION_DROPOUT = "model_hp_attention_dropout"
MODEL_HP_FFN_OUTPUT_DENSE = "model_hp_ffn_output_dense"
MODEL_HP_FFN_FILTER_DENSE = "model_hp_ffn_filter_dense"
MODEL_HP_RELU_DROPOUT = "model_hp_relu_dropout"
MODEL_HP_LAYER_POSTPROCESS_DROPOUT = "model_hp_layer_postprocess_dropout"
MODEL_HP_NORM = "model_hp_norm"
MODEL_HP_SEQ_BEAM_SEARCH = "model_hp_sequence_beam_search"