# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SSD with Resnet 50 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on COCO, initialized from Imagenet classification checkpoint

# ---------------------------------------------------------------------------
# Detection model
# ---------------------------------------------------------------------------
model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: true
    num_classes: 90

    # Scale factors applied by the box coder to the four box coordinates.
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }

    # Anchor/groundtruth matching. Both thresholds are set to the same value
    # (0.5) and ignore_thresholds is off.
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }

    similarity_calculator {
      iou_similarity {
      }
    }

    encode_background_as_zeros: true

    # Anchors span levels 3 through 7, the same range as the `fpn` block in
    # `feature_extractor` below; keep the two in sync when editing.
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 2
      }
    }

    # Inputs are resized to a fixed 320x320 shape.
    image_resizer {
      fixed_shape_resizer {
        height: 320
        width: 320
      }
    }

    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 256
        # NOTE(review): -4.6 is approximately -log((1 - 0.01) / 0.01), i.e.
        # presumably the prior-probability bias init from the focal loss
        # paper referenced above -- confirm.
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          activation: RELU_6
          regularizer {
            l2_regularizer {
              weight: 0.0004
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true
            decay: 0.997
            epsilon: 0.001
          }
        }
        num_layers_before_predictor: 4
        kernel_size: 3
      }
    }

    feature_extractor {
      type: 'ssd_resnet50_v1_fpn'
      fpn {
        min_level: 3
        max_level: 7
      }
      min_depth: 16
      depth_multiplier: 1.0
      # Same regularizer and batch-norm settings as the box predictor's
      # conv_hyperparams; only the initializer differs (truncated normal,
      # stddev 0.03).
      conv_hyperparams {
        activation: RELU_6
        regularizer {
          l2_regularizer {
            weight: 0.0004
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true
          decay: 0.997
          epsilon: 0.001
        }
      }
      override_base_feature_extractor_hyperparams: true
    }

    # Sigmoid focal loss for classification, smooth L1 for localization,
    # weighted equally.
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 2.0
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true

    post_processing {
      batch_non_max_suppression {
        score_threshold: 1e-8
        iou_threshold: 0.6
        max_detections_per_class: 100
        max_total_detections: 100
      }
      score_converter: SIGMOID
    }
  }
}

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
train_config {
  # Backbone weights are restored from a classification checkpoint.
  fine_tune_checkpoint: "/checkpoints/resnet_v1_50/model.ckpt"
  fine_tune_checkpoint_type: "classification"

  batch_size: 32
  sync_replicas: true
  startup_delay_steps: 0
  replicas_to_aggregate: 8
  num_steps: 1250

  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_object_covered: 0.0
      min_aspect_ratio: 0.75
      max_aspect_ratio: 3.0
      min_area: 0.75
      max_area: 1.0
      overlap_thresh: 0.0
    }
  }

  optimizer {
    momentum_optimizer {
      learning_rate {
        # Cosine decay with a 400-step warmup. total_steps is set to the same
        # value as num_steps above (1250); change both together.
        cosine_decay_learning_rate {
          learning_rate_base: .02000000000000000000
          total_steps: 1250
          warmup_learning_rate: .00866640000000000000
          warmup_steps: 400
        }
      }
      momentum_optimizer_value: 0.9
    }
    use_moving_average: false
  }

  max_number_of_boxes: 100
  unpad_groundtruth_tensors: false
}

train_input_reader {
  tf_record_input_reader {
    input_path: "/data/coco2017_tfrecords/*train*"
  }
  label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
}

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
eval_config {
  metrics_set: "coco_detection_metrics"
  use_moving_averages: false
  num_examples: 8000
}

eval_input_reader {
  tf_record_input_reader {
    input_path: "/data/coco2017_tfrecords/*val*"
  }
  label_map_path: "object_detection/data/mscoco_label_map.pbtxt"
  shuffle: false
  num_readers: 1
}