DeepLearningExamples/TensorFlow/Detection/SSD/examples/SSD320_FP32_1GPU_BENCHMARK.sh
Przemek Strzelczyk d2bc3da0a1 Changes in TF models:
* added UNet for medical image segmentation
* added TF-AMP support for RN50
* small updates for other models (READMEs, benchmark & testing scripts)
2019-05-25 01:23:11 +02:00

19 lines
848 B
Bash

# Optional positional arguments:
#   $1 - directory used as the model dir and as the location of train_log
#        (default: /results/SSD320_FP32_1GPU)
#   $2 - directory containing ssd320_bench.config
#        (default: /workdir/models/research/configs)
CKPT_DIR="${1:-/results/SSD320_FP32_1GPU}"
PIPELINE_CONFIG_PATH="${2:-/workdir/models/research/configs}/ssd320_bench.config"

# GPU count; the throughput computation further down multiplies by it.
GPUS=1

# Value assigned to the three TF_ENABLE_*_TENSOR_OP_MATH_FP32 variables.
# NOTE(review): 0 presumably turns tensor-op math off for FP32 in the
# TensorFlow build this targets -- confirm against the container docs.
TENSOR_OPS=0
export TF_ENABLE_CUBLAS_TENSOR_OP_MATH_FP32="${TENSOR_OPS}" \
  TF_ENABLE_CUDNN_TENSOR_OP_MATH_FP32="${TENSOR_OPS}" \
  TF_ENABLE_CUDNN_RNN_TENSOR_OP_MATH_FP32="${TENSOR_OPS}"
# Run the training benchmark and capture everything it prints (stdout and
# stderr merged) into TRAIN_LOG. Positional arguments from the third onward
# are forwarded to model_main.py unchanged.
#
# The logging flag is spelled --alsologtostderr; the previous spelling
# (--alsologtostder) is not a flag absl defines. Path expansions are quoted so
# directories containing spaces or glob characters are passed as one argument.
#
# A non-zero exit from python does not abort the script (the captured log is
# still useful for diagnosis), but it is reported on stderr so a failed run is
# not mistaken for a valid measurement.
TRAIN_LOG=$(python -u ./object_detection/model_main.py \
  --pipeline_config_path="${PIPELINE_CONFIG_PATH}" \
  --model_dir="${CKPT_DIR}" \
  --alsologtostderr \
  "${@:3}" 2>&1) ||
  echo "warning: training command exited with status $?" >&2
PERF=$(echo "$TRAIN_LOG" | awk -v GPUS=$GPUS '/global_step\/sec/{ array[num++]=$2 } END { for (x = 3*num/4; x < num; ++x) { sum += array[x] }; print GPUS*32*4*sum/num " img/s"}')
#######################################
# Write the benchmark summary and the raw training log to <dir>/train_log.
#
# The summary line is also echoed to stdout (via tee). train_log is
# overwritten by the summary and the raw log is appended after it. All path
# expansions are quoted so directories with spaces or glob characters work.
#
# Arguments: $1 - output directory (created if missing)
#            $2 - throughput string to report
#            $3 - full training log text
#######################################
write_train_log() {
  local out_dir=$1 perf=$2 log_text=$3
  mkdir -p -- "$out_dir"
  printf '%s\n' "Single GPU single precision training performance: $perf" \
    | tee -- "$out_dir/train_log"
  printf '%s\n' "$log_text" >> "$out_dir/train_log"
}
write_train_log "$CKPT_DIR" "$PERF" "$TRAIN_LOG"