#!/bin/bash

MRPC_DIR=/workspace/bert/data/glue/MRPC
OUT_DIR=/results/MRPC

mkdir -p $OUT_DIR

echo "Container nvidia build = " $NVIDIA_BUILD_ID

init_checkpoint=${1}
mode=${2:-"train"}
max_steps=${3:-"-1.0"} # if < 0, has no effect
batch_size=${4:-"12"}
learning_rate=${5:-"5e-6"}
precision=${6:-"fp32"}
num_gpu=${7:-"8"}
epochs=${8:-"2"}

if [ "$mode" != "train" ] ; then
  num_gpu=1
fi

use_fp16=""
if [ "$precision" = "fp16" ] ; then
  echo "fp16 activated!"
  use_fp16="--fp16"
fi

if [ "$num_gpu" = "1" ] ; then
  mpi_command=""
else
  mpi_command="torch.distributed.launch --nproc_per_node=$num_gpu"
fi

CMD="python -m $mpi_command run_glue.py "
CMD+="--task_name MRPC "
if [ "$mode" = "train" ] ; then
  CMD+="--do_train "
  CMD+="--train_batch_size=$batch_size "
else
  CMD+="--do_eval "
  CMD+="--eval_batch_size=$batch_size "
fi
CMD+="--do_lower_case "
CMD+="--data_dir $MRPC_DIR "
CMD+="--bert_model bert-large-uncased "
CMD+="--init_checkpoint $init_checkpoint "
CMD+="--max_seq_length 128 "
CMD+="--learning_rate $learning_rate "
CMD+="--num_train_epochs $epochs "
CMD+="--max_steps $max_steps "
CMD+="--output_dir $OUT_DIR "
CMD+="$use_fp16"

LOGFILE=$OUT_DIR/logfile
$CMD |& tee $LOGFILE

sed -r 's/|(\[A)/\n/g' $LOGFILE > $LOGFILE.edit

throughput=`cat $LOGFILE.edit | grep -E 'Iteration.*[0-9.]+(s/it|it/s)' | tail -1 | egrep -o '[0-9.]+(s/it|it/s)'`

echo "throughput: $throughput"