fix pytorch bert
This commit is contained in:
parent
248927e6fd
commit
6101e02baf
13
PyTorch/LanguageModeling/BERT/bert_config_base.json
Normal file
13
PyTorch/LanguageModeling/BERT/bert_config_base.json
Normal file
|
@ -0,0 +1,13 @@
|
|||
{
|
||||
"attention_probs_dropout_prob": 0.1,
|
||||
"hidden_act": "gelu",
|
||||
"hidden_dropout_prob": 0.1,
|
||||
"hidden_size": 768,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"max_position_embeddings": 512,
|
||||
"num_attention_heads": 12,
|
||||
"num_hidden_layers": 12,
|
||||
"type_vocab_size": 2,
|
||||
"vocab_size": 30522
|
||||
}
|
21
PyTorch/LanguageModeling/BERT/run_benchmark.sh
Normal file
21
PyTorch/LanguageModeling/BERT/run_benchmark.sh
Normal file
|
@ -0,0 +1,21 @@
|
|||
#!/bin/bash
#
# Benchmark launcher: derives the batch-size arguments and then runs
# scripts/run_pretraining.sh.
#
# Usage: run_benchmark.sh [batch_size] [num_gpus] [precision] [train_steps]
#   batch_size   batch size per GPU          (default: 96)
#   num_gpus     number of GPUs              (default: 8)
#   precision    fp32 | fp16                 (default: fp16)
#   train_steps  max train steps             (default: 4)
#
# Environment:
#   HOME_WORK_DIR             prefix of the directory holding the checkout
#   PADDLE_TRAINER_ENDPOINTS  comma-separated list; only its first entry is kept
#   LD_LIBRARY_PATH           extended with the libibverbs / x86_64 lib dirs
set -xe

# Print a message on stderr and abort the script.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

export LD_LIBRARY_PATH="/usr/lib/libibverbs/:/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH"

# Keep only the first comma-separated entry of the endpoint list.
# NOTE(review): if this variable is exported, scripts/run_pretraining.sh sees
# the truncated value as well; that script counts the entries to derive the
# node count, which would then always be 1 -- confirm this is intended.
PADDLE_TRAINER_ENDPOINTS=${PADDLE_TRAINER_ENDPOINTS%%,*}

batch_size=${1:-"96"}   # batch size per GPU
num_gpus=${2:-"8"}      # number of GPUs
precision=${3:-"fp16"}  # fp32 | fp16
train_steps=${4:-4}     # max train steps

# Both values are used as divisors below, so reject anything that is not a
# positive integer up front with a readable message instead of a bare abort.
for value in "$batch_size" "$num_gpus"; do
  if ! [[ "$value" =~ ^[0-9]+$ ]] || (( 10#$value == 0 )); then
    die "batch_size and num_gpus must be positive integers (got '$value')"
  fi
done

# 67584 is the fixed total both quantities are derived from
# (presumably the target global batch size -- TODO confirm).
# The 10# prefix forces base 10 so a leading zero is not parsed as octal.
readonly total_batch=67584
gradient_accumulation_steps=$(( total_batch / 10#$batch_size / 10#$num_gpus ))
train_batch_size=$(( total_batch / 10#$num_gpus ))  # total batch size per GPU

# A zero quotient previously aborted the script silently (expr exits non-zero
# when its result is 0 and `set -e` is active); keep aborting, but say why.
if (( gradient_accumulation_steps < 1 || train_batch_size < 1 )); then
  die "batch_size=$batch_size with num_gpus=$num_gpus does not fit into $total_batch"
fi

# Executed from the caller's working directory, i.e. before the cd below.
# A failure of the helper is intentionally not fatal: `export VAR=$(...)`
# reports the status of `export`, not of the command substitution.
# shellcheck disable=SC2155
export NODE_RANK=$(python get_mpi_rank.py)

cd "${HOME_WORK_DIR}/workspace/env_run/zengjinle/DeepLearningExamples/PyTorch/LanguageModeling/BERT"

# Start from a clean checkpoint directory (relative to the directory entered
# above; `set -e` guarantees the cd succeeded before this runs).
rm -rf ./results/checkpoints

# run pre-training
# The literal arguments are passed through positionally, unchanged; see
# scripts/run_pretraining.sh for their meaning.
bash scripts/run_pretraining.sh "$train_batch_size" 6e-3 "$precision" "$num_gpus" 0.2843 "$train_steps" 200 false true true "$gradient_accumulation_steps"
|
||||
|
|
@ -34,12 +34,13 @@ learning_rate_phase2=${17:-"4e-3"}
|
|||
warmup_proportion_phase2=${18:-"0.128"}
|
||||
train_steps_phase2=${19:-1563}
|
||||
gradient_accumulation_steps_phase2=${20:-512}
|
||||
DATASET=hdf5_lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/wikicorpus_en # change this for other datasets
|
||||
export BERT_PREP_WORKING_DIR=${BERT_PREP_WORKING_DIR:-"/root/paddlejob/workspace/env_run/data"}
|
||||
DATASET=hdf5_lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/wikicorpus_en/training # change this for other datasets
|
||||
DATA_DIR_PHASE1=${21:-$BERT_PREP_WORKING_DIR/${DATASET}/}
|
||||
BERT_CONFIG=bert_config.json
|
||||
BERT_CONFIG=bert_config_base.json
|
||||
DATASET2=hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/wikicorpus_en # change this for other datasets
|
||||
DATA_DIR_PHASE2=${22:-$BERT_PREP_WORKING_DIR/${DATASET2}/}
|
||||
CODEDIR=${23:-"/workspace/bert"}
|
||||
CODEDIR=${23:-"/root/paddlejob/workspace/env_run/zengjinle/DeepLearningExamples/PyTorch/LanguageModeling/BERT"}
|
||||
init_checkpoint=${24:-"None"}
|
||||
RESULTS_DIR=$CODEDIR/results
|
||||
CHECKPOINTS_DIR=$RESULTS_DIR/checkpoints
|
||||
|
@ -107,7 +108,7 @@ CMD=" $CODEDIR/run_pretraining.py"
|
|||
CMD+=" --input_dir=$DATA_DIR_PHASE1"
|
||||
CMD+=" --output_dir=$CHECKPOINTS_DIR"
|
||||
CMD+=" --config_file=$BERT_CONFIG"
|
||||
CMD+=" --bert_model=bert-large-uncased"
|
||||
CMD+=" --bert_model=bert-base-uncased"
|
||||
CMD+=" --train_batch_size=$train_batch_size"
|
||||
CMD+=" --max_seq_length=128"
|
||||
CMD+=" --max_predictions_per_seq=20"
|
||||
|
@ -125,7 +126,12 @@ CMD+=" $INIT_CHECKPOINT"
|
|||
CMD+=" --do_train"
|
||||
CMD+=" --json-summary ${RESULTS_DIR}/dllogger.json "
|
||||
|
||||
CMD="python3 -m torch.distributed.launch --nproc_per_node=$num_gpus $CMD"
|
||||
MASTER_ADDR=`echo $PADDLE_TRAINER_ENDPOINTS | awk -F',' '{print $1}' | awk -F':' '{print $1}'`
|
||||
MASTER_PORT=`echo $PADDLE_TRAINER_ENDPOINTS | awk -F',' '{print $1}' | awk -F':' '{print $2}'`
|
||||
NUM_NODES=`echo $PADDLE_TRAINER_ENDPOINTS | tr ',' '\n' | wc -l`
|
||||
NODE_RANK=$PADDLE_TRAINER_ID
|
||||
|
||||
CMD="python3 -m torch.distributed.launch --nproc_per_node=$num_gpus --nnodes=$NUM_NODES --node_rank=$NODE_RANK --master_addr $MASTER_ADDR --master_port $MASTER_PORT $CMD"
|
||||
|
||||
|
||||
if [ "$create_logfile" = "true" ] ; then
|
||||
|
@ -148,78 +154,3 @@ fi
|
|||
set +x
|
||||
|
||||
echo "finished pretraining"
|
||||
|
||||
#Start Phase2
|
||||
|
||||
PREC=""
|
||||
if [ "$precision" = "fp16" ] ; then
|
||||
PREC="--fp16"
|
||||
elif [ "$precision" = "fp32" ] ; then
|
||||
PREC=""
|
||||
elif [ "$precision" = "tf32" ] ; then
|
||||
PREC=""
|
||||
else
|
||||
echo "Unknown <precision> argument"
|
||||
exit -2
|
||||
fi
|
||||
|
||||
ACCUMULATE_GRADIENTS=""
|
||||
if [ "$accumulate_gradients" == "true" ] ; then
|
||||
ACCUMULATE_GRADIENTS="--gradient_accumulation_steps=$gradient_accumulation_steps_phase2"
|
||||
fi
|
||||
|
||||
ALL_REDUCE_POST_ACCUMULATION=""
|
||||
if [ "$allreduce_post_accumulation" == "true" ] ; then
|
||||
ALL_REDUCE_POST_ACCUMULATION="--allreduce_post_accumulation"
|
||||
fi
|
||||
|
||||
ALL_REDUCE_POST_ACCUMULATION_FP16=""
|
||||
if [ "$allreduce_post_accumulation_fp16" == "true" ] ; then
|
||||
ALL_REDUCE_POST_ACCUMULATION_FP16="--allreduce_post_accumulation_fp16"
|
||||
fi
|
||||
|
||||
echo $DATA_DIR_PHASE2
|
||||
INPUT_DIR=$DATA_DIR_PHASE2
|
||||
CMD=" $CODEDIR/run_pretraining.py"
|
||||
CMD+=" --input_dir=$DATA_DIR_PHASE2"
|
||||
CMD+=" --output_dir=$CHECKPOINTS_DIR"
|
||||
CMD+=" --config_file=$BERT_CONFIG"
|
||||
CMD+=" --bert_model=bert-large-uncased"
|
||||
CMD+=" --train_batch_size=$train_batch_size_phase2"
|
||||
CMD+=" --max_seq_length=512"
|
||||
CMD+=" --max_predictions_per_seq=80"
|
||||
CMD+=" --max_steps=$train_steps_phase2"
|
||||
CMD+=" --warmup_proportion=$warmup_proportion_phase2"
|
||||
CMD+=" --num_steps_per_checkpoint=$save_checkpoint_steps"
|
||||
CMD+=" --learning_rate=$learning_rate_phase2"
|
||||
CMD+=" --seed=$seed"
|
||||
CMD+=" $PREC"
|
||||
CMD+=" $ACCUMULATE_GRADIENTS"
|
||||
CMD+=" $CHECKPOINT"
|
||||
CMD+=" $ALL_REDUCE_POST_ACCUMULATION"
|
||||
CMD+=" $ALL_REDUCE_POST_ACCUMULATION_FP16"
|
||||
CMD+=" --do_train --phase2 --resume_from_checkpoint --phase1_end_step=$train_steps"
|
||||
CMD+=" --json-summary ${RESULTS_DIR}/dllogger.json "
|
||||
|
||||
CMD="python3 -m torch.distributed.launch --nproc_per_node=$num_gpus $CMD"
|
||||
|
||||
if [ "$create_logfile" = "true" ] ; then
|
||||
export GBS=$(expr $train_batch_size_phase2 \* $num_gpus)
|
||||
printf -v TAG "pyt_bert_pretraining_phase2_%s_gbs%d" "$precision" $GBS
|
||||
DATESTAMP=`date +'%y%m%d%H%M%S'`
|
||||
LOGFILE=$RESULTS_DIR/$job_name.$TAG.$DATESTAMP.log
|
||||
printf "Logs written to %s\n" "$LOGFILE"
|
||||
fi
|
||||
|
||||
set -x
|
||||
if [ -z "$LOGFILE" ] ; then
|
||||
$CMD
|
||||
else
|
||||
(
|
||||
$CMD
|
||||
) |& tee $LOGFILE
|
||||
fi
|
||||
|
||||
set +x
|
||||
|
||||
echo "finished phase2"
|
||||
|
|
Loading…
Reference in a new issue