DeepLearningExamples/PyTorch/LanguageModeling/BERT/data/utils/config.sh
#! /bin/bash
set -e
USE_BERT_LARGE=true              # true: use the BERT-Large vocabulary; false: BERT-Base
MAX_SEQUENCE_LENGTH=512          # Maximum number of tokens per training sequence
MAX_PREDICTIONS_PER_SEQUENCE=80  # Maximum number of masked tokens predicted per sequence
MASKED_LM_PROB=0.15              # Fraction of tokens masked for the masked-LM objective
SEED=12345                       # Random seed for reproducible data generation
DUPE_FACTOR=5                    # Number of times the corpus is duplicated with different masking
DO_LOWER_CASE="True"             # Lowercase input text (matches the uncased vocabularies below)
N_LINES_PER_SHARD_APPROX=396000 # Default=396000 creates 256 shards
N_PROCS_PREPROCESS=4 # Adjust this based on memory requirements and available number of cores
BERT_BASE_DIR="/workspace/bert/vocab/uncased_L-12_H-768_A-12"    # Uncased BERT-Base vocabulary directory
BERT_LARGE_DIR="/workspace/bert/vocab/uncased_L-24_H-1024_A-16"  # Uncased BERT-Large vocabulary directory
if [ "$USE_BERT_LARGE" = true ] ; then
VOCAB_FILE="${BERT_LARGE_DIR}/vocab.txt"
else
VOCAB_FILE="${BERT_BASE_DIR}/vocab.txt"
fi
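
Below is a minimal sketch of how a downstream preprocessing script might consume this config by sourcing it. The wrapper script and its echo-only body are illustrative assumptions for demonstration; the repository's actual preprocessing scripts pass these variables on to the data-creation tooling.

#!/bin/bash
# Hypothetical wrapper: source config.sh and report the resolved settings.
set -e

# Path to config.sh is assumed to be the current directory for this sketch.
source ./config.sh

echo "Using vocab file:          ${VOCAB_FILE}"
echo "Max sequence length:       ${MAX_SEQUENCE_LENGTH}"
echo "Max predictions per seq:   ${MAX_PREDICTIONS_PER_SEQUENCE}"
echo "Masked LM probability:     ${MASKED_LM_PROB}"
echo "Duplication factor:        ${DUPE_FACTOR}"
echo "Approx. lines per shard:   ${N_LINES_PER_SHARD_APPROX}"
echo "Preprocessing processes:   ${N_PROCS_PREPROCESS}"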