25 lines
593 B
Bash
Executable file
25 lines
593 B
Bash
Executable file
#! /bin/bash
#
# Configuration values plus selection of the vocabulary file.
#
# This file only assigns shell variables; it runs no other commands. The only
# logic is the final if/else, which points VOCAB_FILE at the vocab.txt inside
# either BERT_LARGE_DIR or BERT_BASE_DIR depending on USE_BERT_LARGE.
#
# NOTE(review): apart from USE_BERT_LARGE and the two *_DIR variables, nothing
# in this file reads the values defined here -- presumably they are consumed by
# scripts that source this file; confirm against the callers. The variables are
# deliberately left as plain (non-readonly, non-exported) assignments so that
# any such caller keeps working unchanged.

# Abort on the first command that returns a non-zero status.
set -e

# Set to the literal string "true" to select the BERT-large vocabulary
# directory below; any other value selects the BERT-base directory.
USE_BERT_LARGE=true
MAX_SEQUENCE_LENGTH=512
MAX_PREDICTIONS_PER_SEQUENCE=80
MASKED_LM_PROB=0.15
SEED=12345
DUPE_FACTOR=5
DO_LOWER_CASE="True"
N_LINES_PER_SHARD_APPROX=396000 # Default=396000 creates 256 shards

N_PROCS_PREPROCESS=4 # Adjust this based on memory requirements and available number of cores

# Directories expected to contain a vocab.txt for each model variant.
BERT_BASE_DIR="/workspace/bert/vocab/uncased_L-12_H-768_A-12"
BERT_LARGE_DIR="/workspace/bert/vocab/uncased_L-24_H-1024_A-16"

# Pick the vocabulary file that matches the selected model variant.
# The POSIX `[ ... ]` test is kept (with the expansion quoted) so the file
# stays usable even if it is sourced from a non-bash shell.
if [ "$USE_BERT_LARGE" = true ] ; then
  VOCAB_FILE="${BERT_LARGE_DIR}/vocab.txt"
else
  VOCAB_FILE="${BERT_BASE_DIR}/vocab.txt"
fi