diff --git a/PyTorch/SpeechRecognition/QuartzNet/.dockerignore b/PyTorch/SpeechRecognition/QuartzNet/.dockerignore new file mode 100644 index 00000000..6fa7d3a6 --- /dev/null +++ b/PyTorch/SpeechRecognition/QuartzNet/.dockerignore @@ -0,0 +1,2 @@ +pretrained_models/ +results/ diff --git a/PyTorch/SpeechRecognition/QuartzNet/README.md b/PyTorch/SpeechRecognition/QuartzNet/README.md index 5f9e1990..f1d4a751 100644 --- a/PyTorch/SpeechRecognition/QuartzNet/README.md +++ b/PyTorch/SpeechRecognition/QuartzNet/README.md @@ -264,7 +264,7 @@ To train your model using mixed or TF32 precision with Tensor Cores or using FP3 7. Start inference/predictions. Inside the container, use the following script to run inference. Make sure the downloaded and preprocessed dataset is located at `$DATA_DIR/LibriSpeech` on the host, which is mounted as `/datasets/LibriSpeech` inside the container. - A pretrained model checkpoint can be downloaded from [NGC model repository](https://ngc.nvidia.com/catalog/models). + A pretrained model checkpoint can be downloaded from the [NGC model repository](https://ngc.nvidia.com/catalog/models), either manually or automatically using `scripts/download_quartznet.sh`. ```bash [OPTION1=value1 OPTION2=value2 ...] 
bash scripts/inference.sh diff --git a/PyTorch/SpeechRecognition/QuartzNet/scripts/docker/launch.sh b/PyTorch/SpeechRecognition/QuartzNet/scripts/docker/launch.sh index 9144fac9..f87cb1a6 100755 --- a/PyTorch/SpeechRecognition/QuartzNet/scripts/docker/launch.sh +++ b/PyTorch/SpeechRecognition/QuartzNet/scripts/docker/launch.sh @@ -4,13 +4,11 @@ SCRIPT_DIR=$(cd $(dirname $0); pwd) QN_REPO=${QN_REPO:-"${SCRIPT_DIR}/../.."} DATA_DIR=${1:-${DATA_DIR-${QN_REPO}"/datasets"}} -CHECKPOINT_DIR=${2:-${CHECKPOINT_DIR:-${QN_REPO}"/checkpoints"}} -RESULT_DIR=${3:-${RESULT_DIR:-${QN_REPO}"/results"}} -PROGRAM_PATH=${PROGRAM_PATH} +RESULT_DIR=${2:-${RESULT_DIR:-${QN_REPO}"/results"}} +SCRIPT=${3:-${SCRIPT:-""}} MOUNTS="" MOUNTS+=" -v $DATA_DIR:/datasets" -MOUNTS+=" -v $CHECKPOINT_DIR:/checkpoints" MOUNTS+=" -v $RESULT_DIR:/results" MOUNTS+=" -v ${QN_REPO}:/quartznet" @@ -21,4 +19,4 @@ docker run -it --rm --gpus all\ --ulimit stack=67108864 \ $MOUNTS \ -w /quartznet \ - quartznet:latest bash $PROGRAM_PATH + quartznet:latest bash $SCRIPT diff --git a/PyTorch/SpeechRecognition/QuartzNet/scripts/download_quartznet.sh b/PyTorch/SpeechRecognition/QuartzNet/scripts/download_quartznet.sh new file mode 100755 index 00000000..3d54b31d --- /dev/null +++ b/PyTorch/SpeechRecognition/QuartzNet/scripts/download_quartznet.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -e + +: ${MODEL_DIR:="pretrained_models/quartznet"} +MODEL_ZIP="quartznet_pyt_ckpt_amp_21.03.0.zip" +MODEL="nvidia_quartznet_210504.pt" +MODEL_URL="https://api.ngc.nvidia.com/v2/models/nvidia/quartznet_pyt_ckpt_amp/versions/21.03.0/zip" +mkdir -p "$MODEL_DIR" +if [ ! -f "${MODEL_DIR}/${MODEL_ZIP}" ]; then + echo "Downloading ${MODEL_ZIP} ..." + wget -qO "${MODEL_DIR}/${MODEL_ZIP}" "${MODEL_URL}" \ + || { echo "ERROR: Failed to download ${MODEL_ZIP} from NGC"; rm -f "${MODEL_DIR}/${MODEL_ZIP}"; exit 1; } +fi + +if [ ! -f "${MODEL_DIR}/${MODEL}" ]; then + echo "Extracting ${MODEL} ..." 
+ unzip -qo "${MODEL_DIR}/${MODEL_ZIP}" -d "${MODEL_DIR}" \ + || { echo "ERROR: Failed to extract ${MODEL_ZIP}"; exit 1; } + + echo "OK" + +else + echo "${MODEL} already downloaded." +fi diff --git a/PyTorch/SpeechRecognition/QuartzNet/scripts/inference.sh b/PyTorch/SpeechRecognition/QuartzNet/scripts/inference.sh index c45289e5..4d6dcf2e 100755 --- a/PyTorch/SpeechRecognition/QuartzNet/scripts/inference.sh +++ b/PyTorch/SpeechRecognition/QuartzNet/scripts/inference.sh @@ -17,7 +17,7 @@ : ${DATA_DIR:=${1:-"/datasets/LibriSpeech"}} : ${MODEL_CONFIG:=${2:-"configs/quartznet15x5_speedp-online-1.15_speca.yaml"}} : ${OUTPUT_DIR:=${3:-"/results"}} -: ${CHECKPOINT:=${4:-"/checkpoints/quartznet_fp16.pt"}} +: ${CHECKPOINT:=${4:-"pretrained_models/quartznet/nvidia_quartznet_210504.pt"}} : ${DATASET:="test-other"} : ${LOG_FILE:=""} : ${CUDNN_BENCHMARK:=false}