diff --git a/PyTorch/SpeechRecognition/QuartzNet/.dockerignore b/PyTorch/SpeechRecognition/QuartzNet/.dockerignore new file mode 100644 index 00000000..6fa7d3a6 --- /dev/null +++ b/PyTorch/SpeechRecognition/QuartzNet/.dockerignore @@ -0,0 +1,2 @@ +pretrained_models/ +results/ diff --git a/PyTorch/SpeechRecognition/QuartzNet/README.md b/PyTorch/SpeechRecognition/QuartzNet/README.md index 5f9e1990..f1d4a751 100644 --- a/PyTorch/SpeechRecognition/QuartzNet/README.md +++ b/PyTorch/SpeechRecognition/QuartzNet/README.md @@ -264,7 +264,7 @@ To train your model using mixed or TF32 precision with Tensor Cores or using FP3 7. Start inference/predictions. Inside the container, use the following script to run inference. Make sure the downloaded and preprocessed dataset is located at `$DATA_DIR/LibriSpeech` on the host, which is mounted as `/datasets/LibriSpeech` inside the container. - A pretrained model checkpoint can be downloaded from [NGC model repository](https://ngc.nvidia.com/catalog/models). + A pretrained model checkpoint can be downloaded from the [NGC model repository](https://ngc.nvidia.com/catalog/models), either manually or automatically using `scripts/download_quartznet.sh`. ```bash [OPTION1=value1 OPTION2=value2 ...] 
bash scripts/inference.sh diff --git a/PyTorch/SpeechRecognition/QuartzNet/scripts/docker/launch.sh b/PyTorch/SpeechRecognition/QuartzNet/scripts/docker/launch.sh index 9144fac9..f87cb1a6 100755 --- a/PyTorch/SpeechRecognition/QuartzNet/scripts/docker/launch.sh +++ b/PyTorch/SpeechRecognition/QuartzNet/scripts/docker/launch.sh @@ -4,13 +4,11 @@ SCRIPT_DIR=$(cd $(dirname $0); pwd) QN_REPO=${QN_REPO:-"${SCRIPT_DIR}/../.."} DATA_DIR=${1:-${DATA_DIR-${QN_REPO}"/datasets"}} -CHECKPOINT_DIR=${2:-${CHECKPOINT_DIR:-${QN_REPO}"/checkpoints"}} -RESULT_DIR=${3:-${RESULT_DIR:-${QN_REPO}"/results"}} -PROGRAM_PATH=${PROGRAM_PATH} +RESULT_DIR=${2:-${RESULT_DIR:-${QN_REPO}"/results"}} +SCRIPT=${3:-${SCRIPT:-""}} MOUNTS="" MOUNTS+=" -v $DATA_DIR:/datasets" -MOUNTS+=" -v $CHECKPOINT_DIR:/checkpoints" MOUNTS+=" -v $RESULT_DIR:/results" MOUNTS+=" -v ${QN_REPO}:/quartznet" @@ -21,4 +19,4 @@ docker run -it --rm --gpus all\ --ulimit stack=67108864 \ $MOUNTS \ -w /quartznet \ - quartznet:latest bash $PROGRAM_PATH + quartznet:latest bash $SCRIPT diff --git a/PyTorch/SpeechRecognition/QuartzNet/scripts/download_quartznet.sh b/PyTorch/SpeechRecognition/QuartzNet/scripts/download_quartznet.sh new file mode 100755 index 00000000..3d54b31d --- /dev/null +++ b/PyTorch/SpeechRecognition/QuartzNet/scripts/download_quartznet.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +set -e + +: ${MODEL_DIR:="pretrained_models/quartznet"} +MODEL_ZIP="quartznet_pyt_ckpt_amp_21.03.0.zip" +MODEL="nvidia_quartznet_210504.pt" +MODEL_URL="https://api.ngc.nvidia.com/v2/models/nvidia/quartznet_pyt_ckpt_amp/versions/21.03.0/zip" +mkdir -p "$MODEL_DIR" +if [ ! -f "${MODEL_DIR}/${MODEL_ZIP}" ]; then + echo "Downloading ${MODEL_ZIP} ..." + wget -qO "${MODEL_DIR}/${MODEL_ZIP}" "${MODEL_URL}" \ + || { echo "ERROR: Failed to download ${MODEL_ZIP} from NGC"; rm -f "${MODEL_DIR}/${MODEL_ZIP}"; exit 1; } +fi + +if [ ! -f "${MODEL_DIR}/${MODEL}" ]; then + echo "Extracting ${MODEL} ..." 
+ unzip -qo "${MODEL_DIR}/${MODEL_ZIP}" -d "${MODEL_DIR}" \ + || { echo "ERROR: Failed to extract ${MODEL_ZIP}"; exit 1; } + + echo "OK" + +else + echo "${MODEL} already downloaded." +fi diff --git a/PyTorch/SpeechRecognition/QuartzNet/scripts/inference.sh b/PyTorch/SpeechRecognition/QuartzNet/scripts/inference.sh index c45289e5..4d6dcf2e 100755 --- a/PyTorch/SpeechRecognition/QuartzNet/scripts/inference.sh +++ b/PyTorch/SpeechRecognition/QuartzNet/scripts/inference.sh @@ -17,7 +17,7 @@ : ${DATA_DIR:=${1:-"/datasets/LibriSpeech"}} : ${MODEL_CONFIG:=${2:-"configs/quartznet15x5_speedp-online-1.15_speca.yaml"}} : ${OUTPUT_DIR:=${3:-"/results"}} -: ${CHECKPOINT:=${4:-"/checkpoints/quartznet_fp16.pt"}} +: ${CHECKPOINT:=${4:-"pretrained_models/quartznet/nvidia_quartznet_210504.pt"}} : ${DATASET:="test-other"} : ${LOG_FILE:=""} : ${CUDNN_BENCHMARK:=false}