[Jasper/PyT] Triton update

kkudrynski 2020-08-05 16:44:50 +02:00
parent 9fa75813e6
commit 557f4d01ea
22 changed files with 259 additions and 52 deletions

View file

@@ -5,3 +5,5 @@ checkpoints/
datasets/
external/tensorrt-inference-server/
checkpoints/
triton/model_repo
triton/deploy

View file

@@ -1,4 +1,4 @@
[submodule "external/tensorrt-inference-server"]
path = external/tensorrt-inference-server
url = https://github.com/NVIDIA/tensorrt-inference-server.git
branch = r19.06
[submodule "external/triton-inference-server"]
path = external/triton-inference-server
url = https://github.com/NVIDIA/triton-inference-server
branch = r19.12

View file

@@ -0,0 +1,95 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Default setting is building on nvidia/cuda:10.1-devel-ubuntu18.04
ARG BASE_IMAGE=nvidia/cuda:10.1-devel-ubuntu18.04
FROM ${BASE_IMAGE}
# Default to use Python3. Allowed values are "2" and "3".
ARG PYVER=3
# Ensure apt-get won't prompt for selecting options
ENV DEBIAN_FRONTEND=noninteractive
ENV PYVER=$PYVER
RUN PYSFX=`[ "$PYVER" != "2" ] && echo "$PYVER" || echo ""` && \
apt-get update && \
apt-get install -y --no-install-recommends \
software-properties-common \
autoconf \
automake \
build-essential \
cmake \
curl \
git \
libopencv-dev \
libopencv-core-dev \
libssl-dev \
libtool \
pkg-config \
python${PYSFX} \
python${PYSFX}-pip \
python${PYSFX}-dev && \
pip${PYSFX} install --upgrade setuptools wheel
RUN PYSFX=`[ "$PYVER" != "2" ] && echo "$PYVER" || echo ""` && \
pip${PYSFX} install --upgrade grpcio-tools
# Build expects "python" executable (not python3).
RUN rm -f /usr/bin/python && \
ln -s /usr/bin/python$PYVER /usr/bin/python
# Build the client library and examples
WORKDIR /workspace
COPY VERSION .
COPY build build
COPY src/clients src/clients
COPY src/core src/core
RUN cd build && \
cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX:PATH=/workspace/install && \
make -j16 trtis-clients
RUN cd install && \
export VERSION=`cat /workspace/VERSION` && \
tar zcf /workspace/v$VERSION.clients.tar.gz *
# For CI testing need to install a test script.
COPY qa/L0_client_tar/test.sh /tmp/test.sh
# Install an image needed by the quickstart and other documentation.
COPY qa/images/mug.jpg images/mug.jpg
# Install the dependencies needed to run the client examples. These
# are not needed for building but including them allows this image to
# be used to run the client examples. The special upgrade and handling
# of pip is needed to get numpy to install correctly with python2 on
# ubuntu 16.04.
RUN python -m pip install --user --upgrade pip && \
python -m pip install --upgrade install/python/tensorrtserver-*.whl numpy pillow
ENV PATH /workspace/install/bin:${PATH}
ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}

@@ -0,0 +1 @@
Subproject commit a1f3860ba65c0fd8f2be3adfcab2673efd039348

View file

@@ -56,6 +56,10 @@ def parse_args():
parser.add_argument("--wav", type=str, help='absolute path to .wav file (16KHz)')
parser.add_argument("--cpu", action="store_true", help="Run inference on CPU")
parser.add_argument("--ema", action="store_true", help="If available, load EMA model weights")
# FIXME Unused, but passed by Triton helper scripts
parser.add_argument("--pyt_fp16", action='store_true', help='use half precision')
return parser.parse_args()
def calc_wer(data_layer, audio_processor,

View file

@@ -100,7 +100,7 @@ class SpecAugment(nn.Module):
def forward(self, x):
sh = x.shape
mask = torch.zeros(x.shape).byte()
mask = torch.zeros(x.shape, dtype=torch.bool)
for idx in range(sh[0]):
for _ in range(self.cutout_x_regions):
cutout_x_left = int(random.uniform(0, sh[1] - self.cutout_x_width))
@@ -130,7 +130,7 @@ class SpecCutoutRegions(nn.Module):
def forward(self, x):
sh = x.shape
mask = torch.zeros(x.shape, dtype=torch.uint8)
mask = torch.zeros(x.shape, dtype=torch.bool)
for idx in range(sh[0]):
for i in range(self.cutout_rect_regions):
@@ -275,7 +275,7 @@ class MaskedConv1d(nn.Conv1d):
def get_seq_len(self, lens):
return ((lens + 2 * self.padding[0] - self.dilation[0] * (
self.kernel_size[0] - 1) - 1) / self.stride[0] + 1)
self.kernel_size[0] - 1) - 1) // self.stride[0] + 1)
def forward(self, inp):
if self.use_conv_mask:
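
Both edits in this file track newer PyTorch semantics: masks created with .byte() (uint8) are deprecated for masked_fill and boolean indexing in favor of torch.bool, and dividing integer tensors with / is deprecated in favor of true division, so the output-length formula uses // to keep sequence lengths integral. A minimal standalone sketch of both behaviors (illustrative only, not code from this repo; assumes PyTorch 1.2 or newer):

    import torch

    # bool is the supported mask dtype for masked_fill / boolean indexing
    x = torch.randn(2, 6)
    mask = torch.zeros(x.shape, dtype=torch.bool)  # preferred over torch.zeros(...).byte()
    mask[:, :3] = True
    x = x.masked_fill(mask, 0.0)

    # '//' keeps the computed lengths integral; '/' would yield a float tensor
    lens = torch.tensor([100, 57])
    padding, dilation, kernel, stride = 1, 1, 11, 2
    out_lens = (lens + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1
    print(out_lens.dtype)  # torch.int64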

View file

@@ -98,7 +98,7 @@ class AdamW(Optimizer):
state['step'] += 1
# Decay the first and second moment running average coefficient
exp_avg.mul_(beta1).add_(1 - beta1, grad)
exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
if amsgrad:
# Maintains the maximum of all 2nd moment running avg. till now
@@ -111,7 +111,7 @@ class AdamW(Optimizer):
bias_correction1 = 1 - beta1 ** state['step']
bias_correction2 = 1 - beta2 ** state['step']
step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
p.data.add_(-step_size, torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom) )
p.data.add_(torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom), alpha=-step_size)
return loss
@@ -201,7 +201,7 @@ class Novograd(Optimizer):
if exp_avg_sq == 0:
exp_avg_sq.copy_(norm)
else:
exp_avg_sq.mul_(beta2).add_(1 - beta2, norm)
exp_avg_sq.mul_(beta2).add_(norm, alpha=1 - beta2)
if amsgrad:
# Maintains the maximum of all 2nd moment running avg. till now
@@ -213,11 +213,11 @@ class Novograd(Optimizer):
grad.div_(denom)
if group['weight_decay'] != 0:
grad.add_(group['weight_decay'], p.data)
grad.add_(p.data, alpha=group['weight_decay'])
if group['grad_averaging']:
grad.mul_(1 - beta1)
exp_avg.mul_(beta1).add_(grad)
p.data.add_(-group['lr'], exp_avg)
p.data.add_(exp_avg, alpha=-group['lr'])
return loss
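
The optimizer edits above migrate off PyTorch's deprecated positional overloads of the in-place ops, where a leading scalar multiplied the tensor argument (add_(scalar, tensor)), to the keyword form add_(tensor, alpha=scalar) that newer releases expect. A standalone sketch of the pattern (illustrative only; assumes PyTorch 1.5 or newer):

    import torch

    beta1, lr = 0.9, 1e-3
    grad = torch.randn(4)
    exp_avg = torch.zeros(4)
    p = torch.randn(4)

    # old, deprecated:  exp_avg.mul_(beta1).add_(1 - beta1, grad)
    exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

    # the parameter update follows the same pattern: tensor first, scalar as alpha=
    # old, deprecated:  p.data.add_(-lr, exp_avg)
    p.data.add_(exp_avg, alpha=-lr)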

View file

@@ -47,9 +47,9 @@ CMD+=" --seed=$SEED"
CMD+=" --optimizer=novograd"
CMD+=" --dataset_dir=$DATA_DIR"
CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json"
CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,"
CMD+="$DATA_DIR/librispeech-train-clean-360-wav.json,"
CMD+="$DATA_DIR/librispeech-train-other-500-wav.json"
CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json"
CMD+=",$DATA_DIR/librispeech-train-clean-360-wav.json"
CMD+=",$DATA_DIR/librispeech-train-other-500-wav.json"
CMD+=" --weight_decay=1e-3"
CMD+=" --save_freq=$SAVE_FREQUENCY"
CMD+=" --eval_freq=100"

View file

@@ -1,8 +1,10 @@
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.03-py3
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.10-py3
FROM ${FROM_IMAGE_NAME}
RUN apt-get update && apt-get install -y python3
WORKDIR /tmp/onnx-trt
COPY trt/onnx-trt.patch .
COPY tensorrt/onnx-trt.patch .
RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout 8716c9b && git submodule update --init --recursive && \
patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
@@ -11,7 +13,7 @@ RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && g
# At the same step, also install TRT pip reqs
WORKDIR /tmp/pipReqs
COPY requirements.txt /tmp/pipReqs/jocRequirements.txt
COPY trt/requirements.txt /tmp/pipReqs/trtRequirements.txt
COPY tensorrt/requirements.txt /tmp/pipReqs/trtRequirements.txt
RUN pip install --disable-pip-version-check -U -r jocRequirements.txt -r trtRequirements.txt

View file

@@ -1,5 +1,5 @@
#!/bin/bash
# Constructs a docker image containing dependencies for execution of JASPER through TRT
echo "docker build . -f ./trt/Dockerfile -t jasper:trt6"
docker build . -f ./trt/Dockerfile -t jasper:trt6
echo "docker build . -f ./tensorrt/Dockerfile -t jasper:trt6"
docker build . -f ./tensorrt/Dockerfile -t jasper:trt6

View file

@@ -130,7 +130,7 @@ else
PYT_PREDICTION_PATH=" --pyt_prediction_path=${PYT_PREDICTION_PATH}"
fi
CMD="python trt/perf.py"
CMD="python tensorrt/perf.py"
CMD+=" --batch_size $BATCH_SIZE"
CMD+=" --engine_batch_size $BATCH_SIZE"
CMD+=" --model_toml configs/jasper10x5dr_nomask.toml"

View file

@@ -1,22 +1,38 @@
ARG FROM_IMAGE_NAME=nvcr.io/nvidian/pytorch:20.03-py3
ARG TRITON_BASE_IMAGE=nvcr.io/nvidia/tritonserver:20.03.1-py3-clientsdk
FROM ${TRITON_BASE_IMAGE} as triton
ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.09-py3
FROM tensorrtserver_client as trtis-client
FROM ${FROM_IMAGE_NAME}
RUN apt-get update && apt-get install -y python3
ARG version=6.0.1-1+cuda10.1
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.1.243-1_amd64.deb \
&& dpkg -i cuda-repo-*.deb \
&& wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb \
&& dpkg -i nvidia-machine-learning-repo-*.deb \
&& apt-get update \
&& apt-get install -y --no-install-recommends libnvinfer6=${version} libnvonnxparsers6=${version} libnvparsers6=${version} libnvinfer-plugin6=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python-libnvinfer=${version} python3-libnvinfer=${version}
RUN cp -r /usr/lib/python3.6/dist-packages/tensorrt /opt/conda/lib/python3.6/site-packages/tensorrt
ADD requirements.txt .
RUN pip install -r requirements.txt
RUN pip install onnxruntime
ADD triton/requirements.txt .
RUN pip install -r requirements.txt
ENV PATH=$PATH:/usr/src/tensorrt/bin
WORKDIR /tmp/onnx-trt
COPY tensorrt/onnx-trt.patch .
RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout b677b9cbf19af803fa6f76d05ce558e657e4d8b6 && git submodule update --init --recursive && \
patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
ADD tensorrt/requirements.txt .
RUN pip install -r requirements.txt
COPY --from=triton /opt/tritonserver/qa/pkgs/tensorrtserver-1.13.0-py3-none-linux_x86_64.whl ./tensorrtserver-1.13.0-py3-none-linux_x86_64.whl
# Here's a good place to install pip reqs from JoC repo.
# At the same step, also install TRT pip reqs
WORKDIR /tmp/pipReqs
COPY requirements.txt /tmp/pipReqs/pytRequirements.txt
COPY tensorrt/requirements.txt /tmp/pipReqs/trtRequirements.txt
COPY triton/requirements.txt /tmp/pipReqs/trtisRequirements.txt
RUN apt-get update && apt-get install -y --no-install-recommends portaudio19-dev && pip install -r pytRequirements.txt && pip install -r trtRequirements.txt && pip install -r trtisRequirements.txt
RUN pip install tensorrtserver-1.13.0-py3-none-linux_x86_64.whl
#Copy the perf_client over
COPY --from=trtis-client /workspace/install/bin/perf_client /workspace/install/bin/perf_client
#Copy the python wheel and install with pip
COPY --from=trtis-client /workspace/install/python/tensorrtserver*.whl /tmp/
RUN pip install /tmp/tensorrtserver*.whl && rm /tmp/tensorrtserver*.whl
WORKDIR /workspace/jasper
COPY . .
RUN pip install --no-cache-dir -e .
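
With this change the image takes perf_client and the tensorrtserver Python wheel from the Triton client SDK build stage instead of assembling TensorRT packages by hand. A rough sketch of how that v1 client wheel is typically driven from Python; the server URL, model name, tensor names, and feature shape below are placeholders, not values taken from this repo:

    import numpy as np
    from tensorrtserver.api import InferContext, ProtocolType

    # placeholder endpoint / model / tensor names -- adjust to the deployed model_repo
    ctx = InferContext("localhost:8000", ProtocolType.from_str("http"),
                       "jasper-onnx", -1)

    features = np.zeros((64, 500), dtype=np.float32)  # dummy acoustic features
    result = ctx.run({"input__0": [features]},
                     {"output__0": InferContext.ResultFormat.RAW},
                     1)
    logits = result["output__0"][0]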

View file

@@ -4,5 +4,6 @@ SCRIPT_DIR=$(cd $(dirname $0); pwd)
PROJECT_DIR=${SCRIPT_DIR}/../../../
docker pull nvcr.io/nvidia/tensorrtserver:19.09-py3
git submodule update --init --recursive
docker build -t tensorrtserver_client -f ${PROJECT_DIR}/external/triton-inference-server/Dockerfile.client ${PROJECT_DIR}/external/triton-inference-server
docker build . --rm -f ${PROJECT_DIR}/trtis/Dockerfile -t jasper:trtis
docker build -t tensorrtserver_client \
-f ${PROJECT_DIR}/external/Dockerfile.client.patched ${PROJECT_DIR}/external/triton-inference-server
docker build . --rm -f ${PROJECT_DIR}/triton/Dockerfile -t jasper:triton

View file

@@ -29,11 +29,11 @@ fi
echo $MOUNTS
docker run -it --rm \
--runtime=nvidia \
--gpus=all \
--shm-size=4g \
--ulimit memlock=-1 \
--ulimit stack=67108864 \
${MOUNTS} \
-v ${JASPER_REPO}:/jasper \
${EXTRA_JASPER_ENV} \
jasper:trtis bash $PROGRAM_PATH
jasper:triton bash $PROGRAM_PATH

View file

@@ -44,7 +44,7 @@ export GPU=${GPU:-}
SCRIPT_DIR=$(cd $(dirname $0); pwd)
PROJECT_DIR=${SCRIPT_DIR}/../..
MODEL_REPO=${MODEL_REPO:-"${PROJECT_DIR}/trtis/model_repo"}
MODEL_REPO=${MODEL_REPO:-"${PROJECT_DIR}/triton/model_repo"}
# We need to make sure TRTIS uses only one GPU, same as export does
# for TRTIS
@@ -78,7 +78,7 @@ do
if [ "${REGENERATE_ENGINES}" == "yes" ]; then
ARCH=${ARCH} CHECKPOINT_DIR=${CHECKPOINT_DIR} CHECKPOINT=${CHECKPOINT} PRECISION=${PRECISION} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} \
${PROJECT_DIR}/trtis/scripts/export_model.sh || exit 1
${PROJECT_DIR}/triton/scripts/export_model.sh || exit 1
fi
for BATCH_SIZE in 1 2 4 8 16 32 64;

View file

@@ -25,12 +25,12 @@ GPU=${GPU:-0}
SCRIPT_DIR=$(cd $(dirname $0); pwd)
PROJECT_DIR=${SCRIPT_DIR}/../..
if [ -f /.dockerenv ]; then # inside docker
CUDA_VISIBLE_DEVICES=${GPU} CHECKPOINT=${CHECKPOINT} CHECKPOINT_DIR=${CHECKPOINT_DIR} PRECISION=${PRECISION} ARCH=${ARCH} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} ${PROJECT_DIR}/trtis/scripts/export_model_helper.sh || exit 1
CUDA_VISIBLE_DEVICES=${GPU} CHECKPOINT=${CHECKPOINT} CHECKPOINT_DIR=${CHECKPOINT_DIR} PRECISION=${PRECISION} ARCH=${ARCH} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} ${PROJECT_DIR}/triton/scripts/export_model_helper.sh || exit 1
else
set -x
PROGRAM_PATH="/jasper/trtis/scripts/export_model_helper.sh" \
PROGRAM_PATH="/jasper/triton/scripts/export_model_helper.sh" \
EXTRA_JASPER_ENV="-e PRECISION=${PRECISION} -e CHECKPOINT=${CHECKPOINT} -e CHECKPOINT_DIR=/checkpoints -e ARCH=${ARCH} -e MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} -e CUDA_VISIBLE_DEVICES=${GPU}" \
CHECKPOINT_DIR=${CHECKPOINT_DIR} DATA_DIR= RESULT_DIR= \
${PROJECT_DIR}/trtis/scripts/docker/launch.sh || exit 1
${PROJECT_DIR}/triton/scripts/docker/launch.sh || exit 1
set +x
fi

View file

@@ -66,13 +66,13 @@ echo "export_model.sh: Exporting TRT engine, CUDA ARCH = ${ARCH} ... "
PREC_FLAGS=""
if [ "$PRECISION" == "fp16" ]
then
PREC_FLAGS="--trt_fp16"
PREC_FLAGS="--trt_fp16"
fi
# remove targets first
rm -f ${MODEL_REPO}/jasper-trt/1/jasper_${ARCH}.plan ${MODEL_REPO}/jasper-onnx/1/jasper.onnx
python ${JASPER_REPO}/trt/perf.py \
python ${JASPER_REPO}/tensorrt/perf.py \
--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
--wav=${JASPER_REPO}/notebooks/example1.wav \
--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \
@@ -85,7 +85,7 @@ if [ "$PRECISION" == "fp16" ]
then
PREC_FLAGS="--trt_fp16 --pyt_fp16"
fi
python ${JASPER_REPO}/trt/perf.py \
python ${JASPER_REPO}/tensorrt/perf.py \
--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
--wav=${JASPER_REPO}/notebooks/example1.wav \
--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \

View file

@@ -26,13 +26,13 @@ FILE=${3} # json manifest file, OR single wav file
JASPER_CONTAINER_TAG=${JASPER_CONTAINER_TAG:-jasper:trtis}
if [ "$#" -ge 1 ] && [ "${FILE: -4}" == ".wav" ]; then
CMD="python /jasper/trtis/jasper-client.py --data_dir /data --audio_filename ${FILE} --model_platform ${MODEL_TYPE}"
CMD="python /jasper/triton/jasper-client.py --data_dir /data --audio_filename ${FILE} --model_platform ${MODEL_TYPE}"
ARGS=""
ARGS="$ARGS -v $DATA_DIR:/data"
elif [ "$#" -ge 1 ] && [ "${FILE: -4}" == "json" ]; then
ARGS=""
ARGS="$ARGS -v $DATA_DIR:/data"
CMD="python /jasper/trtis/jasper-client.py --manifest_filename ${FILE} --model_platform ${MODEL_TYPE} --data_dir /data"
CMD="python /jasper/triton/jasper-client.py --manifest_filename ${FILE} --model_platform ${MODEL_TYPE} --data_dir /data"
else
ARGS="-it"
CMD=""
@@ -49,4 +49,4 @@ nvidia-docker run --rm -it \
-v ${PROJECT_DIR}:/jasper \
--name=trtis-client \
${ARGS} ${JASPER_CONTAINER_TAG} ${CMD}
set +x
set +x

View file

@@ -67,13 +67,13 @@ ARGS="\
curl -s "http://${SERVER_HOSTNAME}:8000/api/status/${MODEL_NAME}" | grep ready_state | grep SERVER_READY || (echo "Model ${MODEL_NAME} is not ready, perf_client skipped..." && exit 1)
echo "=== STARTING: perf client ${ARGS} --concurrency-range 1:4:1 ==="
docker run -e DISPLAY=${DISPLAY} --runtime nvidia --rm \
docker run -e DISPLAY=${DISPLAY} --gpus all --rm \
--privileged --net=host \
-v ${RESULT_DIR_H}:/results --name jasper-perf-client \
${TRTIS_CLIENT_CONTAINER_TAG} perf_client $ARGS -f /results/${OUTPUT_FILE_CSV}_p1 --concurrency-range 1:4:1 2>&1 | tee -a $LOGNAME
echo "=== STARTING: perf client ${ARGS} --concurrency-range 8:${MAX_CONCURRENCY}:8 ==="
docker run -e DISPLAY=${DISPLAY} --runtime nvidia --rm \
docker run -e DISPLAY=${DISPLAY} --gpus all --rm \
--privileged --net=host \
-v ${RESULT_DIR_H}:/results --name jasper-perf-client \
${TRTIS_CLIENT_CONTAINER_TAG} perf_client $ARGS -f /results/${OUTPUT_FILE_CSV}_p2 --concurrency-range 8:${MAX_CONCURRENCY}:8 2>&1 | tee -a $LOGNAME

View file

@@ -48,7 +48,7 @@ RM=${RM:-"--rm"}
set -x
docker run -p 8000:8000 -p 8001:8001 -p 8002:8002 \
--runtime nvidia \
--gpus all \
-e NVIDIA_VISIBLE_DEVICES=${NV_VISIBLE_DEVICES} \
-v ${MODELS_DIR}:/models \
-v ${TRTIS_DIR}/model_repo:/model_repo \

View file

@@ -0,0 +1,43 @@
set -o nounset
set -o errexit
set -o pipefail
cd ..
cp -r /data/joc/gnmt_tf/19.08 output_dir
# hack to work with pytorch dataset
sed -ie 's/ src_vocab_file = hparams.vocab_prefix + "." + hparams.src/ src_vocab_file = hparams.vocab_prefix/g' nmt.py
sed -ie 's/ tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt/ tgt_vocab_file = hparams.vocab_prefix/g' nmt.py
( python nmt.py --amp --data_dir=/data/pytorch/wmt16_de_en --output_dir=output_dir --mode=infer --infer_batch_size=512 2>&1 ) | tee log.log
python scripts/parse_log.py log.log | tee log.json
python << END
import json
import numpy as np
from pathlib import Path
baseline = 10254
bleu_baseline = 25.1
log = json.loads(Path('log.json').read_text())
speed = np.mean(log['eval_tokens_per_sec'])
bleu = log['bleu'][0]
print('Eval speed :', speed)
print('Baseline :', baseline)
print('Bleu :', bleu)
print('Bleu baseline :', bleu_baseline)
if speed < baseline * 0.9:
print("FAILED: speed ({}) doesn't match the baseline ({})".format(speed, baseline))
exit(1)
if bleu < bleu_baseline - 0.2:
print("FAILED: bleu ({}) doesn't match the baseline ({})".format(bleu, bleu_baseline))
exit(1)
print('SUCCESS')
END

View file

@@ -0,0 +1,43 @@
set -o nounset
set -o errexit
set -o pipefail
cd ..
cp -r /data/joc/gnmt_tf/19.08 output_dir
# hack to work with pytorch dataset
sed -ie 's/ src_vocab_file = hparams.vocab_prefix + "." + hparams.src/ src_vocab_file = hparams.vocab_prefix/g' nmt.py
sed -ie 's/ tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt/ tgt_vocab_file = hparams.vocab_prefix/g' nmt.py
( python nmt.py --data_dir=/data/pytorch/wmt16_de_en --output_dir=output_dir --mode=infer --infer_batch_size=512 2>&1 ) | tee log.log
python scripts/parse_log.py log.log | tee log.json
python << END
import json
import numpy as np
from pathlib import Path
baseline = 5374
bleu_baseline = 25.1
log = json.loads(Path('log.json').read_text())
speed = np.mean(log['eval_tokens_per_sec'])
bleu = log['bleu'][0]
print('Eval speed :', speed)
print('Baseline :', baseline)
print('Bleu :', bleu)
print('Bleu baseline :', bleu_baseline)
if speed < baseline * 0.9:
print("FAILED: speed ({}) doesn't match the baseline ({})".format(speed, baseline))
exit(1)
if bleu < bleu_baseline - 0.2:
print("FAILED: bleu ({}) doesn't match the baseline ({})".format(bleu, bleu_baseline))
exit(1)
print('SUCCESS')
END