[Jasper/PyT] Triton update
commit 557f4d01ea (parent 9fa75813e6)
@@ -5,3 +5,5 @@ checkpoints/
 datasets/
 external/tensorrt-inference-server/
 checkpoints/
+triton/model_repo
+triton/deploy
PyTorch/SpeechRecognition/Jasper/.gitmodules (vendored, 8 changed lines)
@@ -1,4 +1,4 @@
-[submodule "external/tensorrt-inference-server"]
-	path = external/tensorrt-inference-server
-	url = https://github.com/NVIDIA/tensorrt-inference-server.git
-	branch = r19.06
+[submodule "external/triton-inference-server"]
+	path = external/triton-inference-server
+	url = https://github.com/NVIDIA/triton-inference-server
+	branch = r19.12
PyTorch/SpeechRecognition/Jasper/external/Dockerfile.client.patched (vendored, new file, 95 lines)
@@ -0,0 +1,95 @@
+# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Default setting is building on nvidia/cuda:10.1-devel-ubuntu18.04
+ARG BASE_IMAGE=nvidia/cuda:10.1-devel-ubuntu18.04
+
+FROM ${BASE_IMAGE}
+
+# Default to use Python3. Allowed values are "2" and "3".
+ARG PYVER=3
+
+# Ensure apt-get won't prompt for selecting options
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PYVER=$PYVER
+
+RUN PYSFX=`[ "$PYVER" != "2" ] && echo "$PYVER" || echo ""` && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        software-properties-common \
+        autoconf \
+        automake \
+        build-essential \
+        cmake \
+        curl \
+        git \
+        libopencv-dev \
+        libopencv-core-dev \
+        libssl-dev \
+        libtool \
+        pkg-config \
+        python${PYSFX} \
+        python${PYSFX}-pip \
+        python${PYSFX}-dev && \
+    pip${PYSFX} install --upgrade setuptools wheel
+
+RUN PYSFX=`[ "$PYVER" != "2" ] && echo "$PYVER" || echo ""` && \
+    pip${PYSFX} install --upgrade grpcio-tools
+
+# Build expects "python" executable (not python3).
+RUN rm -f /usr/bin/python && \
+    ln -s /usr/bin/python$PYVER /usr/bin/python
+
+# Build the client library and examples
+WORKDIR /workspace
+COPY VERSION .
+COPY build build
+COPY src/clients src/clients
+COPY src/core src/core
+
+RUN cd build && \
+    cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX:PATH=/workspace/install && \
+    make -j16 trtis-clients
+RUN cd install && \
+    export VERSION=`cat /workspace/VERSION` && \
+    tar zcf /workspace/v$VERSION.clients.tar.gz *
+
+# For CI testing need to install a test script.
+COPY qa/L0_client_tar/test.sh /tmp/test.sh
+
+# Install an image needed by the quickstart and other documentation.
+COPY qa/images/mug.jpg images/mug.jpg
+
+# Install the dependencies needed to run the client examples. These
+# are not needed for building but including them allows this image to
+# be used to run the client examples. The special upgrade and handling
+# of pip is needed to get numpy to install correctly with python2 on
+# ubuntu 16.04.
+RUN python -m pip install --user --upgrade pip && \
+    python -m pip install --upgrade install/python/tensorrtserver-*.whl numpy pillow
+
+ENV PATH //workspace/install/bin:${PATH}
+ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}
PyTorch/SpeechRecognition/Jasper/external/triton-inference-server (vendored submodule, new)
@@ -0,0 +1 @@
+Subproject commit a1f3860ba65c0fd8f2be3adfcab2673efd039348
@@ -56,6 +56,10 @@ def parse_args():
     parser.add_argument("--wav", type=str, help='absolute path to .wav file (16KHz)')
     parser.add_argument("--cpu", action="store_true", help="Run inference on CPU")
     parser.add_argument("--ema", action="store_true", help="If available, load EMA model weights")
+
+    # FIXME Unused, but passed by Triton helper scripts
+    parser.add_argument("--pyt_fp16", action='store_true', help='use half precision')
+
     return parser.parse_args()

 def calc_wer(data_layer, audio_processor,
@@ -100,7 +100,7 @@ class SpecAugment(nn.Module):
     def forward(self, x):
         sh = x.shape

-        mask = torch.zeros(x.shape).byte()
+        mask = torch.zeros(x.shape, dtype=torch.bool)
         for idx in range(sh[0]):
             for _ in range(self.cutout_x_regions):
                 cutout_x_left = int(random.uniform(0, sh[1] - self.cutout_x_width))
@@ -130,7 +130,7 @@ class SpecCutoutRegions(nn.Module):
     def forward(self, x):
         sh = x.shape

-        mask = torch.zeros(x.shape, dtype=torch.uint8)
+        mask = torch.zeros(x.shape, dtype=torch.bool)

         for idx in range(sh[0]):
             for i in range(self.cutout_rect_regions):
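Both mask hunks above make the same PyTorch API migration: torch.bool replaced uint8/byte tensors as the supported mask dtype around PyTorch 1.2, and byte-mask indexing has emitted deprecation warnings since. A minimal sketch of the new form (the shapes are illustrative, not taken from the Jasper code):

    import torch

    x = torch.randn(4, 64, 256)
    # Old, deprecated spellings: torch.zeros(x.shape).byte()
    # or torch.zeros(x.shape, dtype=torch.uint8)
    mask = torch.zeros(x.shape, dtype=torch.bool)
    mask[0, :8, :16] = True           # mark one cutout region
    x = x.masked_fill(mask, 0.0)      # zero out the masked elements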
@@ -275,7 +275,7 @@ class MaskedConv1d(nn.Conv1d):

     def get_seq_len(self, lens):
         return ((lens + 2 * self.padding[0] - self.dilation[0] * (
-            self.kernel_size[0] - 1) - 1) / self.stride[0] + 1)
+            self.kernel_size[0] - 1) - 1) // self.stride[0] + 1)

     def forward(self, inp):
         if self.use_conv_mask:
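The division fix above matters because / is true division in Python 3: the old expression produced floating-point sequence lengths, while // keeps the standard conv1d output-length formula integral. A small sketch with made-up hyperparameters:

    import torch

    lens = torch.tensor([100, 57])                 # input sequence lengths
    kernel, stride, padding, dilation = 11, 2, 5, 1

    # floor division keeps the result usable as an integer length
    out_lens = (lens + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1
    print(out_lens)                                # tensor([50, 29])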
@@ -98,7 +98,7 @@ class AdamW(Optimizer):

                 state['step'] += 1
                 # Decay the first and second moment running average coefficient
-                exp_avg.mul_(beta1).add_(1 - beta1, grad)
+                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                 exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
                 if amsgrad:
                     # Maintains the maximum of all 2nd moment running avg. till now
@@ -111,7 +111,7 @@ class AdamW(Optimizer):
                 bias_correction1 = 1 - beta1 ** state['step']
                 bias_correction2 = 1 - beta2 ** state['step']
                 step_size = group['lr'] * math.sqrt(bias_correction2) / bias_correction1
-                p.data.add_(-step_size, torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom) )
+                p.data.add_(torch.mul(p.data, group['weight_decay']).addcdiv_(1, exp_avg, denom), alpha=-step_size)

         return loss
@@ -201,7 +201,7 @@ class Novograd(Optimizer):
                 if exp_avg_sq == 0:
                     exp_avg_sq.copy_(norm)
                 else:
-                    exp_avg_sq.mul_(beta2).add_(1 - beta2, norm)
+                    exp_avg_sq.mul_(beta2).add_(norm, alpha=1 - beta2)

                 if amsgrad:
                     # Maintains the maximum of all 2nd moment running avg. till now
@@ -213,11 +213,11 @@ class Novograd(Optimizer):

                 grad.div_(denom)
                 if group['weight_decay'] != 0:
-                    grad.add_(group['weight_decay'], p.data)
+                    grad.add_(p.data, alpha=group['weight_decay'])
                 if group['grad_averaging']:
                     grad.mul_(1 - beta1)
                 exp_avg.mul_(beta1).add_(grad)

-                p.data.add_(-group['lr'], exp_avg)
+                p.data.add_(exp_avg, alpha=-group['lr'])

         return loss
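The AdamW and Novograd hunks above all migrate the same deprecated call pattern: PyTorch's in-place arithmetic ops used to accept a leading positional scalar, as in add_(scalar, tensor), which newer releases deprecate in favor of an explicit keyword, add_(tensor, alpha=scalar). The arithmetic is identical; only the spelling changes. A minimal sketch (values are illustrative):

    import torch

    p = torch.zeros(3)
    update = torch.ones(3)
    lr = 0.1

    # Deprecated: p.add_(-lr, update)
    p.add_(update, alpha=-lr)           # p is now tensor([-0.1, -0.1, -0.1])

    num = torch.ones(3)
    den = torch.full((3,), 2.0)
    # The same keyword form exists for the fused ops, e.g. addcdiv_:
    # the deprecated p.addcdiv_(1, num, den) becomes
    p.addcdiv_(num, den, value=1.0)     # p += 1.0 * num / den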
@ -47,9 +47,9 @@ CMD+=" --seed=$SEED"
|
|||
CMD+=" --optimizer=novograd"
|
||||
CMD+=" --dataset_dir=$DATA_DIR"
|
||||
CMD+=" --val_manifest=$DATA_DIR/librispeech-dev-clean-wav.json"
|
||||
CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json,"
|
||||
CMD+="$DATA_DIR/librispeech-train-clean-360-wav.json,"
|
||||
CMD+="$DATA_DIR/librispeech-train-other-500-wav.json"
|
||||
CMD+=" --train_manifest=$DATA_DIR/librispeech-train-clean-100-wav.json"
|
||||
CMD+=",$DATA_DIR/librispeech-train-clean-360-wav.json"
|
||||
CMD+=",$DATA_DIR/librispeech-train-other-500-wav.json"
|
||||
CMD+=" --weight_decay=1e-3"
|
||||
CMD+=" --save_freq=$SAVE_FREQUENCY"
|
||||
CMD+=" --eval_freq=100"
|
||||
|
|
|
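The manifest edit above only moves the separating commas from the end of each appended fragment to the start of the next; the assembled --train_manifest value is byte-for-byte the same, but each continuation line now carries its own separator. A quick check (paths shortened for illustration):

    old = " --train_manifest=a.json," + "b.json," + "c.json"
    new = " --train_manifest=a.json" + ",b.json" + ",c.json"
    assert old == new  # identical command line either way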
@@ -1,8 +1,10 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.03-py3
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.10-py3
 FROM ${FROM_IMAGE_NAME}

+RUN apt-get update && apt-get install -y python3
+
 WORKDIR /tmp/onnx-trt
-COPY trt/onnx-trt.patch .
+COPY tensorrt/onnx-trt.patch .
 RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout 8716c9b && git submodule update --init --recursive && \
     patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig

@@ -11,7 +13,7 @@ RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && g
 # At the same step, also install TRT pip reqs
 WORKDIR /tmp/pipReqs
 COPY requirements.txt /tmp/pipReqs/jocRequirements.txt
-COPY trt/requirements.txt /tmp/pipReqs/trtRequirements.txt
+COPY tensorrt/requirements.txt /tmp/pipReqs/trtRequirements.txt
 RUN pip install --disable-pip-version-check -U -r jocRequirements.txt -r trtRequirements.txt
@@ -1,5 +1,5 @@
 #!/bin/bash

 # Constructs a docker image containing dependencies for execution of JASPER through TRT
-echo "docker build . -f ./trt/Dockerfile -t jasper:trt6"
-docker build . -f ./trt/Dockerfile -t jasper:trt6
+echo "docker build . -f ./tensorrt/Dockerfile -t jasper:trt6"
+docker build . -f ./tensorrt/Dockerfile -t jasper:trt6
@@ -130,7 +130,7 @@ else
     PYT_PREDICTION_PATH=" --pyt_prediction_path=${PYT_PREDICTION_PATH}"
 fi

-CMD="python trt/perf.py"
+CMD="python tensorrt/perf.py"
 CMD+=" --batch_size $BATCH_SIZE"
 CMD+=" --engine_batch_size $BATCH_SIZE"
 CMD+=" --model_toml configs/jasper10x5dr_nomask.toml"
@@ -1,22 +1,38 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidian/pytorch:20.03-py3
-ARG TRITON_BASE_IMAGE=nvcr.io/nvidia/tritonserver:20.03.1-py3-clientsdk
-FROM ${TRITON_BASE_IMAGE} as triton
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.09-py3

+FROM tensorrtserver_client as trtis-client
 FROM ${FROM_IMAGE_NAME}
+RUN apt-get update && apt-get install -y python3
+ARG version=6.0.1-1+cuda10.1
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_10.1.243-1_amd64.deb \
+    && dpkg -i cuda-repo-*.deb \
+    && wget https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/nvidia-machine-learning-repo-ubuntu1804_1.0.0-1_amd64.deb \
+    && dpkg -i nvidia-machine-learning-repo-*.deb \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends libnvinfer6=${version} libnvonnxparsers6=${version} libnvparsers6=${version} libnvinfer-plugin6=${version} libnvinfer-dev=${version} libnvonnxparsers-dev=${version} libnvparsers-dev=${version} libnvinfer-plugin-dev=${version} python-libnvinfer=${version} python3-libnvinfer=${version}
+RUN cp -r /usr/lib/python3.6/dist-packages/tensorrt /opt/conda/lib/python3.6/site-packages/tensorrt

-ADD requirements.txt .
-RUN pip install -r requirements.txt
-RUN pip install onnxruntime

-ADD triton/requirements.txt .
-RUN pip install -r requirements.txt
+ENV PATH=$PATH:/usr/src/tensorrt/bin
+WORKDIR /tmp/onnx-trt
+COPY tensorrt/onnx-trt.patch .
+RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git checkout b677b9cbf19af803fa6f76d05ce558e657e4d8b6 && git submodule update --init --recursive && \
+    patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig

+ADD tensorrt/requirements.txt .
+RUN pip install -r requirements.txt

-COPY --from=triton /opt/tritonserver/qa/pkgs/tensorrtserver-1.13.0-py3-none-linux_x86_64.whl ./tensorrtserver-1.13.0-py3-none-linux_x86_64.whl
+# Here's a good place to install pip reqs from JoC repo.
+# At the same step, also install TRT pip reqs
+WORKDIR /tmp/pipReqs
+COPY requirements.txt /tmp/pipReqs/pytRequirements.txt
+COPY tensorrt/requirements.txt /tmp/pipReqs/trtRequirements.txt
+COPY triton/requirements.txt /tmp/pipReqs/trtisRequirements.txt
+RUN apt-get update && apt-get install -y --no-install-recommends portaudio19-dev && pip install -r pytRequirements.txt && pip install -r trtRequirements.txt && pip install -r trtisRequirements.txt

-RUN pip install tensorrtserver-1.13.0-py3-none-linux_x86_64.whl
+#Copy the perf_client over
+COPY --from=trtis-client /workspace/install/bin/perf_client /workspace/install/bin/perf_client
+#Copy the python wheel and install with pip
+COPY --from=trtis-client /workspace/install/python/tensorrtserver*.whl /tmp/
+RUN pip install /tmp/tensorrtserver*.whl && rm /tmp/tensorrtserver*.whl

 WORKDIR /workspace/jasper
 COPY . .
 RUN pip install --no-cache-dir -e .
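The image above installs the TensorRT Inference Server (TRTIS, since renamed Triton) v1 Python client from the wheel built in the tensorrtserver_client stage. As a quick sanity check that the wheel works inside the finished image, something like the following should talk to a running server (a hedged sketch: the URL and the use of the v1 tensorrtserver.api module are assumptions based on the wheel name, not shown in this commit):

    # assumes a server is already listening on localhost:8000 (HTTP)
    from tensorrtserver.api import ProtocolType, ServerHealthContext

    ctx = ServerHealthContext("localhost:8000", ProtocolType.HTTP)
    print("live:", ctx.is_live(), "ready:", ctx.is_ready())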
@@ -4,5 +4,6 @@ SCRIPT_DIR=$(cd $(dirname $0); pwd)
 PROJECT_DIR=${SCRIPT_DIR}/../../../
-docker pull nvcr.io/nvidia/tensorrtserver:19.09-py3
 git submodule update --init --recursive
-docker build -t tensorrtserver_client -f ${PROJECT_DIR}/external/triton-inference-server/Dockerfile.client ${PROJECT_DIR}/external/triton-inference-server
-docker build . --rm -f ${PROJECT_DIR}/trtis/Dockerfile -t jasper:trtis
+docker build -t tensorrtserver_client \
+    -f ${PROJECT_DIR}/external/Dockerfile.client.patched ${PROJECT_DIR}/external/triton-inference-server
+docker build . --rm -f ${PROJECT_DIR}/triton/Dockerfile -t jasper:triton
@@ -29,11 +29,11 @@ fi

 echo $MOUNTS
 docker run -it --rm \
-  --runtime=nvidia \
+  --gpus=all \
   --shm-size=4g \
   --ulimit memlock=-1 \
   --ulimit stack=67108864 \
   ${MOUNTS} \
   -v ${JASPER_REPO}:/jasper \
   ${EXTRA_JASPER_ENV} \
-  jasper:trtis bash $PROGRAM_PATH
+  jasper:triton bash $PROGRAM_PATH
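The launch and server scripts in this commit replace the legacy nvidia-docker2 flag --runtime=nvidia with the --gpus option native to Docker 19.03+, so the containers no longer depend on the old runtime hook. A trivial in-container check that the GPUs actually came through (a sketch; run inside the jasper:triton image):

    import torch

    # Should report True and a nonzero count when --gpus=all worked
    print(torch.cuda.is_available(), torch.cuda.device_count())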
@@ -44,7 +44,7 @@ export GPU=${GPU:-}

 SCRIPT_DIR=$(cd $(dirname $0); pwd)
 PROJECT_DIR=${SCRIPT_DIR}/../..
-MODEL_REPO=${MODEL_REPO:-"${PROJECT_DIR}/trtis/model_repo"}
+MODEL_REPO=${MODEL_REPO:-"${PROJECT_DIR}/triton/model_repo"}

 # We need to make sure TRTIS uses only one GPU, same as export does
 # for TRTIS
@@ -78,7 +78,7 @@ do

     if [ "${REGENERATE_ENGINES}" == "yes" ]; then
         ARCH=${ARCH} CHECKPOINT_DIR=${CHECKPOINT_DIR} CHECKPOINT=${CHECKPOINT} PRECISION=${PRECISION} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} \
-            ${PROJECT_DIR}/trtis/scripts/export_model.sh || exit 1
+            ${PROJECT_DIR}/triton/scripts/export_model.sh || exit 1
     fi

     for BATCH_SIZE in 1 2 4 8 16 32 64;
@@ -25,12 +25,12 @@ GPU=${GPU:-0}
 SCRIPT_DIR=$(cd $(dirname $0); pwd)
 PROJECT_DIR=${SCRIPT_DIR}/../..
 if [ -f /.dockerenv ]; then # inside docker
-    CUDA_VISIBLE_DEVICES=${GPU} CHECKPOINT=${CHECKPOINT} CHECKPOINT_DIR=${CHECKPOINT_DIR} PRECISION=${PRECISION} ARCH=${ARCH} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} ${PROJECT_DIR}/trtis/scripts/export_model_helper.sh || exit 1
+    CUDA_VISIBLE_DEVICES=${GPU} CHECKPOINT=${CHECKPOINT} CHECKPOINT_DIR=${CHECKPOINT_DIR} PRECISION=${PRECISION} ARCH=${ARCH} MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} ${PROJECT_DIR}/triton/scripts/export_model_helper.sh || exit 1
 else
     set -x
-    PROGRAM_PATH="/jasper/trtis/scripts/export_model_helper.sh" \
+    PROGRAM_PATH="/jasper/triton/scripts/export_model_helper.sh" \
     EXTRA_JASPER_ENV="-e PRECISION=${PRECISION} -e CHECKPOINT=${CHECKPOINT} -e CHECKPOINT_DIR=/checkpoints -e ARCH=${ARCH} -e MAX_SEQUENCE_LENGTH_FOR_ENGINE=${MAX_SEQUENCE_LENGTH_FOR_ENGINE} -e CUDA_VISIBLE_DEVICES=${GPU}" \
     CHECKPOINT_DIR=${CHECKPOINT_DIR} DATA_DIR= RESULT_DIR= \
-    ${PROJECT_DIR}/trtis/scripts/docker/launch.sh || exit 1
+    ${PROJECT_DIR}/triton/scripts/docker/launch.sh || exit 1
     set +x
 fi
@ -66,13 +66,13 @@ echo "export_model.sh: Exporting TRT engine, CUDA ARCH = ${ARCH} ... "
|
|||
PREC_FLAGS=""
|
||||
if [ "$PRECISION" == "fp16" ]
|
||||
then
|
||||
PREC_FLAGS="--trt_fp16"
|
||||
PREC_FLAGS="--trt_fp16"
|
||||
fi
|
||||
|
||||
# remove targtes first
|
||||
rm -f ${MODEL_REPO}/jasper-trt/1/jasper_${ARCH}.plan ${MODEL_REPO}/jasper-onnx/1/jasper.onnx
|
||||
|
||||
python ${JASPER_REPO}/trt/perf.py \
|
||||
python ${JASPER_REPO}/tensorrt/perf.py \
|
||||
--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
|
||||
--wav=${JASPER_REPO}/notebooks/example1.wav \
|
||||
--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \
|
||||
|
@ -85,7 +85,7 @@ if [ "$PRECISION" == "fp16" ]
|
|||
then
|
||||
PREC_FLAGS="--trt_fp16 --pyt_fp16"
|
||||
fi
|
||||
python ${JASPER_REPO}/trt/perf.py \
|
||||
python ${JASPER_REPO}/tensorrt/perf.py \
|
||||
--ckpt_path ${CHECKPOINT_DIR}/${CHECKPOINT} \
|
||||
--wav=${JASPER_REPO}/notebooks/example1.wav \
|
||||
--model_toml=${JASPER_REPO}/configs/${MODEL_CONFIG} \
|
||||
|
|
|
@@ -26,13 +26,13 @@ FILE=${3} # json manifest file, OR single wav file
 JASPER_CONTAINER_TAG=${JASPER_CONTAINER_TAG:-jasper:trtis}

 if [ "$#" -ge 1 ] && [ "${FILE: -4}" == ".wav" ]; then
-    CMD="python /jasper/trtis/jasper-client.py --data_dir /data --audio_filename ${FILE} --model_platform ${MODEL_TYPE}"
+    CMD="python /jasper/triton/jasper-client.py --data_dir /data --audio_filename ${FILE} --model_platform ${MODEL_TYPE}"
     ARGS=""
     ARGS="$ARGS -v $DATA_DIR:/data"
 elif [ "$#" -ge 1 ] && [ "${FILE: -4}" == "json" ]; then
     ARGS=""
     ARGS="$ARGS -v $DATA_DIR:/data"
-    CMD="python /jasper/trtis/jasper-client.py --manifest_filename ${FILE} --model_platform ${MODEL_TYPE} --data_dir /data"
+    CMD="python /jasper/triton/jasper-client.py --manifest_filename ${FILE} --model_platform ${MODEL_TYPE} --data_dir /data"
 else
     ARGS="-it"
     CMD=""
@@ -49,4 +49,4 @@ nvidia-docker run --rm -it \
     -v ${PROJECT_DIR}:/jasper \
     --name=trtis-client \
     ${ARGS} ${JASPER_CONTAINER_TAG} ${CMD}
-set +x
+set +x
@ -67,13 +67,13 @@ ARGS="\
|
|||
curl -s "http://${SERVER_HOSTNAME}:8000/api/status/${MODEL_NAME}" | grep ready_state | grep SERVER_READY || (echo "Model ${MODEL_NAME} is not ready, perf_client skipped..." && exit 1)
|
||||
|
||||
echo "=== STARTING: perf client ${ARGS} --concurrency-range 1:4:1 ==="
|
||||
docker run -e DISPLAY=${DISPLAY} --runtime nvidia --rm \
|
||||
docker run -e DISPLAY=${DISPLAY} --gpus all --rm \
|
||||
--privileged --net=host \
|
||||
-v ${RESULT_DIR_H}:/results --name jasper-perf-client \
|
||||
${TRTIS_CLIENT_CONTAINER_TAG} perf_client $ARGS -f /results/${OUTPUT_FILE_CSV}_p1 --concurrency-range 1:4:1 2>&1 | tee -a $LOGNAME
|
||||
|
||||
echo "=== STARTING: perf client ${ARGS} --concurrency-range 8:${MAX_CONCURRENCY}:8 ==="
|
||||
docker run -e DISPLAY=${DISPLAY} --runtime nvidia --rm \
|
||||
docker run -e DISPLAY=${DISPLAY} --gpus all --rm \
|
||||
--privileged --net=host \
|
||||
-v ${RESULT_DIR_H}:/results --name jasper-perf-client \
|
||||
${TRTIS_CLIENT_CONTAINER_TAG} perf_client $ARGS -f /results/${OUTPUT_FILE_CSV}_p2 --concurrency-range 8:${MAX_CONCURRENCY}:8 2>&1 | tee -a $LOGNAME
|
||||
|
|
|
@ -48,7 +48,7 @@ RM=${RM:-"--rm"}
|
|||
|
||||
set -x
|
||||
docker run -p 8000:8000 -p 8001:8001 -p 8002:8002 \
|
||||
--runtime nvidia \
|
||||
--gpus all \
|
||||
-e NVIDIA_VISIBLE_DEVICES=${NV_VISIBLE_DEVICES} \
|
||||
-v ${MODELS_DIR}:/models \
|
||||
-v ${TRTIS_DIR}/model_repo:/model_repo \
|
||||
|
|
|
@@ -0,0 +1,43 @@
+set -o nounset
+set -o errexit
+set -o pipefail
+
+cd ..
+cp -r /data/joc/gnmt_tf/19.08 output_dir
+
+# hack to work with pytorch dataset
+sed -ie 's/ src_vocab_file = hparams.vocab_prefix + "." + hparams.src/ src_vocab_file = hparams.vocab_prefix/g' nmt.py
+sed -ie 's/ tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt/ tgt_vocab_file = hparams.vocab_prefix/g' nmt.py
+
+( python nmt.py --amp --data_dir=/data/pytorch/wmt16_de_en --output_dir=output_dir --mode=infer --infer_batch_size=512 2>&1 ) | tee log.log
+python scripts/parse_log.py log.log | tee log.json
+
+python << END
+import json
+import numpy as np
+from pathlib import Path
+
+baseline = 10254
+bleu_baseline = 25.1
+
+log = json.loads(Path('log.json').read_text())
+speed = np.mean(log['eval_tokens_per_sec'])
+bleu = log['bleu'][0]
+
+print('Eval speed :', speed)
+print('Baseline :', baseline)
+
+print('Bleu :', bleu)
+print('Bleu baseline :', bleu_baseline)
+
+if speed < baseline * 0.9:
+    print("FAILED: speed ({}) doesn't match the baseline ({})".format(speed, baseline))
+    exit(1)
+
+if bleu < bleu_baseline - 0.2:
+    print("FAILED: bleu ({}) doesn't match the baseline ({})".format(bleu, bleu_baseline))
+    exit(1)
+
+print('SUCCESS')
+END
@@ -0,0 +1,43 @@
+set -o nounset
+set -o errexit
+set -o pipefail
+
+cd ..
+cp -r /data/joc/gnmt_tf/19.08 output_dir
+
+# hack to work with pytorch dataset
+sed -ie 's/ src_vocab_file = hparams.vocab_prefix + "." + hparams.src/ src_vocab_file = hparams.vocab_prefix/g' nmt.py
+sed -ie 's/ tgt_vocab_file = hparams.vocab_prefix + "." + hparams.tgt/ tgt_vocab_file = hparams.vocab_prefix/g' nmt.py
+
+( python nmt.py --data_dir=/data/pytorch/wmt16_de_en --output_dir=output_dir --mode=infer --infer_batch_size=512 2>&1 ) | tee log.log
+python scripts/parse_log.py log.log | tee log.json
+
+python << END
+import json
+import numpy as np
+from pathlib import Path
+
+baseline = 5374
+bleu_baseline = 25.1
+
+log = json.loads(Path('log.json').read_text())
+speed = np.mean(log['eval_tokens_per_sec'])
+bleu = log['bleu'][0]
+
+print('Eval speed :', speed)
+print('Baseline :', baseline)
+
+print('Bleu :', bleu)
+print('Bleu baseline :', bleu_baseline)
+
+if speed < baseline * 0.9:
+    print("FAILED: speed ({}) doesn't match the baseline ({})".format(speed, baseline))
+    exit(1)
+
+if bleu < bleu_baseline - 0.2:
+    print("FAILED: bleu ({}) doesn't match the baseline ({})".format(bleu, bleu_baseline))
+    exit(1)
+
+print('SUCCESS')
+END