[Tacotron2/PyT] custom TensorRT backend on TensorRT Inference Server; Conversational AI demo; fixed checkpoint loading; fixed FP16 export to TensorRT

Przemek Strzelczyk 2020-04-02 17:18:26 +02:00
parent 157a3acaa9
commit 5e3b487b89
244 changed files with 62925 additions and 61 deletions

View file

@ -710,6 +710,12 @@ December 2019
January 2020
* Updated batch sizes and performance results for Tacotron 2.
March 2020
* Added Tacotron 2 and WaveGlow inference using TensorRT Inference Server with custom TensorRT backend in `trtis_cpp`
* Added Conversational AI demo script in `notebooks/conversationalai`
* Fixed loading CUDA RNG state in the `load_checkpoint()` function in `train.py` (see the sketch after this list)
* Fixed FP16 export to TensorRT in `trt/README.md`
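For reference, restoring RNG state on checkpoint load amounts to pushing the saved CPU and CUDA generator states back into PyTorch. A minimal, hypothetical sketch of the idea (the checkpoint key names are assumptions, not taken from this diff):

```python
import torch

def restore_rng_state(checkpoint):
    # CPU generator state saved at checkpoint time
    torch.random.set_rng_state(checkpoint['random_rng_state'].cpu())
    # CUDA generator states must be ByteTensors on the CPU, one per device
    torch.cuda.set_rng_state_all([s.cpu() for s in checkpoint['cuda_rng_states_all']])
```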
### Known issues
There are no known issues in this release.

View file

@ -45,6 +45,8 @@ def parse_args(parser):
help='full path to the Tacotron2 model checkpoint file')
parser.add_argument('-o', '--output', type=str, required=True,
help='Directory for the exported Tacotron 2 ONNX model')
parser.add_argument('--fp16', action='store_true',
help='Export with half precision to ONNX')
return parser
@ -105,7 +107,7 @@ def prenet_infer(self, x):
for linear in self.layers:
x1 = F.relu(linear(x1))
x0 = x1[0].unsqueeze(0)
mask = torch.le(torch.rand(256, device='cuda').to(torch.float32), 0.5).to(torch.float32)
mask = torch.le(torch.rand(256, device='cuda').to(x.dtype), 0.5).to(x.dtype)
mask = mask.expand(x1.size(0), x1.size(1))
x1 = x1*mask*2.0
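For context, the Tacotron 2 prenet keeps dropout active at inference, so the change above draws the Bernoulli mask in the input's dtype instead of hard-coded float32; otherwise the FP16 ONNX export ends up with a stray float32 branch. A minimal sketch of one prenet layer step under that convention (function and argument names are illustrative, not the repo's):

```python
import torch
import torch.nn.functional as F

def prenet_layer_step(linear, x, p=0.5):
    # dropout stays on at inference; drawing the mask in x.dtype keeps the
    # exported graph in half precision when x is fp16
    x = F.relu(linear(x))
    mask = torch.le(torch.rand(x.size(-1), device=x.device).to(x.dtype), p).to(x.dtype)
    return x * mask * (1.0 / (1.0 - p))  # rescale to preserve the expected activation
```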
@ -216,7 +218,6 @@ class DecoderIter(torch.nn.Module):
return outputs
def test_inference(encoder, decoder_iter, postnet):
encoder.eval()
@ -236,6 +237,7 @@ def test_inference(encoder, decoder_iter, postnet):
print("Running Tacotron2 Decoder")
device = memory.device
dtype = memory.dtype
mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32, device = device)
not_finished = torch.ones([memory.size(0)], dtype=torch.int32, device = device)
mel_outputs, gate_outputs, alignments = (torch.zeros(1), torch.zeros(1), torch.zeros(1))
@ -295,7 +297,8 @@ def main():
parser = parse_args(parser)
args, _ = parser.parse_known_args()
tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2, False)
tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2,
amp_run=args.fp16)
opset_version = 10
@ -323,6 +326,8 @@ def main():
decoder_iter = DecoderIter(tacotron2)
memory = torch.randn((1,sequence_lengths[0],512)).cuda() #encoder_outputs
if args.fp16:
memory = memory.half()
memory_lengths = sequence_lengths
# initialize decoder states for dummy_input
decoder_input = tacotron2.decoder.get_go_frame(memory)
@ -399,6 +404,8 @@ def main():
postnet = Postnet(tacotron2)
dummy_input = torch.randn((1,80,620)).cuda()
if args.fp16:
dummy_input = dummy_input.half()
torch.onnx.export(postnet, dummy_input, args.output+"/"+"postnet.onnx",
opset_version=opset_version,
do_constant_folding=True,

View file

@ -94,6 +94,11 @@ output [
data_type: TYPE_INT32
dims: [1]
reshape: {{ shape: [ ] }}
}},
{{
name: "alignments__2"
data_type: {fp_type}
dims: [-1,-1]
}}
]
"""

View file

@ -42,7 +42,7 @@ def parse_args(parser):
help='full path to the WaveGlow model checkpoint file')
parser.add_argument('-o', '--output', type=str, required=True,
help='Directory for the exported WaveGlow ONNX model')
parser.add_argument('--amp-run', action='store_true',
parser.add_argument('--fp16', action='store_true',
help='inference with AMP')
parser.add_argument('-s', '--sigma-infer', default=0.6, type=float)
@ -165,20 +165,16 @@ def infer_onnx(self, spect, z, sigma=0.9):
def export_onnx(parser, args):
waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow,
args.amp_run, forward_is_infer=False)
args.fp16, forward_is_infer=False)
# 80 mel channels, 620 mel spectrograms ~ 7 seconds of speech
mel = torch.randn(1, 80, 620).cuda()
stride = 256 # value from waveglow upsample
kernel_size = 1024 # value from waveglow upsample
n_group = 8
z_size2 = (mel.size(2)-1)*stride+(kernel_size-1)+1
# corresponds to cutoff in infer_onnx
z_size2 = z_size2 - (kernel_size-stride)
z_size2 = z_size2//n_group
z_size2 = (mel.size(2)*stride)//n_group
z = torch.randn(1, n_group, z_size2, 1).cuda()
if args.amp_run:
if args.fp16:
mel = mel.half()
z = z.half()
with torch.no_grad():
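The shortened `z_size2` expression is algebraically the same as the old cutoff-based one: (L-1)*stride + (kernel-1) + 1 - (kernel-stride) reduces to L*stride, leaving only the division by `n_group`. A quick numeric check with the values used in this script:

```python
# values from the export script: 620 mel frames, stride 256, kernel 1024, 8 groups
mel_frames, stride, kernel_size, n_group = 620, 256, 1024, 8

old = ((mel_frames - 1) * stride + (kernel_size - 1) + 1 - (kernel_size - stride)) // n_group
new = (mel_frames * stride) // n_group

assert old == new == 19840
```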
@ -187,12 +183,12 @@ def export_onnx(parser, args):
# export to ONNX
convert_1d_to_2d_(waveglow)
if args.fp16:
waveglow = waveglow.half()
fType = types.MethodType
waveglow.forward = fType(infer_onnx, waveglow)
if args.amp_run:
waveglow.half()
mel = mel.unsqueeze(3)
opset_version = 10
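The `types.MethodType` line above binds the standalone `infer_onnx` function as WaveGlow's `forward`, so `torch.onnx.export` traces the export-friendly inference path; the diff also moves the `.half()` conversion ahead of that patch so the weights are already FP16 when tracing starts. A tiny illustration of the binding trick on a toy class (not the repo's model):

```python
import types

class Toy:
    scale = 3.0
    def forward(self, x):
        return x

def scaled_forward(self, x):
    # free function; 'self' is bound to the instance below
    return x * self.scale

t = Toy()
t.forward = types.MethodType(scaled_forward, t)  # same pattern as waveglow.forward = fType(infer_onnx, waveglow)
print(t.forward(2.0))  # 6.0
```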

View file

@ -31,6 +31,8 @@ import torch
import argparse
import numpy as np
from scipy.io.wavfile import write
import matplotlib
import matplotlib.pyplot as plt
import sys
@ -208,14 +210,14 @@ def main():
except:
print("Could not read file")
sys.exit(1)
if args.include_warmup:
sequence = torch.randint(low=0, high=148, size=(1,50),
dtype=torch.long).cuda()
input_lengths = torch.IntTensor([sequence.size(1)]).cuda().long()
for i in range(3):
with torch.no_grad():
mel, mel_lengths = jitted_tacotron2(sequence, input_lengths)
mel, mel_lengths, _ = jitted_tacotron2(sequence, input_lengths)
_ = waveglow(mel)
measurements = {}
@ -223,7 +225,7 @@ def main():
sequences_padded, input_lengths = prepare_input_sequence(texts)
with torch.no_grad(), MeasureTime(measurements, "tacotron2_time"):
mel, mel_lengths = jitted_tacotron2(sequences_padded, input_lengths)
mel, mel_lengths, alignments = jitted_tacotron2(sequences_padded, input_lengths)
with torch.no_grad(), MeasureTime(measurements, "waveglow_time"):
audios = waveglow(mel, sigma=args.sigma_infer)
@ -240,7 +242,14 @@ def main():
DLLogger.log(step=0, data={"waveglow_latency": measurements['waveglow_time']})
DLLogger.log(step=0, data={"latency": (measurements['tacotron2_time']+measurements['waveglow_time'])})
alignments = alignments.unfold(1, audios.size(0), audios.size(0)).transpose(0,2)
for i, audio in enumerate(audios):
plt.imshow(alignments[i].float().data.cpu().numpy().T, aspect="auto", origin="lower")
figure_path = args.output+"alignment_"+str(i)+"_"+args.suffix+".png"
plt.savefig(figure_path)
audio = audio[:mel_lengths[i]*args.stft_hop_length]
audio = audio/torch.max(torch.abs(audio))
audio_path = args.output+"audio_"+str(i)+"_"+args.suffix+".wav"
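Each mel frame decoded by Tacotron 2 corresponds to `stft_hop_length` audio samples out of WaveGlow, so trimming to `mel_lengths[i]*args.stft_hop_length` drops the padded tail before the waveform is normalized and written out. A small back-of-the-envelope check with this repo's usual settings (22050 Hz sampling rate, hop 256; the frame count is hypothetical):

```python
sampling_rate = 22050     # Hz, as used elsewhere in this repo
stft_hop_length = 256     # audio samples per mel frame
mel_frames = 500          # hypothetical length of one generated utterance

num_samples = mel_frames * stft_hop_length   # 128000 samples kept after trimming
duration_s = num_samples / sampling_rate     # about 5.8 seconds of audio
print(num_samples, round(duration_s, 2))
```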

View file

@ -95,7 +95,7 @@ def main():
dtype=torch.long).cuda()
input_lengths = torch.IntTensor([text_padded.size(1)]*args.batch_size).cuda().long()
with torch.no_grad(), MeasureTime(measurements, "inference_time"):
mels, _ = model(text_padded, input_lengths)
mels, _, _ = model(text_padded, input_lengths)
num_items = mels.size(0)*mels.size(2)
if args.model_name == 'WaveGlow':

View file

@ -0,0 +1,233 @@
## Model Preparation
### Clone the repository
```bash
git clone https://github.com/NVIDIA/DeepLearningExamples.git
cd DeepLearningExamples
```
You will build the Conversational AI demo in the Tacotron 2 folder:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai
```
### Download checkpoints
Download the PyTorch checkpoints from [NGC](https://ngc.nvidia.com/models):
* [Jasper](https://ngc.nvidia.com/catalog/models/nvidia:jasperpyt_fp16/files)
```bash
wget https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_fp16/versions/1/files/jasper_fp16.pt
```
* [BERT](https://ngc.nvidia.com/catalog/models/nvidia:bert_large_pyt_amp_ckpt_squad_qa1_1/files?version=1)
```bash
wget https://api.ngc.nvidia.com/v2/models/nvidia/bert_large_pyt_amp_ckpt_squad_qa1_1/versions/1/files/bert_large_qa.pt
```
* [Tacotron 2](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16/files?version=2)
```bash
wget https://api.ngc.nvidia.com/v2/models/nvidia/tacotron2pyt_fp16/versions/2/files/nvidia_tacotron2pyt_fp16_20190427
```
* [WaveGlow](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16/files)
```bash
wget https://api.ngc.nvidia.com/v2/models/nvidia/waveglow256pyt_fp16/versions/1/files/nvidia_waveglow256pyt_fp16
```
Move the downloaded checkpoints to the `models` directory:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai
mv bert_large_qa.pt nvidia_tacotron2pyt_fp16_20190427 nvidia_waveglow256pyt_fp16 models/
```
### Prepare Jasper
First, let's generate a TensorRT engine for Jasper using TensorRT version 7.
Download the Jasper checkpoint from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:jasperpyt_fp16/files)
and move it to the `Jasper/checkpoints/` directory:
```bash
mkdir -p DeepLearningExamples/PyTorch/SpeechRecognition/Jasper/checkpoints
mv jasper_fp16.pt DeepLearningExamples/PyTorch/SpeechRecognition/Jasper/checkpoints
```
Apply a patch to enable support for TensorRT 7:
```bash
cd DeepLearningExamples/
git apply --ignore-space-change --reject --whitespace=fix ../patch_jasper_trt7
```
Now, build a container for Jasper:
```bash
cd DeepLearningExamples/PyTorch/SpeechRecognition/Jasper/
bash trt/scripts/docker/build.sh
```
To run the container, type:
```bash
cd DeepLearningExamples/PyTorch/SpeechRecognition/Jasper
export JASPER_DIR=${PWD}
export DATA_DIR=$JASPER_DIR/data/
export CHECKPOINT_DIR=$JASPER_DIR/checkpoints/
export RESULT_DIR=$JASPER_DIR/results/
cd $JASPER_DIR
mkdir -p $DATA_DIR $CHECKPOINT_DIR $RESULT_DIR
bash trt/scripts/docker/launch.sh $DATA_DIR $CHECKPOINT_DIR $RESULT_DIR
```
Inside the container, export the Jasper TensorRT engine by executing:
```bash
mkdir -p /results/onnxs/ /results/engines/
cd /jasper
python trt/perf.py --batch_size 1 --engine_batch_size 1 --model_toml configs/jasper10x5dr_nomask.toml --ckpt_path /checkpoints/jasper_fp16.pt --trt_fp16 --pyt_fp16 --engine_path /results/engines/fp16_DYNAMIC.engine --onnx_path /results/onnxs/fp32_DYNAMIC.onnx --seq_len 3600 --make_onnx
```
After a successful export, copy the engine to the `model_repo` directory:
```bash
cd DeepLearningExamples/PyTorch
mkdir -p SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/jasper-trt/1
cp SpeechRecognition/Jasper/results/engines/fp16_DYNAMIC.engine SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/jasper-trt/1/jasper_fp16.engine
```
You will also need the Jasper feature extractor and decoder. Download them from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:jasperpyt_jit_fp16/files) and move them to the model_repo:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/
mkdir -p jasper-decoder/1 jasper-feature-extractor/1
wget -P jasper-decoder/ https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_jit_fp16/versions/1/files/jasper-decoder/config.pbtxt
wget -P jasper-decoder/1/ https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_jit_fp16/versions/1/files/jasper-decoder/1/jasper-decoder.pt
wget -P jasper-feature-extractor/ https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_jit_fp16/versions/1/files/jasper-feature-extractor/config.pbtxt
wget -P jasper-feature-extractor/1/ https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_jit_fp16/versions/1/files/jasper-feature-extractor/1/jasper-feature-extractor.pt
```
### Prepare BERT
With the generated Jasper model, we can proceed to BERT.
Download the BERT checkpoint from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:bert_large_pyt_amp_ckpt_squad_qa1_1/files)
and move it to the `BERT/checkpoints/` directory:
```bash
mkdir -p DeepLearningExamples/PyTorch/LanguageModeling/BERT/checkpoints/
mv bert_large_qa.pt DeepLearningExamples/PyTorch/LanguageModeling/BERT/checkpoints/
```
Now, build a container for BERT:
```bash
cd DeepLearningExamples/PyTorch/LanguageModeling/BERT/
bash scripts/docker/build.sh
```
Use the Triton export script to convert the model `checkpoints/bert_large_qa.pt` to ONNX:
```bash
bash triton/export_model.sh
```
The model will be saved in `results/triton_models/bertQA-onnx`, together with the Triton configuration file. Copy the model and configuration file to the model_repo:
```bash
cd DeepLearningExamples
cp -r PyTorch/LanguageModeling/BERT/results/triton_models/bertQA-onnx PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/
```
### Prepare Tacotron 2 and WaveGlow
Now to the final part, the TTS system.
Download the [Tacotron 2](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16/files?version=2) and [WaveGlow](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16/files) checkpoints from [NGC](https://ngc.nvidia.com/catalog/models/)
and move them to the `Tacotron2/checkpoints/` directory:
```bash
mkdir -p DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/checkpoints/
mv nvidia_tacotron2pyt_fp16_20190427 nvidia_waveglow256pyt_fp16 DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/checkpoints/
```
Build the Tacotron 2 container:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/
bash scripts/docker/build.sh
```
Run the container in interactive mode by typing:
```bash
bash scripts/docker/interactive.sh
```
Export Tacotron 2 to TorchScript:
```bash
cd /workspace/tacotron2/
mkdir -p output
python exports/export_tacotron2_ts.py --tacotron2 checkpoints/nvidia_tacotron2pyt_fp16_20190427 -o output/model.pt --amp-run
```
To export WaveGlow to TensorRT 7, install ONNX-TensorRT:
```bash
cd /workspace && git clone https://github.com/onnx/onnx-tensorrt.git
cd /workspace/onnx-tensorrt/ && git submodule update --init --recursive
cd /workspace/onnx-tensorrt && mkdir -p build
cd /workspace/onnx-tensorrt/build && cmake .. -DCMAKE_CXX_FLAGS=-isystem\ /usr/local/cuda/include && make -j12 && make install
cd /workspace/tacotron2
```
Export WaveGlow to ONNX intermediate representation:
```bash
python exports/export_waveglow_onnx.py --waveglow checkpoints/nvidia_waveglow256pyt_fp16 --wn-channels 256 --fp16 -o output/
```
Use the exported ONNX IR to generate a TensorRT engine:
```bash
python trt/export_onnx2trt.py --waveglow output/waveglow.onnx -o output/ --fp16
```
After successful export, exit the container and copy the Tacotron 2 model and the WaveGlow engine to `model_repo`:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/
mkdir -p notebooks/conversationalai/model_repo/tacotron2/1/ notebooks/conversationalai/model_repo/waveglow-trt/1/
cp output/model.pt notebooks/conversationalai/model_repo/tacotron2/1/
cp output/waveglow_fp16.engine notebooks/conversationalai/model_repo/waveglow-trt/1/
```
## Deployment
With all models ready for deployment, go to the `conversationalai/client` folder and build the Triton client:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client
docker build -f Dockerfile --network=host -t speech_ai_client:demo .
```
From a terminal, start the Triton server:
```bash
NV_GPU=1 nvidia-docker run --ipc=host --network=host --rm -p8000:8000 -p8001:8001 \
    -v ${PWD}/DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/:/models \
    nvcr.io/nvidia/tensorrtserver:20.01-py3 trtserver --model-store=/models --log-verbose 1
```
In another terminal, run the client:
```bash
docker run -it --rm --network=host --device /dev/snd:/dev/snd --device /dev/usb:/dev/usb speech_ai_client:demo bash /workspace/speech_ai_demo/start_jupyter.sh
```

View file

@ -0,0 +1,41 @@
# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM nvcr.io/nvidia/tensorrtserver:20.01-py3-clientsdk AS trtserver
FROM continuumio/miniconda3
RUN apt-get update && apt-get install -y pbzip2 pv bzip2 cabextract mc iputils-ping wget
WORKDIR /workspace/speech_ai_demo/
# Copy the perf_client over
COPY --from=trtserver /workspace/install/ /workspace/install/
ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}
# set up env variables
ENV PATH="$PATH:/opt/conda/bin"
RUN cd /workspace/speech_ai_demo/
# jupyter lab extensions
RUN conda install -c conda-forge jupyterlab=1.0 ipywidgets=7.5 nodejs python-sounddevice librosa unidecode inflect
RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager
RUN pip install /workspace/install/python/tensorrtserver*.whl
# Copy the python wheel and install with pip
COPY --from=trtserver /workspace/install/python/tensorrtserver*.whl /tmp/
RUN pip install /tmp/tensorrtserver*.whl && rm /tmp/tensorrtserver*.whl
COPY start_jupyter.sh /workspace/speech_ai_demo/
COPY speech_ai_demo/utils /workspace/speech_ai_demo/utils
COPY speech_ai_demo/speech_ai_demo.ipynb /workspace/speech_ai_demo/
RUN chmod a+x /workspace/speech_ai_demo/start_jupyter.sh

View file

@ -0,0 +1,486 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sounddevice as sd\n",
"print(sd.query_devices())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sd.default.device = 11"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import time\n",
"import numpy as np\n",
"import collections\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib import cm as cm\n",
"from IPython.display import Audio, display, clear_output, Markdown, Image\n",
"import librosa\n",
"import librosa.display\n",
"import ipywidgets as widgets\n",
"# \n",
"# import tacotron2 preprocessing utilities\n",
"from utils.tacotron2.symbols import symbols\n",
"from utils.tacotron2 import text_to_sequence as text_to_sequence_internal\n",
"# import bert pre- and postprocessing utilities\n",
"from utils.bert.preprocessing import convert_example_to_feature, read_squad_example, get_predictions\n",
"from utils.bert.tokenization import BertTokenizer\n",
"# import jasper pre- and postprocessing utilities\n",
"from utils.jasper.speech_utils import AudioSegment, SpeechClient\n",
"# import trtis api\n",
"from tensorrtserver.api import *\n",
"\n",
"\n",
"defaults = {\n",
" # settings\n",
" 'sigma_infer': 0.6, # don't touch this\n",
" 'sampling_rate': 22050, # don't touch this\n",
" 'stft_hop_length': 256, # don't touch this\n",
" 'url': 'localhost:8000', # don't touch this\n",
" 'protocol': 0, # 0: http, 1: grpc \n",
" 'autoplay': True, # autoplay\n",
" 'character_limit_min': 4, # don't touch this\n",
" 'character_limit_max': 124, # don't touch this\n",
" 'vocab_file': \"./utils/bert/vocab.txt\", # don't touch this\n",
" 'do_lower_case': True, # don't touch this\n",
" 'version_2_with_negative': False, # if true, the model may give 'i don't know' as an answer. the model has to be trained for it. \n",
" 'max_seq_length': 384, # the maximum total input sequence length after WordPiece tokenization. Sequences longer than this will be truncated, and sequences shorter than this will be padded. \n",
" 'doc_stride': 128, # when splitting up a long document into chunks, how much stride to take between chunks\n",
" 'max_query_length': 64, # the maximum number of tokens in the question. Questions longer than this will be truncated to this length\n",
" 'n_best_size': 10, # don't touch this\n",
" 'max_answer_length': 30, # don't touch this\n",
" 'do_lower_case': True, # don't touch this\n",
" 'null_score_diff_threshold': 0.0, # don't touch this\n",
" 'jasper_batch_size': 1, # don't touch this\n",
" 'jasper_sampling_rate': 44100, # don't touch this\n",
" 'record_maximum_seconds': 4.0 # maximum number of seconds to record\n",
"}\n",
"\n",
"\n",
"# create args object\n",
"class Struct:\n",
" def __init__(self, **entries):\n",
" self.__dict__.update(entries)\n",
"\n",
"\n",
"args = Struct(**defaults)\n",
"\n",
"\n",
"# create the inference context for the models\n",
"infer_ctx_bert = InferContext(args.url, args.protocol, 'bertQA-onnx', -1)\n",
"infer_ctx_tacotron2 = InferContext(args.url, args.protocol, 'tacotron2', -1)\n",
"infer_ctx_waveglow = InferContext(args.url, args.protocol, 'waveglow-trt', -1)\n",
"infer_jasper = SpeechClient(args.url, args.protocol, 'jasper-trt-ensemble', -1, \n",
" args.jasper_batch_size, 'pyt', verbose=False, \n",
" mode='asynchronous', from_features=False)\n",
"\n",
"\n",
"def display_sequences(sequences, labels, colors):\n",
" ''' displays sequences on a dotted plot '''\n",
" plt.figure(figsize=(10, 2.5))\n",
" plt.tick_params(\n",
" axis='both',\n",
" which='both',\n",
" bottom=False,\n",
" top=False,\n",
" left=False,\n",
" right=False,\n",
" labelbottom=False,\n",
" labelleft=False)\n",
" for sequence,color,label in zip(sequences,colors,labels):\n",
" plt.plot(sequence, color, label=label)\n",
" plt.legend(loc='upper right')\n",
" plt.show()\n",
"\n",
"\n",
"def display_heatmap(sequence, title='preprocessed text'):\n",
" ''' displays sequence as a heatmap '''\n",
" clear_output(wait=True)\n",
" sequence = sequence[None, :]\n",
" plt.figure(figsize=(10, 2.5))\n",
" plt.title(title)\n",
" plt.tick_params(\n",
" axis='both',\n",
" which='both',\n",
" bottom=False,\n",
" top=False,\n",
" left=False,\n",
" right=False,\n",
" labelbottom=False,\n",
" labelleft=False)\n",
" plt.imshow(sequence, cmap='BrBG_r', interpolation='nearest')\n",
" plt.show()\n",
"\n",
"\n",
"def display_sound(signal, title, color):\n",
" ''' displays signal '''\n",
" clear_output(wait=True)\n",
" plt.figure(figsize=(10, 2.5))\n",
" plt.title(title)\n",
" plt.tick_params(\n",
" axis='both',\n",
" which='both',\n",
" bottom=True,\n",
" top=False,\n",
" left=False,\n",
" right=False,\n",
" labelbottom=True,\n",
" labelleft=False)\n",
" librosa.display.waveplot(signal, color=color)\n",
" plt.show()\n",
"\n",
"\n",
"def display_spectrogram(mel, title):\n",
" ''' displays mel spectrogram '''\n",
" clear_output(wait=True)\n",
" fig = plt.figure(figsize=(10, 2.5))\n",
" ax = fig.add_subplot(111)\n",
"# plt.title(title)\n",
" plt.tick_params(\n",
" axis='both',\n",
" which='both',\n",
" bottom=True,\n",
" top=False,\n",
" left=False,\n",
" right=False,\n",
" labelbottom=True,\n",
" labelleft=False)\n",
" plt.xlabel('Time')\n",
" cmap = cm.get_cmap('jet', 30)\n",
" cax = ax.imshow(mel.astype(np.float32), interpolation=\"nearest\", cmap=cmap)\n",
" ax.grid(True)\n",
" plt.show()\n",
"\n",
"\n",
"def text_to_sequence(text):\n",
" ''' preprocessor of tacotron2\n",
" ::text:: the input str\n",
" ::returns:: sequence, the preprocessed text\n",
" '''\n",
" sequence = text_to_sequence_internal(text, ['english_cleaners'])\n",
" sequence = np.array(sequence, dtype=np.int64)\n",
" return sequence\n",
"\n",
"\n",
"def sequence_to_mel(sequence):\n",
" ''' calls tacotron2\n",
" ::sequence:: int64 numpy array, contains the preprocessed text\n",
" ::returns:: (mel, mel_lengths) pair\n",
" mel is the mel-spectrogram, np.array\n",
" mel_lengths contains the length of the unpadded mel, np.array\n",
" '''\n",
" input_lengths = [len(sequence)]\n",
" input_lengths = np.array(input_lengths, dtype=np.int64)\n",
" # prepare input/output\n",
" input_dict = {}\n",
" input_dict['sequence__0'] = (sequence,)\n",
" input_dict['input_lengths__1'] = (input_lengths,)\n",
" output_dict = {}\n",
" output_dict['mel_outputs_postnet__0'] = InferContext.ResultFormat.RAW\n",
" output_dict['mel_lengths__1'] = InferContext.ResultFormat.RAW\n",
" batch_size = 1\n",
" # call tacotron2\n",
" result = infer_ctx_tacotron2.run(input_dict, output_dict, batch_size)\n",
" # get results\n",
" mel = result['mel_outputs_postnet__0'][0] # take only the first instance in the output batch\n",
" mel_lengths = result['mel_lengths__1'][0] # take only the first instance in the output batch\n",
" return mel, mel_lengths\n",
"\n",
"\n",
"def mel_to_signal(mel, mel_lengths):\n",
" ''' calls waveglow\n",
" ::mel:: mel spectrogram\n",
" ::mel_lengths:: original length of mel spectrogram\n",
" ::returns:: waveform\n",
" '''\n",
" # padding/trimming mel to dimension 620\n",
" mel = mel[:,:,None]\n",
" # prepare input/output\n",
" input_dict = {}\n",
" input_dict['mel'] = (mel,)\n",
" stride = 256\n",
" kernel_size = 1024\n",
" n_group = 8\n",
" z_size = (mel.shape[1]-1)*stride + (kernel_size-1) + 1 - (kernel_size-stride)\n",
" z_size = z_size//n_group\n",
" shape = (n_group,z_size,1)\n",
" input_dict['z'] = np.random.normal(0.0, 1.0, shape).astype(mel.dtype)\n",
" input_dict['z'] = (input_dict['z'],)\n",
" output_dict = {}\n",
" output_dict['audio'] = InferContext.ResultFormat.RAW\n",
" batch_size = 1\n",
" # call waveglow\n",
" result = infer_ctx_waveglow.run(input_dict, output_dict, batch_size)\n",
" # get the results\n",
" signal = result['audio'][0] # take only the first instance in the output batch\n",
" # postprocessing of waveglow: trimming signal to its actual size\n",
" trimmed_length = mel_lengths[0] * args.stft_hop_length\n",
" signal = signal[:trimmed_length] # trim\n",
" signal = signal.astype(np.float32)\n",
" return signal\n",
"\n",
"\n",
"def question_and_context_to_feature(question_text, context):\n",
" tokenizer = BertTokenizer(args.vocab_file, do_lower_case=args.do_lower_case, max_len=512) # for bert large\n",
" example = read_squad_example(question_text, \n",
" context, \n",
" version_2_with_negative=args.version_2_with_negative)\n",
" feature = convert_example_to_feature(\n",
" example=example, \n",
" tokenizer=tokenizer, \n",
" max_seq_length=args.max_seq_length, \n",
" doc_stride=args.doc_stride, \n",
" max_query_length=args.max_query_length)\n",
" return example, feature\n",
"\n",
"\n",
"def button_rec_clicked(change):\n",
" if record_seconds.value > 0.0:\n",
" with plot_jasper_audio:\n",
" clear_output(wait=True)\n",
" recording = sd.rec(int(record_seconds.value*args.jasper_sampling_rate), samplerate=args.jasper_sampling_rate, channels=1)\n",
" while record_seconds.value > 0:\n",
" time.sleep(0.01)\n",
" record_seconds.value -= 0.01\n",
" sd.wait()\n",
" recording = recording.squeeze()\n",
" display_sound(recording,'recorded audio','orange')\n",
" audio = AudioSegment(recording, args.jasper_sampling_rate).samples\n",
" hypotheses = infer_jasper.recognize([audio], ['audio recording'])\n",
" question_text.value = str(hypotheses[0]) + '? '\n",
"\n",
"\n",
"button_rec = widgets.Button(description=\"RECORD\")\n",
"button_rec.on_click(button_rec_clicked)\n",
"record_seconds = widgets.FloatSlider(min=0.0, max=args.record_maximum_seconds, value=args.record_maximum_seconds, \n",
" step=0.1, continuous_update=True, description = \"seconds\")\n",
"buttons = widgets.HBox([button_rec, record_seconds])\n",
"\n",
"\n",
"question_text = widgets.Textarea(\n",
" value='jasper output / bert input question',\n",
" placeholder='',\n",
" description='',\n",
" disabled=False,\n",
" continuous_update=True,\n",
" layout=widgets.Layout(width='550px', height='40px')\n",
")\n",
"\n",
"\n",
"context = widgets.Textarea(\n",
" value='bert input context',\n",
" placeholder='',\n",
" description='',\n",
" disabled=False,\n",
" continuous_update=True,\n",
" layout=widgets.Layout(width='550px', height='80px')\n",
")\n",
"\n",
"question_context = widgets.HBox([question_text, context])\n",
"\n",
"response_text = widgets.Textarea(\n",
" value='',\n",
" placeholder='',\n",
" description='',\n",
" disabled=False,\n",
" continuous_update=True,\n",
" layout=widgets.Layout(width='550px', height='40px')\n",
")\n",
"\n",
"\n",
"def text_to_logits(input_ids_data, segment_ids_data, input_mask_data):\n",
" # call bert\n",
" input_dict = {}\n",
" input_dict['input__0'] = (input_ids_data.astype(np.int64),)\n",
" input_dict['input__1'] = (segment_ids_data.astype(np.int64),)\n",
" input_dict['input__2'] = (input_mask_data.astype(np.int64),)\n",
" batch_size = 1\n",
" output_dict = {}\n",
" output_dict['output__0'] = InferContext.ResultFormat.RAW\n",
" output_dict['output__1'] = InferContext.ResultFormat.RAW\n",
" # \n",
" result = infer_ctx_bert.run(input_dict, output_dict, batch_size)\n",
" # \n",
" print(\"BANGLA\")\n",
" start_logits = [float(x) for x in result[\"output__0\"][0].flat]\n",
" end_logits = [float(x) for x in result[\"output__1\"][0].flat]\n",
" return start_logits, end_logits\n",
"\n",
"\n",
"def question_text_change(change):\n",
" text = change['new']\n",
" text = text.strip(' ')\n",
" length = len(text)\n",
" if length < args.character_limit_min: # too short text\n",
" return\n",
" if text[-1] != '?':\n",
" return\n",
" # preprocess bert\n",
" example, feature = question_and_context_to_feature(text, context.value)\n",
" input_ids_data = np.array(feature.input_ids, dtype=np.int64)\n",
" input_mask_data = np.array(feature.input_mask, dtype=np.int64)\n",
" segment_ids_data = np.array(feature.segment_ids, dtype=np.int64)\n",
" L = segment_ids_data.shape[0] - 1\n",
" while L > 20 and segment_ids_data[L-20] == 0:\n",
" L -= 20\n",
" with plot_tensor:\n",
" clear_output(wait=True)\n",
" C = input_ids_data.max()\n",
" sequences = (input_ids_data[:L],C//2*input_mask_data[:L],C*segment_ids_data[:L])\n",
" display_sequences(sequences, ('input','mask','segment'), ('r.','b.','g.'))\n",
" \n",
" # call bert\n",
" start_logits, end_logits = text_to_logits(input_ids_data, segment_ids_data, input_mask_data)\n",
" with plot_logits:\n",
" clear_output(wait=True)\n",
" start = np.array(start_logits, dtype=np.float32)\n",
" end = np.array(end_logits, dtype=np.float32)\n",
" sequences = (start[:L], end[:L])\n",
" display_sequences(sequences, ('start_logits', 'end_logits'), ('black', 'violet'))\n",
" # postprocess bert\n",
" prediction = get_predictions(example, feature, start_logits, end_logits, \n",
" args.n_best_size, args.max_answer_length, args.do_lower_case, \n",
" args.version_2_with_negative, args.null_score_diff_threshold)\n",
" response_text.value = prediction[0][\"text\"] + '. \\n'\n",
"\n",
"\n",
"def context_change(change):\n",
" text = change['new']\n",
" length = len(text)\n",
" if length < args.character_limit_min: # too short text\n",
" return\n",
" # inference\n",
" question_text.value += ' '\n",
"\n",
"def response_text_change(change):\n",
" ''' this gets called each time text_area.value changes '''\n",
" text = change['new']\n",
" text = text.strip(' ')\n",
" length = len(text)\n",
" if length < args.character_limit_min: # too short text\n",
" return\n",
" if length > args.character_limit_max: # too long text\n",
" text_area.value = text[:args.character_limit_max]\n",
" return\n",
" # preprocess tacotron2\n",
" sequence = text_to_sequence(text)\n",
" with plot_response_text_preprocessed:\n",
" display_heatmap(sequence)\n",
" # run tacotron2\n",
" mel, mel_lengths = sequence_to_mel(sequence)\n",
" with plot_spectrogram:\n",
" display_spectrogram(mel, change['new'])\n",
" # run waveglow\n",
" signal = mel_to_signal(mel, mel_lengths)\n",
" with plot_signal:\n",
" display_sound(signal, change['new'], 'green')\n",
" with plot_play:\n",
" clear_output(wait=True)\n",
" display(Audio(signal, rate=args.sampling_rate, autoplay=args.autoplay))\n",
"\n",
"def get_output_widget(width, height, object_fit='fill'):\n",
" ''' creates an output widget with default values and returns it '''\n",
" layout = widgets.Layout(width=width,\n",
" height=height,\n",
" object_fit=object_fit,\n",
" object_position = '{center} {center}')\n",
" ret = widgets.Output(layout=layout)\n",
" return ret\n",
"\n",
"\n",
"plot_tensor = get_output_widget(width='5in',height='1.75in')\n",
"plot_logits = get_output_widget(width='5in',height='1.75in')\n",
"plot_response_text_preprocessed = get_output_widget(width='10in',height='1in')\n",
"plot_spectrogram = get_output_widget(width='10in',height='2.0in', object_fit='scale-down')\n",
"plot_jasper_audio = get_output_widget(width='10in',height='2.0in')\n",
"plot_signal = get_output_widget(width='10in',height='2.0in')\n",
"plot_play = get_output_widget(width='4in',height='1in')\n",
"\n",
"empty = widgets.VBox([], layout=widgets.Layout(height='1in'))\n",
"markdown_z0 = Markdown('**Jasper input**')\n",
"markdown_m0 = Markdown('**Jasper output / BERT input**')\n",
"markdown_bert = Markdown('**BERT**')\n",
"markdown_tacotron2 = Markdown('**Tacotron 2**')\n",
"markdown_3 = Markdown('**WaveGlow**')\n",
"\n",
"bert_widgets = widgets.HBox([plot_tensor, plot_logits])\n",
"tacotron2_widgets = widgets.HBox([response_text, plot_spectrogram])\n",
"\n",
"display(\n",
" empty, \n",
" markdown_z0, \n",
" buttons, \n",
" markdown_m0, question_context,\n",
" markdown_bert,\n",
" bert_widgets,\n",
" markdown_tacotron2,\n",
" tacotron2_widgets,\n",
" markdown_3, \n",
" plot_play, \n",
" empty\n",
")\n",
"\n",
"\n",
"def fill_initial_values():\n",
" with plot_jasper_audio:\n",
" display_sound(np.zeros(100),\"input audio\",'orange')\n",
" # \n",
" context.value = \"The man holding the telescope went into a shop to purchase some flowers on the occasion of all saints day. \"\n",
" # context.value = \"William Shakespeare was an English poet, playwright and actor, widely regarded as the greatest writer in the English language and the world's greatest dramatist. He is often called England's national poet and the \\\"Bard of Avon\\\".\"\n",
" question_text.value = \"\"\n",
" \n",
"fill_initial_values()\n",
"\n",
"response_text.observe(response_text_change, names='value')\n",
"question_text.observe(question_text_change, names='value')\n",
"context.observe(context_change, names='value')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View file

@ -0,0 +1,19 @@
Copyright (c) 2017 Keith Ito
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View file

@ -0,0 +1,554 @@
# coding=utf-8
# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import json
import numpy as np
import collections
from utils.bert.tokenization import (BasicTokenizer, BertTokenizer, whitespace_tokenize)
class SquadExample(object):
"""
A single training/test example for the Squad dataset.
For examples without an answer, the start and end position are -1.
"""
def __init__(self,
qas_id,
question_text,
doc_tokens,
orig_answer_text=None,
start_position=None,
end_position=None,
is_impossible=None):
self.qas_id = qas_id
self.question_text = question_text
self.doc_tokens = doc_tokens
self.orig_answer_text = orig_answer_text
self.start_position = start_position
self.end_position = end_position
self.is_impossible = is_impossible
def __str__(self):
return self.__repr__()
def __repr__(self):
s = ""
s += "qas_id: %s" % (self.qas_id)
s += ", question_text: %s" % (
self.question_text)
s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
if self.start_position:
s += ", start_position: %d" % (self.start_position)
if self.end_position:
s += ", end_position: %d" % (self.end_position)
if self.is_impossible:
s += ", is_impossible: %r" % (self.is_impossible)
return s
class InputFeatures(object):
"""A single set of features of data."""
def __init__(self,
unique_id,
example_index,
doc_span_index,
tokens,
token_to_orig_map,
token_is_max_context,
input_ids,
input_mask,
segment_ids,
start_position=None,
end_position=None,
is_impossible=None):
self.unique_id = unique_id
self.example_index = example_index
self.doc_span_index = doc_span_index
self.tokens = tokens
self.token_to_orig_map = token_to_orig_map
self.token_is_max_context = token_is_max_context
self.input_ids = input_ids
self.input_mask = input_mask
self.segment_ids = segment_ids
self.start_position = start_position
self.end_position = end_position
self.is_impossible = is_impossible
def read_squad_example(question_text, context, version_2_with_negative):
""" reads a question and a context, and turns it into a SquadExample """
#
def is_whitespace(c):
if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
return True
return False
#
doc_tokens = []
prev_is_whitespace = True
for c in context:
if is_whitespace(c):
prev_is_whitespace = True
else:
if prev_is_whitespace:
doc_tokens.append(c)
else:
doc_tokens[-1] += c
prev_is_whitespace = False
#
example = SquadExample(
qas_id=0,
question_text=question_text,
doc_tokens=doc_tokens,
orig_answer_text=None,
start_position=None,
end_position=None,
is_impossible=False
)
return example
def convert_example_to_feature(example, tokenizer, max_seq_length,
doc_stride, max_query_length):
""" converts an example into a feature """
unique_id = 1000000000
examples = [example]
features = []
for (example_index, example) in enumerate(examples):
query_tokens = tokenizer.tokenize(example.question_text)
if len(query_tokens) > max_query_length:
query_tokens = query_tokens[0:max_query_length]
tok_to_orig_index = []
orig_to_tok_index = []
all_doc_tokens = []
for (i, token) in enumerate(example.doc_tokens):
orig_to_tok_index.append(len(all_doc_tokens))
sub_tokens = tokenizer.tokenize(token)
for sub_token in sub_tokens:
tok_to_orig_index.append(i)
all_doc_tokens.append(sub_token)
tok_start_position = None
tok_end_position = None
# The -3 accounts for [CLS], [SEP] and [SEP]
max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
# We can have documents that are longer than the maximum sequence length.
# To deal with this we do a sliding window approach, where we take chunks
# of the up to our max length with a stride of `doc_stride`.
_DocSpan = collections.namedtuple( # pylint: disable=invalid-name
"DocSpan", ["start", "length"])
doc_spans = []
start_offset = 0
while start_offset < len(all_doc_tokens):
length = len(all_doc_tokens) - start_offset
if length > max_tokens_for_doc:
length = max_tokens_for_doc
doc_spans.append(_DocSpan(start=start_offset, length=length))
if start_offset + length == len(all_doc_tokens):
break
start_offset += min(length, doc_stride)
for (doc_span_index, doc_span) in enumerate(doc_spans):
tokens = []
token_to_orig_map = {}
token_is_max_context = {}
segment_ids = []
tokens.append("[CLS]")
segment_ids.append(0)
for token in query_tokens:
tokens.append(token)
segment_ids.append(0)
tokens.append("[SEP]")
segment_ids.append(0)
for i in range(doc_span.length):
split_token_index = doc_span.start + i
token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
is_max_context = _check_is_max_context(doc_spans, doc_span_index,
split_token_index)
token_is_max_context[len(tokens)] = is_max_context
tokens.append(all_doc_tokens[split_token_index])
segment_ids.append(1)
tokens.append("[SEP]")
segment_ids.append(1)
input_ids = tokenizer.convert_tokens_to_ids(tokens)
# The mask has 1 for real tokens and 0 for padding tokens. Only real
# tokens are attended to.
input_mask = [1] * len(input_ids)
# Zero-pad up to the sequence length.
while len(input_ids) < max_seq_length:
input_ids.append(0)
input_mask.append(0)
segment_ids.append(0)
assert len(input_ids) == max_seq_length
assert len(input_mask) == max_seq_length
assert len(segment_ids) == max_seq_length
start_position = None
end_position = None
features.append(
InputFeatures(
unique_id=unique_id,
example_index=example_index,
doc_span_index=doc_span_index,
tokens=tokens,
token_to_orig_map=token_to_orig_map,
token_is_max_context=token_is_max_context,
input_ids=input_ids,
input_mask=input_mask,
segment_ids=segment_ids,
start_position=start_position,
end_position=end_position,
is_impossible=example.is_impossible))
unique_id += 1
assert len(features) == 1, "too large input"
return features[0]
def _check_is_max_context(doc_spans, cur_span_index, position):
"""Check if this is the 'max context' doc span for the token."""
best_score = None
best_span_index = None
for (span_index, doc_span) in enumerate(doc_spans):
end = doc_span.start + doc_span.length - 1
if position < doc_span.start:
continue
if position > end:
continue
num_left_context = position - doc_span.start
num_right_context = end - position
score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
if best_score is None or score > best_score:
best_score = score
best_span_index = span_index
return cur_span_index == best_span_index
RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
def get_predictions(example, feature, start_logits, end_logits, n_best_size,
max_answer_length, do_lower_case,
version_2_with_negative, null_score_diff_threshold):
"""Write final predictions to the json file and log-odds of null if needed."""
all_examples = [example]
all_features = [feature]
all_results = [RawResult(unique_id=1000000000,start_logits=start_logits,end_logits=end_logits)]
example_index_to_features = collections.defaultdict(list)
for feature in all_features:
example_index_to_features[feature.example_index].append(feature)
unique_id_to_result = {}
for result in all_results:
unique_id_to_result[result.unique_id] = result
_PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
"PrelimPrediction",
["feature_index", "start_index", "end_index", "start_logit", "end_logit"])
all_predictions = collections.OrderedDict()
all_nbest_json = collections.OrderedDict()
scores_diff_json = collections.OrderedDict()
for (example_index, example) in enumerate(all_examples):
features = example_index_to_features[example_index]
prelim_predictions = []
# keep track of the minimum score of null start+end of position 0
score_null = 1000000 # large and positive
min_null_feature_index = 0 # the paragraph slice with min null score
null_start_logit = 0 # the start logit at the slice with min null score
null_end_logit = 0 # the end logit at the slice with min null score
for (feature_index, feature) in enumerate(features):
result = unique_id_to_result[feature.unique_id]
start_indexes = _get_indices_of_largest_logits(result.start_logits)
end_indexes = _get_indices_of_largest_logits(result.end_logits)
# if we could have irrelevant answers, get the min score of irrelevant
if version_2_with_negative:
feature_null_score = result.start_logits[0] + result.end_logits[0]
if feature_null_score < score_null:
score_null = feature_null_score
min_null_feature_index = feature_index
null_start_logit = result.start_logits[0]
null_end_logit = result.end_logits[0]
for start_index in start_indexes:
for end_index in end_indexes:
# We could hypothetically create invalid predictions, e.g., predict
# that the start of the span is in the question. We throw out all
# invalid predictions.
if start_index >= len(feature.tokens):
continue
if end_index >= len(feature.tokens):
continue
if start_index not in feature.token_to_orig_map:
continue
if end_index not in feature.token_to_orig_map:
continue
if not feature.token_is_max_context.get(start_index, False):
continue
if end_index < start_index:
continue
length = end_index - start_index + 1
if length > max_answer_length:
continue
prelim_predictions.append(
_PrelimPrediction(
feature_index=feature_index,
start_index=start_index,
end_index=end_index,
start_logit=result.start_logits[start_index],
end_logit=result.end_logits[end_index]))
if version_2_with_negative:
prelim_predictions.append(
_PrelimPrediction(
feature_index=min_null_feature_index,
start_index=0,
end_index=0,
start_logit=null_start_logit,
end_logit=null_end_logit))
prelim_predictions = sorted(
prelim_predictions,
key=lambda x: (x.start_logit + x.end_logit),
reverse=True)
_NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name
"NbestPrediction", ["text", "start_logit", "end_logit"])
seen_predictions = {}
nbest = []
for pred in prelim_predictions:
if len(nbest) >= n_best_size:
break
feature = features[pred.feature_index]
if pred.start_index > 0: # this is a non-null prediction
tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
orig_doc_start = feature.token_to_orig_map[pred.start_index]
orig_doc_end = feature.token_to_orig_map[pred.end_index]
orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
tok_text = " ".join(tok_tokens)
# De-tokenize WordPieces that have been split off.
tok_text = tok_text.replace(" ##", "")
tok_text = tok_text.replace("##", "")
# Clean whitespace
tok_text = tok_text.strip()
tok_text = " ".join(tok_text.split())
orig_text = " ".join(orig_tokens)
final_text = get_final_text(tok_text, orig_text, do_lower_case)
if final_text in seen_predictions:
continue
seen_predictions[final_text] = True
else:
final_text = ""
seen_predictions[final_text] = True
nbest.append(
_NbestPrediction(
text=final_text,
start_logit=pred.start_logit,
end_logit=pred.end_logit))
# if we didn't include the empty option in the n-best, include it
if version_2_with_negative:
if "" not in seen_predictions:
nbest.append(
_NbestPrediction(
text="",
start_logit=null_start_logit,
end_logit=null_end_logit))
# In very rare edge cases we could only have single null prediction.
# So we just create a nonce prediction in this case to avoid failure.
if len(nbest) == 1:
nbest.insert(0,
_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
# In very rare edge cases we could have no valid predictions. So we
# just create a nonce prediction in this case to avoid failure.
if not nbest:
nbest.append(
_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
assert len(nbest) >= 1
total_scores = []
best_non_null_entry = None
for entry in nbest:
total_scores.append(entry.start_logit + entry.end_logit)
if not best_non_null_entry:
if entry.text:
best_non_null_entry = entry
probs = _compute_softmax(total_scores)
nbest_json = []
for (i, entry) in enumerate(nbest):
output = collections.OrderedDict()
output["text"] = entry.text
output["probability"] = probs[i]
output["start_logit"] = entry.start_logit
output["end_logit"] = entry.end_logit
nbest_json.append(output)
assert len(nbest_json) >= 1
if not version_2_with_negative:
all_predictions[example.qas_id] = nbest_json[0]["text"]
else:
# predict "" iff the null score - the score of best non-null > threshold
score_diff = score_null - best_non_null_entry.start_logit - (
best_non_null_entry.end_logit)
scores_diff_json[example.qas_id] = score_diff
if score_diff > null_score_diff_threshold:
all_predictions[example.qas_id] = ""
else:
all_predictions[example.qas_id] = best_non_null_entry.text
all_nbest_json[example.qas_id] = nbest_json
return nbest_json
def get_final_text(pred_text, orig_text, do_lower_case):
"""Project the tokenized prediction back to the original text."""
# When we created the data, we kept track of the alignment between original
# (whitespace tokenized) tokens and our WordPiece tokenized tokens. So
# now `orig_text` contains the span of our original text corresponding to the
# span that we predicted.
#
# However, `orig_text` may contain extra characters that we don't want in
# our prediction.
#
# For example, let's say:
# pred_text = steve smith
# orig_text = Steve Smith's
#
# We don't want to return `orig_text` because it contains the extra "'s".
#
# We don't want to return `pred_text` because it's already been normalized
# (the SQuAD eval script also does punctuation stripping/lower casing but
# our tokenizer does additional normalization like stripping accent
# characters).
#
# What we really want to return is "Steve Smith".
#
# Therefore, we have to apply a semi-complicated alignment heuristic between
# `pred_text` and `orig_text` to get a character-to-character alignment. This
# can fail in certain cases in which case we just return `orig_text`.
def _strip_spaces(text):
ns_chars = []
ns_to_s_map = collections.OrderedDict()
for (i, c) in enumerate(text):
if c == " ":
continue
ns_to_s_map[len(ns_chars)] = i
ns_chars.append(c)
ns_text = "".join(ns_chars)
return (ns_text, ns_to_s_map)
# We first tokenize `orig_text`, strip whitespace from the result
# and `pred_text`, and check if they are the same length. If they are
# NOT the same length, the heuristic has failed. If they are the same
# length, we assume the characters are one-to-one aligned.
tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
tok_text = " ".join(tokenizer.tokenize(orig_text))
start_position = tok_text.find(pred_text)
if start_position == -1:
return orig_text
end_position = start_position + len(pred_text) - 1
(orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text)
(tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text)
if len(orig_ns_text) != len(tok_ns_text):
return orig_text
# We then project the characters in `pred_text` back to `orig_text` using
# the character-to-character alignment.
tok_s_to_ns_map = {}
for (i, tok_index) in tok_ns_to_s_map.items():
tok_s_to_ns_map[tok_index] = i
orig_start_position = None
if start_position in tok_s_to_ns_map:
ns_start_position = tok_s_to_ns_map[start_position]
if ns_start_position in orig_ns_to_s_map:
orig_start_position = orig_ns_to_s_map[ns_start_position]
if orig_start_position is None:
return orig_text
orig_end_position = None
if end_position in tok_s_to_ns_map:
ns_end_position = tok_s_to_ns_map[end_position]
if ns_end_position in orig_ns_to_s_map:
orig_end_position = orig_ns_to_s_map[ns_end_position]
if orig_end_position is None:
return orig_text
output_text = orig_text[orig_start_position:(orig_end_position + 1)]
return output_text
def _compute_softmax(scores):
"""Compute softmax probability over raw logits."""
if not scores:
return []
max_score = None
for score in scores:
if max_score is None or score > max_score:
max_score = score
exp_scores = []
total_sum = 0.0
for score in scores:
x = math.exp(score - max_score)
exp_scores.append(x)
total_sum += x
probs = []
for score in exp_scores:
probs.append(score / total_sum)
return probs
def _get_indices_of_largest_logits(logits):
""" sort logits and return the indices of the sorted array """
indices_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)
indices = map(lambda x: x[0], indices_and_score)
indices = list(indices)
return indices

View file

@ -0,0 +1,391 @@
# coding=utf-8
# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import, division, print_function, unicode_literals
import collections
import logging
import os
import unicodedata
import six
from io import open
logger = logging.getLogger(__name__)
PRETRAINED_VOCAB_ARCHIVE_MAP = {
'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",
'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt",
'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt",
'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt",
'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt",
}
PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = {
'bert-base-uncased': 512,
'bert-large-uncased': 512,
'bert-base-cased': 512,
'bert-large-cased': 512,
'bert-base-multilingual-uncased': 512,
'bert-base-multilingual-cased': 512,
'bert-base-chinese': 512,
}
VOCAB_NAME = 'vocab.txt'
def convert_to_unicode(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
if six.PY3:
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
elif six.PY2:
if isinstance(text, str):
return text.decode("utf-8", "ignore")
elif isinstance(text, unicode):
return text
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
else:
raise ValueError("Not running on Python2 or Python 3?")
def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
index = 0
with open(vocab_file, "r", encoding="utf-8") as reader:
while True:
token = reader.readline()
if not token:
break
token = token.strip()
vocab[token] = index
index += 1
return vocab
def whitespace_tokenize(text):
"""Runs basic whitespace cleaning and splitting on a piece of text."""
text = text.strip()
if not text:
return []
tokens = text.split()
return tokens
class BertTokenizer(object):
"""Runs end-to-end tokenization: punctuation splitting + wordpiece"""
def __init__(self, vocab_file, do_lower_case=True, max_len=None,
never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")):
if not os.path.isfile(vocab_file):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
self.vocab = load_vocab(vocab_file)
self.ids_to_tokens = collections.OrderedDict(
[(ids, tok) for tok, ids in self.vocab.items()])
self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case,
never_split=never_split)
self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
self.max_len = max_len if max_len is not None else int(1e12)
def tokenize(self, text):
split_tokens = []
for token in self.basic_tokenizer.tokenize(text):
for sub_token in self.wordpiece_tokenizer.tokenize(token):
split_tokens.append(sub_token)
return split_tokens
def convert_tokens_to_ids(self, tokens):
"""Converts a sequence of tokens into ids using the vocab."""
ids = []
for token in tokens:
ids.append(self.vocab[token])
if len(ids) > self.max_len:
raise ValueError(
"Token indices sequence length is longer than the specified maximum "
" sequence length for this BERT model ({} > {}). Running this"
" sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
)
return ids
def convert_ids_to_tokens(self, ids):
"""Converts a sequence of ids in wordpiece tokens using the vocab."""
tokens = []
for i in ids:
tokens.append(self.ids_to_tokens[i])
return tokens
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, cache_dir=None, *inputs, **kwargs):
"""
Instantiate a PreTrainedBertModel from a pre-trained model file.
Download and cache the pre-trained model file if needed.
"""
if pretrained_model_name_or_path in PRETRAINED_VOCAB_ARCHIVE_MAP:
vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name_or_path]
else:
vocab_file = pretrained_model_name_or_path
if os.path.isdir(vocab_file):
vocab_file = os.path.join(vocab_file, VOCAB_NAME)
# redirect to the cache, if necessary
try:
resolved_vocab_file = vocab_file
except EnvironmentError:
logger.error(
"Model name '{}' was not found in model name list ({}). "
"We assumed '{}' was a path or url but couldn't find any file "
"associated to this path or url.".format(
pretrained_model_name_or_path,
', '.join(PRETRAINED_VOCAB_ARCHIVE_MAP.keys()),
vocab_file))
return None
if resolved_vocab_file == vocab_file:
logger.info("loading vocabulary file {}".format(vocab_file))
else:
logger.info("loading vocabulary file {} from cache at {}".format(
vocab_file, resolved_vocab_file))
if pretrained_model_name_or_path in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP:
# if we're using a pretrained model, ensure the tokenizer won't index sequences longer
# than the number of positional embeddings
max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name_or_path]
kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
# Instantiate tokenizer.
tokenizer = cls(resolved_vocab_file, *inputs, **kwargs)
return tokenizer
class BasicTokenizer(object):
"""Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
def __init__(self,
do_lower_case=True,
never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")):
"""Constructs a BasicTokenizer.
Args:
do_lower_case: Whether to lower case the input.
"""
self.do_lower_case = do_lower_case
self.never_split = never_split
def tokenize(self, text):
"""Tokenizes a piece of text."""
text = self._clean_text(text)
# This was added on November 1st, 2018 for the multilingual and Chinese
# models. This is also applied to the English models now, but it doesn't
# matter since the English models were not trained on any Chinese data
# and generally don't have any Chinese data in them (there are Chinese
# characters in the vocabulary because Wikipedia does have some Chinese
# words in the English Wikipedia).
text = self._tokenize_chinese_chars(text)
orig_tokens = whitespace_tokenize(text)
split_tokens = []
for token in orig_tokens:
if self.do_lower_case and token not in self.never_split:
token = token.lower()
token = self._run_strip_accents(token)
split_tokens.extend(self._run_split_on_punc(token))
output_tokens = whitespace_tokenize(" ".join(split_tokens))
return output_tokens
def _run_strip_accents(self, text):
"""Strips accents from a piece of text."""
text = unicodedata.normalize("NFD", text)
output = []
for char in text:
cat = unicodedata.category(char)
if cat == "Mn":
continue
output.append(char)
return "".join(output)
def _run_split_on_punc(self, text):
"""Splits punctuation on a piece of text."""
if text in self.never_split:
return [text]
chars = list(text)
i = 0
start_new_word = True
output = []
while i < len(chars):
char = chars[i]
if _is_punctuation(char):
output.append([char])
start_new_word = True
else:
if start_new_word:
output.append([])
start_new_word = False
output[-1].append(char)
i += 1
return ["".join(x) for x in output]
def _tokenize_chinese_chars(self, text):
"""Adds whitespace around any CJK character."""
output = []
for char in text:
cp = ord(char)
if self._is_chinese_char(cp):
output.append(" ")
output.append(char)
output.append(" ")
else:
output.append(char)
return "".join(output)
def _is_chinese_char(self, cp):
"""Checks whether CP is the codepoint of a CJK character."""
# This defines a "chinese character" as anything in the CJK Unicode block:
# https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
#
# Note that the CJK Unicode block is NOT all Japanese and Korean characters,
# despite its name. The modern Korean Hangul alphabet is a different block,
# as is Japanese Hiragana and Katakana. Those alphabets are used to write
# space-separated words, so they are not treated specially and handled
# like all of the other languages.
if ((cp >= 0x4E00 and cp <= 0x9FFF) or #
(cp >= 0x3400 and cp <= 0x4DBF) or #
(cp >= 0x20000 and cp <= 0x2A6DF) or #
(cp >= 0x2A700 and cp <= 0x2B73F) or #
(cp >= 0x2B740 and cp <= 0x2B81F) or #
(cp >= 0x2B820 and cp <= 0x2CEAF) or
(cp >= 0xF900 and cp <= 0xFAFF) or #
(cp >= 0x2F800 and cp <= 0x2FA1F)): #
return True
return False
def _clean_text(self, text):
"""Performs invalid character removal and whitespace cleanup on text."""
output = []
for char in text:
cp = ord(char)
if cp == 0 or cp == 0xfffd or _is_control(char):
continue
if _is_whitespace(char):
output.append(" ")
else:
output.append(char)
return "".join(output)
class WordpieceTokenizer(object):
"""Runs WordPiece tokenization."""
def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100):
self.vocab = vocab
self.unk_token = unk_token
self.max_input_chars_per_word = max_input_chars_per_word
def tokenize(self, text):
"""Tokenizes a piece of text into its word pieces.
This uses a greedy longest-match-first algorithm to perform tokenization
using the given vocabulary.
For example:
input = "unaffable"
output = ["un", "##aff", "##able"]
Args:
text: A single token or whitespace separated tokens. This should have
already been passed through `BasicTokenizer`.
Returns:
A list of wordpiece tokens.
"""
output_tokens = []
for token in whitespace_tokenize(text):
chars = list(token)
if len(chars) > self.max_input_chars_per_word:
output_tokens.append(self.unk_token)
continue
is_bad = False
start = 0
sub_tokens = []
while start < len(chars):
end = len(chars)
cur_substr = None
while start < end:
substr = "".join(chars[start:end])
if start > 0:
substr = "##" + substr
if substr in self.vocab:
cur_substr = substr
break
end -= 1
if cur_substr is None:
is_bad = True
break
sub_tokens.append(cur_substr)
start = end
if is_bad:
output_tokens.append(self.unk_token)
else:
output_tokens.extend(sub_tokens)
return output_tokens
def _is_whitespace(char):
"""Checks whether `chars` is a whitespace character."""
# \t, \n, and \r are technically contorl characters but we treat them
# as whitespace since they are generally considered as such.
if char == " " or char == "\t" or char == "\n" or char == "\r":
return True
cat = unicodedata.category(char)
if cat == "Zs":
return True
return False
def _is_control(char):
"""Checks whether `chars` is a control character."""
# These are technically control characters but we count them as whitespace
# characters.
if char == "\t" or char == "\n" or char == "\r":
return False
cat = unicodedata.category(char)
if cat.startswith("C"):
return True
return False
def _is_punctuation(char):
"""Checks whether `chars` is a punctuation character."""
cp = ord(char)
# We treat all non-letter/number ASCII as punctuation.
# Characters such as "^", "$", and "`" are not in the Unicode
# Punctuation class but we treat them as punctuation anyways, for
# consistency.
if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
(cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
return True
cat = unicodedata.category(char)
if cat.startswith("P"):
return True
return False
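# Illustrative usage sketch (annotation, not part of the original file). The vocabulary
# path below is an assumption; any BERT-style vocab.txt readable by load_vocab() works.
if __name__ == "__main__":
    tokenizer = BertTokenizer("vocab.txt", do_lower_case=True)
    tokens = tokenizer.tokenize("TensorRT makes inference fast.")
    print(tokens)                                   # wordpiece tokens
    print(tokenizer.convert_tokens_to_ids(tokens))  # vocabulary ids for the tokens above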

View file

@ -0,0 +1,19 @@
Copyright (c) 2017 Keith Ito
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View file

@ -0,0 +1,446 @@
#!/usr/bin/python
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import librosa
import soundfile as sf
import math
from os import system
import numpy as np
from tensorrtserver.api import *
import tensorrtserver.api.model_config_pb2 as model_config
import grpc
from tensorrtserver.api import api_pb2
from tensorrtserver.api import grpc_service_pb2
from tensorrtserver.api import grpc_service_pb2_grpc
WINDOWS_FNS = {"hanning": np.hanning, "hamming": np.hamming, "none": None}
def model_dtype_to_np(model_dtype):
if model_dtype == model_config.TYPE_BOOL:
return np.bool
elif model_dtype == model_config.TYPE_INT8:
return np.int8
elif model_dtype == model_config.TYPE_INT16:
return np.int16
elif model_dtype == model_config.TYPE_INT32:
return np.int32
elif model_dtype == model_config.TYPE_INT64:
return np.int64
elif model_dtype == model_config.TYPE_UINT8:
return np.uint8
elif model_dtype == model_config.TYPE_UINT16:
return np.uint16
elif model_dtype == model_config.TYPE_UINT32:
return np.uint32
elif model_dtype == model_config.TYPE_FP16:
return np.float16
elif model_dtype == model_config.TYPE_FP32:
return np.float32
elif model_dtype == model_config.TYPE_FP64:
return np.float64
elif model_dtype == model_config.TYPE_STRING:
return np.dtype(object)
return None
def ctc_decoder_predictions_tensor(prediction_cpu_tensor, batch_size, labels):
"""
Takes the output of the greedy CTC decoder and performs the CTC decoding
algorithm to remove repeated tokens and the blank symbol. Returns the predictions.
Args:
prediction_cpu_tensor: model output tensor
batch_size: number of utterances in the batch
labels: a list of labels, with the blank symbol last
Returns:
list of predicted transcripts, one per batch element
"""
blank_id = len(labels) - 1
hypotheses = []
labels_map = dict([(i, labels[i]) for i in range(len(labels))])
# iterate over batch
prediction_cpu_tensor = prediction_cpu_tensor.reshape((batch_size, int(prediction_cpu_tensor.size/batch_size)))
for ind in range(batch_size):
prediction = prediction_cpu_tensor[ind].tolist()
# CTC decoding procedure
decoded_prediction = []
previous = len(labels) - 1 # id of a blank symbol
for p in prediction:
if (p != previous or previous == blank_id) and p != blank_id:
decoded_prediction.append(p)
previous = p
hypothesis = ''.join([labels_map[c] for c in decoded_prediction])
hypotheses.append(hypothesis)
return hypotheses
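# Worked illustration (annotation, not part of the original file): with blank_id = 28,
# a greedy output such as [7, 7, 28, 7, 0, 0, 11] collapses to [7, 7, 0, 11]: repeated
# ids are kept only when separated by a blank, and blank ids themselves are dropped.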
class SpeechClient(object):
def __init__(self, url, protocol, model_name, model_version, batch_size,
model_platform=None, verbose=False,
mode="batch",
from_features=True):
self.model_name = model_name
self.model_version = model_version
self.verbose = verbose
self.batch_size = batch_size
self.transpose_audio_features = False
self.grpc_stub = None
self.ctx = None
self.correlation_id = 0
self.first_run = True
if mode == "streaming" or mode == "asynchronous":
self.correlation_id = 1
self.buffer = []
self.ctx = InferContext(url, protocol, model_name, model_version,
verbose, self.correlation_id, False)
server_ctx = ServerStatusContext(url, protocol, model_name,
verbose)
server_status = server_ctx.get_server_status()
self.audio_signals_name, self.num_samples_name, self.transcripts_name, \
self.audio_signals_type, self.num_samples_type, self.transcripts_type = self.parse_model(server_status, model_name,
batch_size, model_platform, verbose)
self.labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'", "<BLANK>"]
def postprocess(self, results, labels):
if len(results) != 1:
raise Exception("expected 1 result, got {}".format(len(results)))
transcript_values = results['TRANSCRIPT']
for transcript, filename in zip(transcript_values,
labels):
hypotheses = ctc_decoder_predictions_tensor(transcript, self.batch_size, self.labels)
print('---')
print('File: ', filename)
print("Final transcript: ", hypotheses)
print('---')
return hypotheses
def check_num_samples(self, num_samples):
if num_samples.data_type != model_config.TYPE_UINT32 and num_samples.data_type != model_config.TYPE_INT32:
raise Exception(
"expecting num_samples datatype to be TYPE_UINT32/TYPE_INT32, "
"model '" + model_name + "' output type is " +
model_config.DataType.Name(num_samples.data_type))
if len(num_samples.dims) != 1:
raise Exception("Expecting num_samples to have 1 dimension, "
"model '{}' num_samples has {}".format(
model_name,len(num_samples.dims)))
def parse_model(self, server_status,
model_name, batch_size,
model_platform=None, verbose=False):
"""
Check the configuration of the ensemble model
"""
if model_name not in server_status.model_status:
raise Exception("unable to get status for '" + model_name + "'")
status = server_status.model_status[model_name]
config = status.config
self.model_platform = model_platform
# Inputs are:
# 1) audio_signal: raw audio samples [num_samples]
# 2) sample_rate: sample rate of audio
# 3) num_samples: length of audio
if len(config.input) < 2:
raise Exception(
"expecting 2-3 inputs, got {}".format(len(config.input)))
# Outputs are:
# 1) transcripts: candidate transcripts
if len(config.output) != 1:
raise Exception(
"expecting 1 output, got {}".format(len(config.output)))
audio_signal = config.input[0]
if len(config.input) > 1:
num_samples = config.input[1]
self.check_num_samples(num_samples);
transcripts = config.output[0]
expected_audio_signal_dim = 1
expected_audio_signal_type = model_config.TYPE_FP32
if audio_signal.data_type != expected_audio_signal_type:
raise Exception("expecting audio_signal datatype to be " +
model_config.DataType.Name(
expected_audio_signal_type) +
"model '" + model_name + "' output type is " +
model_config.DataType.Name(audio_signal.data_type))
# Model specifying maximum batch size of 0 indicates that batching
# is not supported and so the input tensors do not expect an "N"
# dimension (and 'batch_size' should be 1 so that only a single
# audio instance is inferred at a time).
max_batch_size = config.max_batch_size
if max_batch_size == 0:
if batch_size != 1:
raise Exception(
"batching not supported for model '" + model_name + "'")
else: # max_batch_size > 0
if batch_size > max_batch_size:
raise Exception(
"expecting batch size <= {} for model {}".format(
max_batch_size, model_name))
if len(audio_signal.dims) != expected_audio_signal_dim:
raise Exception("Expecting audio signal to have {} dimensions, "
"model '{}' audio_signal has {}".format(
expected_audio_signal_dim,
model_name,
len(audio_signal.dims)))
return (audio_signal.name, num_samples.name, transcripts.name,
model_dtype_to_np(audio_signal.data_type),
model_dtype_to_np(num_samples.data_type),
model_dtype_to_np(transcripts.data_type),
)
def update_audio_request(self, request, audio_generator):
for audio_signal, sample_rate, start, end in audio_generator:
# Delete the current inputs
input_batch = [audio_signal.astype(self.audio_signals_type)]
num_samples_batch = audio_signal.shape[0]
num_samples_batch = [np.asarray([num_samples_batch],
dtype=self.num_samples_type)]
flags = InferRequestHeader.FLAG_NONE
input_batch[0] = np.expand_dims(input_batch[0], axis=0)
audio_bytes = input_batch[0].tobytes()
num_samples_bytes = num_samples_batch[0].tobytes()
request.meta_data.input[0].dims[0] = audio_signal.shape[0]
request.meta_data.input[0].batch_byte_size = len(audio_bytes)
request.meta_data.input[1].dims[0] = 1
request.meta_data.input[1].batch_byte_size = len(num_samples_bytes)
if start:
request.meta_data.flags = flags | \
InferRequestHeader.FLAG_SEQUENCE_START
else:
request.meta_data.flags = flags;
# Send request with audio signal
del request.raw_input[:]
request.raw_input.extend([audio_bytes])
request.raw_input.extend([num_samples_bytes])
yield request
# If end, send empty request to flush out remaining audio
if end:
request.meta_data.flags = flags | \
InferRequestHeader.FLAG_SEQUENCE_END
zero_bytes = np.zeros(shape=input_batch[0].shape,
dtype=input_batch[0].dtype).tobytes()
del request.raw_input[:]
request.raw_input.extend([zero_bytes])
request.raw_input.extend([num_samples_bytes])
yield request
def recognize(self, audio_signal, filenames):
# Send a request of batch_size audio signals. The caller is expected to
# provide at least batch_size audio signals and matching filenames.
flags = InferRequestHeader.FLAG_NONE
flags = flags | InferRequestHeader.FLAG_SEQUENCE_START
input_batch = []
input_filenames = []
max_num_samples_batch = 0
for idx in range(self.batch_size):
input_batch.append(audio_signal[idx].astype(
self.audio_signals_type))
input_filenames.append(filenames[idx])
num_samples = audio_signal[idx].shape[0]
if (num_samples > max_num_samples_batch):
max_num_samples_batch = num_samples
for idx in range(self.batch_size):
num_samples = input_batch[idx].shape[0]
print("num_samples : ", num_samples)
# input_batch[idx] = np.pad(input_batch[idx],
# ((0,
# max_num_samples_batch -
# num_samples)),
# mode='constant')
mean = np.mean(input_batch[idx])
std_var = np.std(input_batch[idx])
gauss_noise = np.random.normal(
mean,std_var,
max_num_samples_batch-num_samples)
input_batch[idx]= np.concatenate(
(input_batch[idx], gauss_noise.astype(
self.audio_signals_type)))
max_num_samples_batch = np.asarray([max_num_samples_batch],
dtype=self.num_samples_type)
num_samples_batch = [max_num_samples_batch] * self.batch_size
#print(num_samples_batch)
#print(input_batch)
#print(input_sample_rates)
# Send request
print("Sending request to transcribe file(s):", ",".join(
input_filenames))
if (self.model_platform == "obsolete_pyt"):
result = self.ctx.run(
{self.audio_signals_name: input_batch,
self.num_samples_name: num_samples_batch},
{self.transcripts_name: InferContext.ResultFormat.RAW},
self.batch_size, flags)
else:
result = self.ctx.run(
{self.audio_signals_name: input_batch,
self.num_samples_name: num_samples_batch},
{self.transcripts_name: InferContext.ResultFormat.RAW},
self.batch_size, flags)
hypotheses = self.postprocess(result, input_filenames)
return hypotheses
def preemphasis(signal, coeff=0.97):
return np.append(signal[0], signal[1:] - coeff * signal[:-1])
def normalize_signal(signal, gain=None):
"""
Normalize float32 signal to [-1, 1] range
"""
if gain is None:
gain = 1.0 / (np.max(np.abs(signal)) + 1e-5)
return signal * gain
class AudioSegment(object):
"""Monaural audio segment abstraction.
:param samples: Audio samples [num_samples x num_channels].
:type samples: ndarray.float32
:param sample_rate: Audio sample rate.
:type sample_rate: int
:raises TypeError: If the sample data type is not float or int.
"""
def __init__(self, samples, sample_rate, target_sr=16000, trim=False,
trim_db=60):
"""Create audio segment from samples.
Samples are converted to float32 internally, with ints scaled to [-1, 1].
"""
samples = self._convert_samples_to_float32(samples)
if target_sr is not None and target_sr != sample_rate:
samples = librosa.core.resample(samples, sample_rate, target_sr)
sample_rate = target_sr
if trim:
samples, _ = librosa.effects.trim(samples, trim_db)
self._samples = samples
self._sample_rate = sample_rate
if self._samples.ndim >= 2:
self._samples = np.mean(self._samples, 1)
@staticmethod
def _convert_samples_to_float32(samples):
"""Convert sample type to float32.
Audio sample type is usually integer or floating-point.
Integers will be scaled to [-1, 1] in float32.
"""
float32_samples = samples.astype('float32')
if samples.dtype in np.sctypes['int']:
bits = np.iinfo(samples.dtype).bits
float32_samples *= (1. / 2 ** (bits - 1))
elif samples.dtype in np.sctypes['float']:
pass
else:
raise TypeError("Unsupported sample type: %s." % samples.dtype)
return float32_samples
@classmethod
def from_file(cls, filename, target_sr=16000, int_values=False, offset=0,
duration=0, trim=False):
"""
Load a file supported by librosa and return as an AudioSegment.
:param filename: path of file to load
:param target_sr: the desired sample rate
:param int_values: if true, load samples as 32-bit integers
:param offset: offset in seconds when loading audio
:param duration: duration in seconds when loading audio
:return: AudioSegment instance containing the loaded samples
"""
with sf.SoundFile(filename, 'r') as f:
dtype = 'int32' if int_values else 'float32'
sample_rate = f.samplerate
if offset > 0:
f.seek(int(offset * sample_rate))
if duration > 0:
samples = f.read(int(duration * sample_rate), dtype=dtype)
else:
samples = f.read(dtype=dtype)
samples = samples.transpose()
return cls(samples, sample_rate, target_sr=target_sr, trim=trim)
@property
def samples(self):
return self._samples.copy()
@property
def sample_rate(self):
return self._sample_rate
# define our clear function
def clear_screen():
_ = system('clear')
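# Usage sketch (annotation, not part of the original file; the server URL, protocol and
# audio path are assumptions, and a running TensorRT Inference Server is required):
#   audio = AudioSegment.from_file("example.wav", target_sr=16000)
#   client = SpeechClient("localhost:8001", ProtocolType.GRPC, "jasper-trt-ensemble",
#                         model_version=-1, batch_size=1, model_platform="trt")
#   client.recognize([audio.samples], ["example.wav"])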

View file

@ -0,0 +1,19 @@
Copyright (c) 2017 Keith Ito
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View file

@ -0,0 +1,74 @@
""" from https://github.com/keithito/tacotron """
import re
from utils.tacotron2 import cleaners
from utils.tacotron2.symbols import symbols
# Mappings from symbol to numeric ID and vice versa:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = {i: s for i, s in enumerate(symbols)}
# Regular expression matching text enclosed in curly braces:
_curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')
def text_to_sequence(text, cleaner_names):
'''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
The text can optionally have ARPAbet sequences enclosed in curly braces embedded
in it. For example, "Turn left on {HH AW1 S S T AH0 N} Street."
Args:
text: string to convert to a sequence
cleaner_names: names of the cleaner functions to run the text through
Returns:
List of integers corresponding to the symbols in the text
'''
sequence = []
# Check for curly braces and treat their contents as ARPAbet:
while len(text):
m = _curly_re.match(text)
if not m:
sequence += _symbols_to_sequence(_clean_text(text, cleaner_names))
break
sequence += _symbols_to_sequence(_clean_text(m.group(1), cleaner_names))
sequence += _arpabet_to_sequence(m.group(2))
text = m.group(3)
return sequence
def sequence_to_text(sequence):
'''Converts a sequence of IDs back to a string'''
result = ''
for symbol_id in sequence:
if symbol_id in _id_to_symbol:
s = _id_to_symbol[symbol_id]
# Enclose ARPAbet back in curly braces:
if len(s) > 1 and s[0] == '@':
s = '{%s}' % s[1:]
result += s
return result.replace('}{', ' ')
def _clean_text(text, cleaner_names):
for name in cleaner_names:
cleaner = getattr(cleaners, name)
if not cleaner:
raise Exception('Unknown cleaner: %s' % name)
text = cleaner(text)
return text
def _symbols_to_sequence(symbols):
return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]
def _arpabet_to_sequence(text):
return _symbols_to_sequence(['@' + s for s in text.split()])
def _should_keep_symbol(s):
return s in _symbol_to_id and s != '_' and s != '~'
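# Usage sketch (illustrative only); mirrors the ARPAbet example in the
# text_to_sequence() docstring above.
if __name__ == "__main__":
    seq = text_to_sequence("Turn left on {HH AW1 S S T AH0 N} Street.", ["english_cleaners"])
    print(seq)                    # list of symbol ids
    print(sequence_to_text(seq))  # round-trip back to text, ARPAbet re-wrapped in {}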

View file

@ -0,0 +1,90 @@
""" from https://github.com/keithito/tacotron """
'''
Cleaners are transformations that run over the input text at both training and eval time.
Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
hyperparameter. Some cleaners are English-specific. You'll typically want to use:
1. "english_cleaners" for English text
2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
the Unidecode library (https://pypi.python.org/pypi/Unidecode)
3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
the symbols in symbols.py to match your data).
'''
import re
from unidecode import unidecode
from .numbers import normalize_numbers
# Regular expression matching whitespace:
_whitespace_re = re.compile(r'\s+')
# List of (regular expression, replacement) pairs for abbreviations:
_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
('mrs', 'misess'),
('mr', 'mister'),
('dr', 'doctor'),
('st', 'saint'),
('co', 'company'),
('jr', 'junior'),
('maj', 'major'),
('gen', 'general'),
('drs', 'doctors'),
('rev', 'reverend'),
('lt', 'lieutenant'),
('hon', 'honorable'),
('sgt', 'sergeant'),
('capt', 'captain'),
('esq', 'esquire'),
('ltd', 'limited'),
('col', 'colonel'),
('ft', 'fort'),
]]
def expand_abbreviations(text):
for regex, replacement in _abbreviations:
text = re.sub(regex, replacement, text)
return text
def expand_numbers(text):
return normalize_numbers(text)
def lowercase(text):
return text.lower()
def collapse_whitespace(text):
return re.sub(_whitespace_re, ' ', text)
def convert_to_ascii(text):
return unidecode(text)
def basic_cleaners(text):
'''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
text = lowercase(text)
text = collapse_whitespace(text)
return text
def transliteration_cleaners(text):
'''Pipeline for non-English text that transliterates to ASCII.'''
text = convert_to_ascii(text)
text = lowercase(text)
text = collapse_whitespace(text)
return text
def english_cleaners(text):
'''Pipeline for English text, including number and abbreviation expansion.'''
text = convert_to_ascii(text)
text = lowercase(text)
text = expand_numbers(text)
text = expand_abbreviations(text)
text = collapse_whitespace(text)
return text
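# Illustrative sketch (annotation, not part of the original file): roughly what the
# English pipeline produces; the exact wording of number expansion comes from inflect.
if __name__ == "__main__":
    print(english_cleaners("Mr. Smith paid $2.50 in 2019."))
    # expected along the lines of:
    # "mister smith paid two dollars, fifty cents in twenty nineteen."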

View file

@ -0,0 +1,65 @@
""" from https://github.com/keithito/tacotron """
import re
valid_symbols = [
'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1', 'AH2',
'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0', 'AY1', 'AY2',
'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0', 'ER1', 'ER2', 'EY',
'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', 'IH1', 'IH2', 'IY', 'IY0', 'IY1',
'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0',
'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW',
'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH'
]
_valid_symbol_set = set(valid_symbols)
class CMUDict:
'''Thin wrapper around CMUDict data. http://www.speech.cs.cmu.edu/cgi-bin/cmudict'''
def __init__(self, file_or_path, keep_ambiguous=True):
if isinstance(file_or_path, str):
with open(file_or_path, encoding='latin-1') as f:
entries = _parse_cmudict(f)
else:
entries = _parse_cmudict(file_or_path)
if not keep_ambiguous:
entries = {word: pron for word, pron in entries.items() if len(pron) == 1}
self._entries = entries
def __len__(self):
return len(self._entries)
def lookup(self, word):
'''Returns list of ARPAbet pronunciations of the given word.'''
return self._entries.get(word.upper())
_alt_re = re.compile(r'\([0-9]+\)')
def _parse_cmudict(file):
cmudict = {}
for line in file:
if len(line) and (line[0] >= 'A' and line[0] <= 'Z' or line[0] == "'"):
parts = line.split(' ')
word = re.sub(_alt_re, '', parts[0])
pronunciation = _get_pronunciation(parts[1])
if pronunciation:
if word in cmudict:
cmudict[word].append(pronunciation)
else:
cmudict[word] = [pronunciation]
return cmudict
def _get_pronunciation(s):
parts = s.strip().split(' ')
for part in parts:
if part not in _valid_symbol_set:
return None
return ' '.join(parts)

View file

@ -0,0 +1,71 @@
""" from https://github.com/keithito/tacotron """
import inflect
import re
_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
_number_re = re.compile(r'[0-9]+')
def _remove_commas(m):
return m.group(1).replace(',', '')
def _expand_decimal_point(m):
return m.group(1).replace('.', ' point ')
def _expand_dollars(m):
match = m.group(1)
parts = match.split('.')
if len(parts) > 2:
return match + ' dollars' # Unexpected format
dollars = int(parts[0]) if parts[0] else 0
cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
if dollars and cents:
dollar_unit = 'dollar' if dollars == 1 else 'dollars'
cent_unit = 'cent' if cents == 1 else 'cents'
return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
elif dollars:
dollar_unit = 'dollar' if dollars == 1 else 'dollars'
return '%s %s' % (dollars, dollar_unit)
elif cents:
cent_unit = 'cent' if cents == 1 else 'cents'
return '%s %s' % (cents, cent_unit)
else:
return 'zero dollars'
def _expand_ordinal(m):
return _inflect.number_to_words(m.group(0))
def _expand_number(m):
num = int(m.group(0))
if num > 1000 and num < 3000:
if num == 2000:
return 'two thousand'
elif num > 2000 and num < 2010:
return 'two thousand ' + _inflect.number_to_words(num % 100)
elif num % 100 == 0:
return _inflect.number_to_words(num // 100) + ' hundred'
else:
return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ')
else:
return _inflect.number_to_words(num, andword='')
def normalize_numbers(text):
text = re.sub(_comma_number_re, _remove_commas, text)
text = re.sub(_pounds_re, r'\1 pounds', text)
text = re.sub(_dollars_re, _expand_dollars, text)
text = re.sub(_decimal_number_re, _expand_decimal_point, text)
text = re.sub(_ordinal_re, _expand_ordinal, text)
text = re.sub(_number_re, _expand_number, text)
return text
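# Quick sketch (illustrative only) of the branches above: ordinal expansion, the
# year-style reading for numbers between 1000 and 3000, and dollar expansion.
if __name__ == "__main__":
    print(normalize_numbers("On May 3rd, 2005, the fare was $15."))
    # expected along the lines of:
    # "On May third, two thousand five, the fare was fifteen dollars."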

View file

@ -0,0 +1,18 @@
""" from https://github.com/keithito/tacotron """
'''
Defines the set of symbols used in text input to the model.
The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details. '''
from utils.tacotron2 import cmudict
_pad = '_'
_punctuation = '!\'(),.:;? '
_special = '-'
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
# Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
_arpabet = ['@' + s for s in cmudict.valid_symbols]
# Export all symbols:
symbols = [_pad] + list(_special) + list(_punctuation) + list(_letters) + _arpabet

View file

@ -0,0 +1 @@
jupyter lab --allow-root --ip=0.0.0.0 --no-browser speech_ai_demo.ipynb

View file

@ -0,0 +1,68 @@
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile b/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
index e598a67..562be83 100644
--- a/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
+++ b/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
@@ -1,4 +1,4 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.10-py3
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.01-py3
FROM ${FROM_IMAGE_NAME}
RUN apt-get update && apt-get install -y python3
@@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y python3
WORKDIR /tmp/onnx-trt
COPY trt/onnx-trt.patch .
RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git submodule update --init --recursive && \
- patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
+ mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
# Here's a good place to install pip reqs from JoC repo.
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/perf.py b/PyTorch/SpeechRecognition/Jasper/trt/perf.py
index 426ee66..5917a1f 100755
--- a/PyTorch/SpeechRecognition/Jasper/trt/perf.py
+++ b/PyTorch/SpeechRecognition/Jasper/trt/perf.py
@@ -64,6 +64,9 @@ def main(args):
print("TRANSCRIPT: ", hypotheses)
return
+ if pyt_components['data_layer'] is None:
+ return
+
wer, preds, times = perfprocedures.compare_times_trt_pyt_exhaustive(engine,
pyt_components,
args)
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh
index 0e44c7f..62e7446 100755
--- a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh
+++ b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Constructs a docker image containing dependencies for execution of JASPER through TRT
-echo "docker build . -f ./trt/Dockerfile -t jasper:trt6"
-docker build . -f ./trt/Dockerfile -t jasper:trt6
+echo "docker build . -f ./trt/Dockerfile -t jasper:trt7"
+docker build . -f ./trt/Dockerfile -t jasper:trt7
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh
index 9959062..ed5e711 100755
--- a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh
+++ b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh
@@ -40,4 +40,4 @@ nvidia-docker run -it --rm \
-v $RESULT_DIR:/results/ \
-v ${JASPER_REPO}:/jasper \
${EXTRA_JASPER_ENV} \
- jasper:trt6 bash $PROGRAM_PATH
+ jasper:trt7 bash $PROGRAM_PATH
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py b/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
index 92460b2..01c8b6a 100644
--- a/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
+++ b/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
@@ -40,7 +40,7 @@ def build_engine_from_parser(args):
'''
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if args.verbose else trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(TRT_LOGGER)
- builder.max_batch_size = 64
+ builder.max_batch_size = 16
if args.trt_fp16:
builder.fp16_mode = True

View file

@ -0,0 +1,44 @@
name: "bert-onnx"
platform: "onnxruntime_onnx"
max_batch_size: 8
input [
{
name: "input__0"
data_type: TYPE_INT64
dims: [384]
},
{
name: "input__1"
data_type: TYPE_INT64
dims: [384]
},
{
name: "input__2"
data_type: TYPE_INT64
dims: [384]
}
]
output [
{
name: "output__0"
data_type: TYPE_FP16
dims: [384]
},
{
name: "output__1"
data_type: TYPE_FP16
dims: [384]
}
]
optimization {
cuda {
graphs: 0
}
}
instance_group [
{
count: 1
kind: KIND_GPU
gpus: [ 0 ]
}
]

View file

@ -0,0 +1,45 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
default_model_filename: "jasper-decoder.pt"
name: "jasper-decoder"
platform: "pytorch_libtorch"
max_batch_size: 16
input [
{
name: "CLASS_LOGITS__0"
data_type: TYPE_FP32
dims: [ -1, 29 ]
}
]
output [
{
name: "CANDIDATE_TRANSCRIPT__0"
data_type: TYPE_INT32
dims: [ -1]
}
]

View file

@ -0,0 +1,32 @@
name: "jasper-feature-extractor"
platform: "pytorch_libtorch"
default_model_filename: "jasper-feature-extractor.pt"
max_batch_size: 16
input [ {
name: "AUDIO_SIGNAL__0"
data_type: TYPE_FP32
dims: [ -1 ]
},
{
name: "NUM_SAMPLES__1"
data_type: TYPE_INT32
dims: [ 1 ]
reshape { shape: [] }
}
]
output [
{
name: "AUDIO_FEATURES__0"
data_type: TYPE_FP32
dims: [64, -1]
}
,
{
name: "NUM_TIME_STEPS__1"
data_type: TYPE_INT32
dims: [ 1 ]
reshape: { shape: [] }
}
]

View file

@ -0,0 +1,60 @@
name: "jasper-trt-ensemble"
platform: "ensemble"
max_batch_size: 1
input {
name: "AUDIO_SIGNAL"
data_type: TYPE_FP32
dims: -1
}
input {
name: "NUM_SAMPLES"
data_type: TYPE_INT32
dims: [ 1 ]
}
output {
name: "TRANSCRIPT"
data_type: TYPE_INT32
dims: [-1]
}
ensemble_scheduling {
step {
model_name: "jasper-feature-extractor"
model_version: -1
input_map {
key: "AUDIO_SIGNAL__0"
value: "AUDIO_SIGNAL"
}
input_map {
key: "NUM_SAMPLES__1"
value: "NUM_SAMPLES"
}
output_map {
key: "AUDIO_FEATURES__0"
value: "AUDIO_FEATURES"
}
}
step {
model_name: "jasper-trt"
model_version: -1
input_map {
key: "FEATURES"
value: "AUDIO_FEATURES"
}
output_map {
key: "LOGITS"
value: "CHARACTER_PROBABILITIES"
}
}
step {
model_name: "jasper-decoder"
model_version: -1
input_map {
key: "CLASS_LOGITS__0"
value: "CHARACTER_PROBABILITIES"
}
output_map {
key: "CANDIDATE_TRANSCRIPT__0"
value: "TRANSCRIPT"
}
}
}
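# Dataflow summary (annotation, not part of the original config):
#   AUDIO_SIGNAL, NUM_SAMPLES -> jasper-feature-extractor -> AUDIO_FEATURES
#   AUDIO_FEATURES            -> jasper-trt               -> CHARACTER_PROBABILITIES
#   CHARACTER_PROBABILITIES   -> jasper-decoder           -> TRANSCRIPT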

View file

@ -0,0 +1,52 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "jasper-trt"
platform: "tensorrt_plan"
default_model_filename: "jasper_fp16.engine"
max_batch_size: 16
input [
{
name: "FEATURES"
data_type: TYPE_FP32
dims: [64, -1]
}
]
output [
{
name: "LOGITS"
data_type: TYPE_FP32
dims: [-1, 29 ]
}
]
cc_model_filenames: [
{ key: "7.0"
value: "jasper_fp16.engine"}
]

View file

@ -0,0 +1,31 @@
name: "tacotron2"
platform: "pytorch_libtorch"
default_model_filename: "tacotron2_fp16.pt"
max_batch_size: 8
input [
{
name: "sequence__0"
data_type: TYPE_INT64
dims: [-1]
},
{
name: "input_lengths__1"
data_type: TYPE_INT64
dims: [1]
reshape: { shape: [ ] }
}
]
output [
{
name: "mel_outputs_postnet__0"
data_type: TYPE_FP16
dims: [80,-1]
},
{
name: "mel_lengths__1"
data_type: TYPE_INT32
dims: [1]
reshape: { shape: [ ] }
}
]

View file

@ -0,0 +1,21 @@
name: "waveglow-trt"
platform: "tensorrt_plan"
default_model_filename: "waveglow_fp16.engine"
max_batch_size: 1
input {
name: "mel"
data_type: TYPE_FP16
dims: [80, -1, 1]
}
input {
name: "z"
data_type: TYPE_FP16
dims: [8, -1, 1]
}
output {
name: "audio"
data_type: TYPE_FP16
dims: [-1]
}

View file

@ -123,9 +123,10 @@
"def sequence_to_mel(sequence):\n",
" ''' calls tacotron2\n",
" ::sequence:: int64 numpy array, contains the preprocessed text\n",
" ::returns:: (mel, mel_lengths) pair\n",
" ::returns:: (mel, mel_lengths, alignments) tuple\n",
" mel is the mel-spectrogram, np.array\n",
" mel_lengths contains the length of the unpadded mel, np.array\n",
" alignments contains attention weigths, np.array\n",
" '''\n",
" input_lengths = [len(sequence)]\n",
" input_lengths = np.array(input_lengths, dtype=np.int64)\n",
@ -136,13 +137,15 @@
" output_dict = {}\n",
" output_dict['mel_outputs_postnet__0'] = InferContext.ResultFormat.RAW\n",
" output_dict['mel_lengths__1'] = InferContext.ResultFormat.RAW\n",
" output_dict['alignments__2'] = InferContext.ResultFormat.RAW\n",
" batch_size = 1\n",
" # call tacotron2\n",
" result = infer_ctx_tacotron2.run(input_dict, output_dict, batch_size)\n",
" # get results\n",
" mel = result['mel_outputs_postnet__0'][0] # take only the first instance in the output batch\n",
" mel_lengths = result['mel_lengths__1'][0] # take only the first instance in the output batch\n",
" return mel, mel_lengths\n",
" alignments = result['alignments__2'][0] # take only the first instance in the output batch\n",
" return mel, mel_lengths, alignments\n",
"\n",
"\n",
"def force_to_shape(mel, length):\n",
@ -239,7 +242,7 @@
" with plot_text_area_preprocessed:\n",
" display_heatmap(sequence)\n",
" # run tacotron2\n",
" mel, mel_lengths = sequence_to_mel(sequence)\n",
" mel, mel_lengths, alignments = sequence_to_mel(sequence)\n",
" with plot_spectrogram:\n",
" display_spectrogram(mel, change['new'])\n",
" # run waveglow\n",

View file

@ -685,4 +685,4 @@ class Tacotron2(nn.Module):
mel_outputs_postnet = self.postnet(mel_outputs)
mel_outputs_postnet = mel_outputs + mel_outputs_postnet
return mel_outputs_postnet, mel_lengths
return mel_outputs_postnet, mel_lengths, alignments

View file

@ -187,7 +187,7 @@ def main():
with torch.no_grad():
with MeasureTime(measurements, "latency"):
with MeasureTime(measurements, "tacotron2_latency"):
mel, mel_lengths = tacotron2.infer(sequences_padded, input_lengths)
mel, mel_lengths, _ = tacotron2.infer(sequences_padded, input_lengths)
with MeasureTime(measurements, "waveglow_latency"):
audios = waveglow.infer(mel, sigma=args.sigma_infer)

View file

@ -81,8 +81,6 @@ def parse_args(parser):
help='Number of epochs per checkpoint')
training.add_argument('--checkpoint-path', type=str, default='',
help='Checkpoint path to resume training')
training.add_argument('--seed', type=int, default=1234,
help='Seed for PyTorch random number generators')
training.add_argument('--dynamic-loss-scaling', type=bool, default=True,
help='Enable dynamic loss scaling')
training.add_argument('--amp-run', action='store_true',
@ -196,12 +194,13 @@ def save_checkpoint(model, optimizer, epoch, config, amp_run, filepath):
torch.save(checkpoint, filepath)
def load_checkpoint(model, optimizer, epoch, config, amp_run, filepath):
def load_checkpoint(model, optimizer, epoch, config, amp_run, filepath, rank):
checkpoint = torch.load(filepath, map_location='cpu')
epoch[0] = checkpoint['epoch']+1
torch.cuda.set_rng_state_all(checkpoint['cuda_rng_state_all'])
device_id = rank % torch.cuda.device_count()
torch.cuda.set_rng_state(checkpoint['cuda_rng_state_all'][device_id])
torch.random.set_rng_state(checkpoint['random_rng_state'])
config = checkpoint['config']
model.load_state_dict(checkpoint['state_dict'])
@ -355,7 +354,7 @@ def main():
if args.checkpoint_path is not "":
load_checkpoint(model, optimizer, start_epoch, model_config,
args.amp_run, args.checkpoint_path)
args.amp_run, args.checkpoint_path, local_rank)
start_epoch = start_epoch[0]
@ -475,7 +474,7 @@ def main():
DLLogger.log(step=(epoch,), data={'train_loss': (train_epoch_avg_loss/num_iters if num_iters > 0 else 0.0)})
DLLogger.log(step=(epoch,), data={'train_epoch_time': epoch_time})
val_loss = validate(model, criterion, valset, epoch, i,
val_loss = validate(model, criterion, valset, epoch, iteration,
args.batch_size, world_size, collate_fn,
distributed_run, local_rank, batch_to_gpu)

View file

@ -52,13 +52,13 @@ NVIDIA TensorRT is a platform for high-performance deep learning inference. It i
```bash
mkdir -p output
python exports/export_tacotron2_onnx.py --tacotron2 ./checkpoints/nvidia_tacotron2pyt_fp16_20190427 -o output/
python exports/export_tacotron2_onnx.py --tacotron2 ./checkpoints/nvidia_tacotron2pyt_fp16_20190427 -o output/ --fp16
```
Export WaveGlow to ONNX IR:
```bash
python exports/export_waveglow_onnx.py --waveglow ./checkpoints/nvidia_waveglow256pyt_fp16 --wn-channels 256 -o output/
python exports/export_waveglow_onnx.py --waveglow ./checkpoints/nvidia_waveglow256pyt_fp16 --wn-channels 256 -o output/ --fp16
```
After running the above commands, there should be four new ONNX files in `./output/` directory:
@ -76,7 +76,7 @@ NVIDIA TensorRT is a platform for high-performance deep learning inference. It i
8. Run TTS inference pipeline with fp16:
```bash
python trt/inference_trt.py -i phrases/phrase.txt --encoder output/encoder_fp16.engine --decoder output/decoder_iter_fp16.engine --postnet output/postnet_fp16.engine --waveglow output/waveglow_fp16.engine -o output/
python trt/inference_trt.py -i phrases/phrase.txt --encoder output/encoder_fp16.engine --decoder output/decoder_iter_fp16.engine --postnet output/postnet_fp16.engine --waveglow output/waveglow_fp16.engine -o output/ --fp16
```
## Inference performance: NVIDIA T4
@ -85,5 +85,5 @@ Our results were obtained by running the `./trt/run_latency_tests_trt.sh` script
|Framework|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)|Latency confidence interval 90% (s)|Latency confidence interval 95% (s)|Latency confidence interval 99% (s)|Throughput (samples/sec)|Speed-up PyT+TRT/TRT|Avg mels generated (81 mels=1 sec of speech)|Avg audio length (s)|Avg RTF|
|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
|PyT+TRT|1| 128| FP16| 1.14| 0.02| 1.16| 1.16| 1.21| 137,050| 1.45| 611| 7.09| 6.20|
|PyT+TRT|1| 128| FP16| 1.13| 0.01| 1.15| 1.15| 1.16| 137,738| 1.45| 608| 7.06| 6.24|
|PyT |1| 128| FP16| 1.63| 0.07| 1.71| 1.73| 1.81| 94,758| 1.00| 601| 6.98| 4.30|

View file

@ -71,12 +71,16 @@ def parse_args(parser):
help='Sampling rate')
parser.add_argument('--stft-hop-length', type=int, default=256,
help='STFT hop length for estimating audio length from mel size')
parser.add_argument('--fp16', action='store_true',
help='inference with FP16')
return parser
def init_decoder_inputs(memory, processed_memory, memory_lengths):
device = memory.device
dtype = memory.dtype
bs = memory.size(0)
seq_len = memory.size(1)
attention_rnn_dim = 1024
@ -84,15 +88,15 @@ def init_decoder_inputs(memory, processed_memory, memory_lengths):
encoder_embedding_dim = 512
n_mel_channels = 80
attention_hidden = torch.zeros(bs, attention_rnn_dim).cuda().float()
attention_cell = torch.zeros(bs, attention_rnn_dim).cuda().float()
decoder_hidden = torch.zeros(bs, decoder_rnn_dim).cuda().float()
decoder_cell = torch.zeros(bs, decoder_rnn_dim).cuda().float()
attention_weights = torch.zeros(bs, seq_len).cuda().float()
attention_weights_cum = torch.zeros(bs, seq_len).cuda().float()
attention_context = torch.zeros(bs, encoder_embedding_dim).cuda().float()
mask = get_mask_from_lengths(memory_lengths).cuda()
decoder_input = torch.zeros(bs, n_mel_channels).cuda().float()
attention_hidden = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype)
attention_cell = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype)
decoder_hidden = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype)
decoder_cell = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype)
attention_weights = torch.zeros(bs, seq_len, device=device, dtype=dtype)
attention_weights_cum = torch.zeros(bs, seq_len, device=device, dtype=dtype)
attention_context = torch.zeros(bs, encoder_embedding_dim, device=device, dtype=dtype)
mask = get_mask_from_lengths(memory_lengths).to(device)
decoder_input = torch.zeros(bs, n_mel_channels, device=device, dtype=dtype)
return (decoder_input, attention_hidden, attention_cell, decoder_hidden,
decoder_cell, attention_weights, attention_weights_cum,
@ -100,6 +104,8 @@ def init_decoder_inputs(memory, processed_memory, memory_lengths):
def init_decoder_outputs(memory, memory_lengths):
device = memory.device
dtype = memory.dtype
bs = memory.size(0)
seq_len = memory.size(1)
attention_rnn_dim = 1024
@ -107,15 +113,15 @@ def init_decoder_outputs(memory, memory_lengths):
encoder_embedding_dim = 512
n_mel_channels = 80
attention_hidden = torch.zeros(bs, attention_rnn_dim).cuda().float()
attention_cell = torch.zeros(bs, attention_rnn_dim).cuda().float()
decoder_hidden = torch.zeros(bs, decoder_rnn_dim).cuda().float()
decoder_cell = torch.zeros(bs, decoder_rnn_dim).cuda().float()
attention_weights = torch.zeros(bs, seq_len).cuda().float()
attention_weights_cum = torch.zeros(bs, seq_len).cuda().float()
attention_context = torch.zeros(bs, encoder_embedding_dim).cuda().float()
decoder_output = torch.zeros(bs, n_mel_channels).cuda().float()
gate_prediction = torch.zeros(bs, 1).cuda().float()
attention_hidden = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype)
attention_cell = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype)
decoder_hidden = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype)
decoder_cell = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype)
attention_weights = torch.zeros(bs, seq_len, device=device, dtype=dtype)
attention_weights_cum = torch.zeros(bs, seq_len, device=device, dtype=dtype)
attention_context = torch.zeros(bs, encoder_embedding_dim, device=device, dtype=dtype)
decoder_output = torch.zeros(bs, n_mel_channels, device=device, dtype=dtype)
gate_prediction = torch.zeros(bs, 1, device=device, dtype=dtype)
return (attention_hidden, attention_cell, decoder_hidden,
decoder_cell, attention_weights, attention_weights_cum,
@ -178,10 +184,15 @@ def swap_inputs_outputs(decoder_inputs, decoder_outputs):
def infer_tacotron2_trt(encoder, decoder_iter, postnet,
encoder_context, decoder_context, postnet_context,
sequences, sequence_lengths, measurements):
sequences, sequence_lengths, measurements, fp16):
memory = torch.zeros((len(sequence_lengths),sequence_lengths[0],512)).cuda().float()
processed_memory = torch.zeros((len(sequence_lengths),sequence_lengths[0],128)).cuda().float()
memory = torch.zeros((len(sequence_lengths), sequence_lengths[0], 512)).cuda()
if fp16:
memory = memory.half()
device = memory.device
dtype = memory.dtype
processed_memory = torch.zeros((len(sequence_lengths),sequence_lengths[0],128), device=device, dtype=dtype)
lens = torch.zeros_like(sequence_lengths)
encoder_tensors = {
@ -237,7 +248,7 @@ def infer_tacotron2_trt(encoder, decoder_iter, postnet,
decoder_inputs, decoder_outputs = swap_inputs_outputs(decoder_inputs, decoder_outputs)
mel_outputs_postnet = torch.zeros_like(mel_outputs).cuda().float()
mel_outputs_postnet = torch.zeros_like(mel_outputs, device=device, dtype=dtype)
postnet_tensors = {
# inputs
@ -254,7 +265,7 @@ def infer_tacotron2_trt(encoder, decoder_iter, postnet,
return mel_outputs_postnet, mel_lengths
def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements):
def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, fp16):
mel = mel.unsqueeze(3)
mel_size = mel.size(2)
@ -268,7 +279,7 @@ def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements):
z = torch.randn(batch_size, n_group, z_size, 1).cuda()
audios = torch.zeros(batch_size, mel_size*stride).cuda()
if "HALF" in str(waveglow.get_binding_dtype(waveglow.get_binding_index("mel"))):
if fp16:
z = z.half()
mel = mel.half()
audios = audios.half()
@ -337,8 +348,8 @@ def main():
with MeasureTime(measurements, "latency"):
mel, mel_lengths = infer_tacotron2_trt(encoder, decoder_iter, postnet,
encoder_context, decoder_context, postnet_context,
sequences, sequence_lengths, measurements)
audios = infer_waveglow_trt(waveglow, waveglow_context, mel, measurements)
sequences, sequence_lengths, measurements, args.fp16)
audios = infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, args.fp16)
with encoder_context, decoder_context, postnet_context, waveglow_context:
pass
@ -365,7 +376,7 @@ def main():
DLLogger.log(step=0, data={"denoiser": measurements['denoiser']})
DLLogger.flush()
prec = "fp16" if "fp16" in args.encoder else "fp32"
prec = "fp16" if args.fp16 else "fp32"
latency = measurements['latency']
throughput = audios.size(1)/latency
log_data = "1,"+str(sequence_lengths[0].item())+","+prec+","+str(latency)+","+str(throughput)+","+str(mel_lengths[0].item())+"\n"

View file

@ -68,8 +68,9 @@ def engine_info(engine_filepath):
dims: {dims}
}}"""
type_mapping = {"DataType.HALF": "TYPE_FP16",
"DataType.FLOAT": "TYPE_FP32",
"DataType.INT32": "TYPE_INT32"}
"DataType.FLOAT": "TYPE_FP32",
"DataType.INT32": "TYPE_INT32",
"DataType.BOOL" : "TYPE_BOOL"}
print("engine name", engine.name)
print("has_implicit_batch_dimension", engine.has_implicit_batch_dimension)

View file

@ -0,0 +1,36 @@
BasedOnStyle: LLVM
IndentWidth: 2
ColumnLimit: 80
ContinuationIndentWidth: 4
BinPackParameters: false
BinPackArguments: false
AllowAllParametersOfDeclarationOnNextLine: true
BreakBeforeBraces: Custom
BraceWrapping:
AfterEnum: true
AfterStruct: true
AfterClass: true
AfterControlStatement: false
AfterFunction: true
AfterNamespace: true
AfterUnion: true
AfterExternBlock: false
BeforeElse: false
IndentBraces: false
SplitEmptyRecord: true
# BreakInheritanceList: AfterColon
DerivePointerAlignment: false
PointerAlignment: Left
AlignAfterOpenBracket: AlwaysBreak
NamespaceIndentation: None
SpaceBeforeParens: true
# SpaceBeforeRangeBasedForLoopColon: true
# SpaceBeforeInheritanceColon: true
AlwaysBreakTemplateDeclarations: true
BreakBeforeBinaryOperators: true
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: false
BreakConstructorInitializers: AfterColon
ConstructorInitializerAllOnOneLineOrOnePerLine: true

View file

@ -0,0 +1,18 @@
*.swp
*.swo
*.swn
*.swm
*.pyc
*.csv
*.wav
test.json
__pycache__
build
models
engines
logs
audio
mels
Makefile
trtis_sdk
sampleTacotron2WaveGlow

View file

@ -0,0 +1,60 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(tacotron2_inference LANGUAGES CXX CUDA)
if (DEFINED DEVEL AND NOT DEVEL EQUAL 0)
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
# g++ warnings
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Wall")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Werror")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Wpedantic")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Weffc++")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Wextra")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -DDEVEL=1")
# nvcc warnings
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-Wall")
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-Werror")
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-Weffc++")
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-Wextra")
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-DDEVEL=1")
endif()
endif()
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CPP_DEVEL_FLAGS} -fPIC")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_DEVEL_FLAGS} -rdc=true -Xcompiler=-fPIC")
enable_testing()
add_subdirectory("src")

View file

@ -0,0 +1,15 @@
FROM nvcr.io/nvidia/pytorch:20.01-py3
# Make sure we have all needed modules
RUN python3 -c "import torch; import onnx; import scipy; import numpy; import librosa"
WORKDIR "/workspace"
ADD ./tacotron2 ./tacotron2
ADD ./waveglow ./waveglow
ADD ./common ./common
ADD ./trtis_cpp/scripts ./trtis_cpp/scripts
WORKDIR "/workspace/trtis_cpp"
ENTRYPOINT ["/bin/bash", "-c"]

View file

@ -0,0 +1,37 @@
ARG TRTIS_IMAGE=nvcr.io/nvidia/tensorrtserver:20.02-py3
FROM ${TRTIS_IMAGE}
RUN mkdir -p /workspace/trt-tacotron2-waveglow
WORKDIR /workspace/trt-tacotron2-waveglow
# Download custom backend SDK
RUN wget https://github.com/NVIDIA/tensorrt-inference-server/releases/download/v1.11.0/v1.11.0_ubuntu1804.custombackend.tar.gz
RUN tar xf v1.11.0_ubuntu1804.custombackend.tar.gz && mv custom-backend-sdk ./trtis_sdk
# install cmake
RUN apt-get update && apt-get install -qy cmake && apt-get clean
# build the source code
ADD src/ "./src"
ADD CMakeLists.txt "./"
ADD configure "./"
RUN ./configure --trtis
RUN make
ARG TACOTRON2_MODEL="tacotron.json"
ARG WAVEGLOW_MODEL="waveglow.onnx"
ARG DENOISER_MODEL="denoiser.json"
RUN mkdir -p "/models" "/engines"
ADD "${TACOTRON2_MODEL}" /models/
ADD "${WAVEGLOW_MODEL}" /models/
ADD "${DENOISER_MODEL}" /models/
ADD model-config/tacotron2waveglow /models/tacotron2waveglow
RUN mkdir -p /models/tacotron2waveglow/1
RUN cp -v "./build/lib/libtt2i_trtis.so" /models/tacotron2waveglow/1/
ADD scripts "./scripts"

View file

@ -0,0 +1,247 @@
# Tacotron2+WaveGlow Inference Using TensorRT Inference Server with TensorRT
This is a subfolder of the Tacotron2 for PyTorch repository that provides
scripts to deploy high-performance inference using NVIDIA TensorRT Inference
Server with a custom TensorRT
[backend](https://docs.nvidia.com/deeplearning/sdk/tensorrt-inference-server-guide/docs/build.html#building-a-custom-backend).
## Table of contents
* [Model overview](#model-overview)
- [Tacotron2 plugins](#tacotron2-plugins)
* [Setup](#setup)
- [Requirements](#requirements)
* [Quick Start Guide](#quick-start-guide)
- [Export the models](#export-the-models)
- [Setup the TRTIS server](#setup-the-trtis-server)
- [Setup the TRTIS client](#setup-the-trtis-client)
- [Run the TRTIS server](#run-the-trtis-server)
- [Run the TRTIS client](#run-the-trtis-client)
* [Advanced](#advanced)
- [Code structure](#code-structure)
- [Precision](#precision)
* [Performance](#performance)
- [Performance on NVIDIA T4](#performance-on-nvidia-t4)
- [Running the benchmark](#running-the-benchmark)
## Model overview
The Tacotron2 and WaveGlow models form a text-to-speech system that enables
users to synthesize natural sounding speech from raw transcripts without any
additional information such as patterns and/or rhythms of speech.
In this implementation, the Tacotron2 network is split into three sub-networks:
the encoder, the decoder, and the postnet.
This is followed by WaveGlow as a vocoder, and a Denoiser network that uses an
[STFT](https://en.wikipedia.org/wiki/Short-time_Fourier_transform)
to remove noise from the audio output.
More information on the Tacotron2 and WaveGlow architectures, as well as on
training, can be found in the [Tacotron2 PyTorch README](../README.md).
### Tacotron2 plugins
Because the layers in Tacotron2's decoder are quite small, many
deep learning frameworks fail to achieve high throughput for a batch size of
one, as the overhead associated with executing each of these small layers can
dominate the runtime.
TensorRT supports custom layers through its
[plugin](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#pluginv2-layer)
interface, which not only allows custom operations, but also allows
developers to manually tune and/or fuse specific layers in their
networks while still using TensorRT to perform automated optimizations on the
other layers, and to manage and execute the entire network.
This implementation uses several plugins for Tacotron2's decoder, including
fused Prenet and Attention layers, as well as LSTM Cell kernels optimized
specifically for the dimensions used in Tacotron2.
## Setup
### Requirements
Building and running the container requires `docker`, `nvidia-docker` and `bash`.
In addition to this, the host machine must have a Volta or Turing based GPU.
## Quick Start Guide
### Clone the repository
```bash
git clone https://github.com/NVIDIA/DeepLearningExamples
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/trtis_cpp
```
### Export the models
You can either train models yourself, or download pretrained checkpoints from [NGC](https://ngc.nvidia.com/catalog/models) and copy them to the `./checkpoints` directory:
- [Tacotron2 checkpoint](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16)
- [WaveGlow checkpoint](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16)
```bash
mkdir checkpoints
cp <Tacotron2_checkpoint> ./checkpoints/
cp <WaveGlow_checkpoint> ./checkpoints/
```
Next you will need to export the PyTorch checkpoints so that they can be used to build TensorRT engines. This can be done via the `export_weights.sh` script:
```bash
mkdir models
./export_weights.sh checkpoints/nvidia_tacotron2pyt_fp16_20190427 checkpoints/nvidia_waveglow256pyt_fp16 models/
```
### Setup the TRTIS server
```bash
./build_trtis.sh models/tacotron2.json models/waveglow.onnx models/denoiser.json
```
This will take some time as TensorRT tries out different tactics for best
performance while building the engines.
### Setup the TRTIS client
Next you need to build the client docker container. To do this, enter the
`trtis_client` directory and run the script `build_trtis_client.sh`.
```bash
cd trtis_client
./build_trtis_client.sh
cd ..
```
### Run the TRTIS server
To run the server locally, use the script `run_trtis_server.sh`:
```bash
./run_trtis_server.sh
```
You can use the environment variable `NVIDIA_VISIBLE_DEVICES` to set which GPUs
the TRTIS server sees.
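For example (the `run_trtis_server.sh` script in this commit derives the value it passes to the container from `CUDA_VISIBLE_DEVICES` on the host):
```bash
# expose only the first GPU to the TRTIS server
CUDA_VISIBLE_DEVICES=0 ./run_trtis_server.sh
```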
### Run the TRTIS client
Leave the server running. In another terminal, type:
```bash
cd trtis_client/
./run_trtis_client.sh phrases.txt
```
This will generate one WAV file per line of `phrases.txt` in the `audio/`
directory, named after the line number (e.g., `1.wav` through `8.wav` for an
8-line file). It is important that each line in the file ends with a period,
or Tacotron2 may fail to detect the end of the phrase.
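For example, a minimal `phrases.txt` might look like this (note that every line ends with a period):
```
Hello world, this is a test of the text to speech system.
The quick brown fox jumps over the lazy dog.
```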
## Advanced
### Code structure
The `src/` directory contains the following sub-directories:
* `trtis`: The directory containing code for the custom TRTIS backend.
* `trt/tacotron2`: The directory containing the Tacotron2 implementation in TensorRT.
* `trt/waveglow`: The directory containing the WaveGlow implementation in TensorRT.
* `trt/denoiser`: The directory containing the Denoiser (STFT) implementation in TensorRT.
* `trt/plugins`: The directory containing plugins used by the TensorRT engines.
* `trt/helpers`: The directory containing scripts for exporting models from
PyTorch.
The `trtis_client/` directory contains the code for running the client.
### Precision
By default, the `./build_trtis.sh` script builds the TensorRT engines with FP16 mode enabled, which allows some operations to be performed in lower precision in order to increase throughput. To build engines that use only FP32 precision, add `0` to the arguments of `./build_trtis.sh`:
```bash
./build_trtis.sh models/tacotron2.json models/waveglow.onnx models/denoiser.json 0
```
## Performance
The following tables show inference statistics for the Tacotron2 and WaveGlow
text-to-speech system.
The tables include average latency, latency standard deviation,
and latency confidence intervals. Throughput is measured as the number of
generated audio samples per second. RTF is the real-time factor which
tells how many seconds of speech are generated in 1 second of processing time.
For all tests in these tables, we used WaveGlow with 256 residual channels.
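As a worked example of the RTF column: in the first FP16 row of the table below, an average of 6.59 seconds of audio is generated with an average latency of 0.49 seconds, giving an RTF of roughly 6.59 / 0.49 ≈ 13.45, which matches the reported 13.48 up to rounding of the latency.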
### Performance on NVIDIA T4
#### TensorRT w/ Plugins in TRTIS
Latency in this table is measured from when the client sends the request to
when it receives the generated audio back.
|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)| Latency interval 90% (s)|Latency interval 95% (s)|Latency interval 99% (s)|Avg mels generated |Avg audio length (s)|Avg RTF|
|---:|----:|-----:|------:|------:|------:|------:|------:|----:|------:|-------:|
| 1 | 128 | FP16 | 0.49 | 0.00 | 0.49 | 0.49 | 0.50 | 564 | 6.59 | 13.48 |
| 4 | 128 | FP16 | 1.37 | 0.01 | 1.38 | 1.38 | 1.38 | 563 | 6.54 | 4.77 |
| 1 | 128 | FP32 | 1.30 | 0.01 | 1.30 | 1.30 | 1.31 | 567 | 6.58 | 5.08 |
| 4 | 128 | FP32 | 3.63 | 0.01 | 3.64 | 3.64 | 3.64 | 568 | 6.59 | 1.82 |
To reproduce this table, see [Running the benchmark](#running-the-benchmark)
below.
#### TensorRT w/ Plugins vs. PyTorch
Latency in this table is measured from just before the input sequence starts
being copied from host memory to the GPU, to just after the generated audio
finishes being copied back to host memory. That is, it covers only the work
taking place in the custom backend inside TRTIS.
|Framework|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)| Latency interval 90% (s)|Latency interval 95% (s)|Latency interval 99% (s)| Throughput (samples/sec) | Speed-up vs. PyT FP32 | Speed-up vs. PyT FP16 | Avg mels generated |Avg audio length (s)|Avg RTF|
|------:|----:|-----:|-----------:|--------:|------:|------:|------:|------:|------:|------:|----:|------:|-------:|---:|
| TRT w/ plugins | 1 | 128 | FP16 | 0.45 | 0.00 | 0.45 | 0.45 | 0.46 | 320,950 | __3.72x__ | __3.39x__ | 564 | 6.55 | 14.59 |
| TRT w/ plugins | 1 | 128 | FP32 | 1.26 | 0.01 | 1.27 | 1.27 | 1.27 | 115,150 | __1.33x__ | __1.21x__ | 567 | 6.58 | 5.22 |
| PyTorch | 1 | 128 | FP16 | 1.63 | 0.07 | 1.71 | 1.73 | 1.81 | 94,758 | __1.10x__ | __1.00x__ | 601 | 6.98 | 4.30 |
| PyTorch | 1 | 128 | FP32 | 1.77 | 0.08 | 1.88 | 1.92 | 2.00 | 86,705 | __1.00x__ | __0.91x__ | 600 | 6.96 | 3.92 |
That is a __3.72x__ speedup when using TensorRT FP16 with plugins when compared to
PyTorch FP32, and still a __3.39x__ speedup when compared to PyTorch FP16.
The TensorRT entries in this table can be reproduced by using the output of
the TRTIS server, when performing the steps for [Running the
benchmark](#running-the-benchmark) below.
The PyTorch entries can be reproduced by following the instructions
[here](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2).
### Running the benchmark
Once you have performed the steps in [Setup the TRTIS server](#setup-the-trtis-server) and
[Setup the TRTIS client](#setup-the-trtis-client), you can run the benchmark by starting the TRTIS server via:
```bash
./run_trtis_server.sh
```
Leave the server running, and in another terminal run the script `trtis_client/run_trtis_benchmark_client.sh`:
```bash
cd trtis_client/
./run_trtis_benchmark_client.sh <batch size>
```
Replace `<batch size>` with the desired batch size between 1 and 32. The engines are built with a maximum batch size of 32 in the `./build_trtis.sh` script.
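For example, running `./run_trtis_benchmark_client.sh 1` benchmarks batch size 1 and should produce output similar to the sample below.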
After some time this should produce output like:
```
Performed 1000 runs.
batch size = 1
input size = 128
avg latency (s) = 0.485718
latency std (s) = 0.00448834
latency interval 50% (s) = 0.485836
latency interval 90% (s) = 0.489517
latency interval 95% (s) = 0.490613
latency interval 99% (s) = 0.494721
average mels generated = 564
average audio generated (s) = 6.54803
average real-time factor = 13.4811
```

View file

@ -0,0 +1,79 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-all}"
IMAGE_NAME="trt-tacotron2-waveglow.trtis"
CONTAINER_NAME="trt-tacotron2-waveglow.trtis.container"
die() {
echo "ERROR: ${@}" 1>&2
exit 1
}
if [[ $# != 4 && $# != 3 ]]; then
echo "Unexpected number of arguments: $#"
echo "USAGE:"
echo "\t${0} <tacotron2 model> <waveglow model> <denoiser model> [use amp 0/1]"
exit 1
fi
# remove container if it exists
if [[ "$(docker ps -f "name=${CONTAINER_NAME}" -qa | wc -l)" != "0" ]]; then
docker rm "${CONTAINER_NAME}"
fi
TACOTRON2_MODEL="${1}"
WAVEGLOW_MODEL="${2}"
DENOISER_MODEL="${3}"
AMP="${4:-1}"
# copy models to build context
mkdir -p tmp/
cp -v "${TACOTRON2_MODEL}" tmp/tacotron2.json && TACOTRON2_MODEL="tmp/tacotron2.json" || die "Failed to copy ${TACOTRON2_MODEL}"
cp -v "${WAVEGLOW_MODEL}" tmp/waveglow.onnx && WAVEGLOW_MODEL="tmp/waveglow.onnx" || die "Failed to copy ${WAVEGLOW_MODEL}"
cp -v "${DENOISER_MODEL}" tmp/denoiser.json && DENOISER_MODEL="tmp/denoiser.json" || die "Failed to copy ${DENOISER_MODEL}"
docker build \
--build-arg TACOTRON2_MODEL="${TACOTRON2_MODEL}" \
--build-arg WAVEGLOW_MODEL="${WAVEGLOW_MODEL}" \
--build-arg DENOISER_MODEL="${DENOISER_MODEL}" \
-f Dockerfile.trtis . -t "${IMAGE_NAME}" || die "Failed to build docker container."
nvidia-docker run \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
--name "${CONTAINER_NAME}" \
"${IMAGE_NAME}" "./scripts/build_engines.sh" "${AMP}" || die "Failed to build engines."
docker commit "${CONTAINER_NAME}" "${IMAGE_NAME}" || die "Failed to commit changes."
docker rm "${CONTAINER_NAME}"

View file

@ -0,0 +1,60 @@
#!/bin/bash
BUILD_DIR="build/"
MAKEFILE="./Makefile"
OPTIONS="-DCMAKE_VERBOSE_MAKEFILE=1"
DEBUG=0
while [[ $# -gt 0 ]]; do
flag="${1}"
case "${flag}" in
--cc=*)
OPTIONS="${OPTIONS} -DCMAKE_C_COMPILER=${flag#*=}"
;;
--cxx=*)
OPTIONS="${OPTIONS} -DCMAKE_CXX_COMPILER=${flag#*=}"
;;
--devel)
OPTIONS="${OPTIONS} -DDEVEL=1"
;;
--debug)
DEBUG=1
;;
--trtis)
OPTIONS="${OPTIONS} -DBUILD_TRTIS=1"
;;
*)
echo "Unknown argument '${flag}'."
exit 1
;;
esac
shift
done
if [[ "${DEBUG}" == "1" ]]; then
OPTIONS="${OPTIONS} -DCMAKE_BUILD_TYPE=Debug"
else
OPTIONS="${OPTIONS} -DCMAKE_BUILD_TYPE=Release"
fi
if [[ -d "${BUILD_DIR}" ]]; then
rm -rf "${BUILD_DIR}"
fi
mkdir -p "${BUILD_DIR}"
pushd "${BUILD_DIR}"
cmake --version
echo "cmake .. ${OPTIONS}"
cmake .. ${OPTIONS}
popd
echo "# Auto generated makefile" > "${MAKEFILE}"
echo "" >> "${MAKEFILE}"
echo "all test:" >> "${MAKEFILE}"
echo " make -C \"${BUILD_DIR}\" \$@" >> "${MAKEFILE}"
echo "" >> "${MAKEFILE}"
echo ".PHONY: all" >> "${MAKEFILE}"
echo "" >> "${MAKEFILE}"

View file

@ -0,0 +1,104 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-0}"
DOCKER_FILE="$(realpath Dockerfile.export_weights)"
IMAGE_NAME="trt-tacotron2-waveglow.weight_export"
CONTAINER_NAME="trt-tacotron2-waveglow.weight_export.container"
die() {
echo "ERROR: ${@}" 1>&2
exit 1
}
die_and_remove_image() {
#docker rmi "${IMAGE_NAME}"
die "${@}"
}
if [[ "${#}" != 3 ]]; then
echo "Invalid arguments: ${@}"
echo "USAGE:"
echo " ${0} <tacotron2 checkpoint> <waveglow checkpoint> <output directory>"
exit 1
fi
TACOTRON2_PT="${1}"
WAVEGLOW_PT="${2}"
MODEL_DIR="$(realpath ${3})"
TACOTRON2_DIR="$(dirname $(realpath ${TACOTRON2_PT}))"
TACOTRON2_NAME="$(basename ${TACOTRON2_PT})"
WAVEGLOW_DIR="$(dirname $(realpath ${WAVEGLOW_PT}))"
WAVEGLOW_NAME="$(basename ${WAVEGLOW_PT})"
DLE_DIR="../"
# remove docker container if it exists
docker rm "${CONTAINER_NAME}" &> /dev/null
pushd "${DLE_DIR}"
docker build . -f "${DOCKER_FILE}" -t "${IMAGE_NAME}" || die "Failed to build container"
# export tacotron2
nvidia-docker run \
--rm \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
--name "${CONTAINER_NAME}" \
-v "${TACOTRON2_DIR}:/checkpoints" \
-v "${MODEL_DIR}:/models" \
"${IMAGE_NAME}" "./scripts/tacotron2_to_json.py \"/checkpoints/${TACOTRON2_NAME}\" /models/tacotron2.json" || \
die_and_remove_image "Failed to export tacotron2."
# export waveglow
nvidia-docker run \
--rm \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
--name "${CONTAINER_NAME}" \
-v "${WAVEGLOW_DIR}:/checkpoints" \
-v "${MODEL_DIR}:/models" \
"${IMAGE_NAME}" \
"./scripts/waveglow_to_onnx.py -W \"${DLE_DIR}\" -w \"/checkpoints/${WAVEGLOW_NAME}\" -o /models/waveglow.onnx" || \
die_and_remove_image "Failed to export waveglow."
# export denoiser
nvidia-docker run \
--rm \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
--name "${CONTAINER_NAME}" \
-v "${WAVEGLOW_DIR}:/checkpoints" \
-v "${MODEL_DIR}:/models" \
"${IMAGE_NAME}" \
"./scripts/denoiser_to_json.py \"${DLE_DIR}\" \"/checkpoints/${WAVEGLOW_NAME}\" /models/denoiser.json" || \
die_and_remove_image "Failed to export the denoiser."
docker rmi "${IMAGE_NAME}"

View file

@ -0,0 +1,37 @@
name: "tacotron2waveglow"
platform: "custom"
default_model_filename: "libtt2i_trtis.so"
max_batch_size: 32
input [
{
name: "INPUT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output [
{
name: "OUTPUT"
data_type: TYPE_FP32
dims: [ -1 ]
},
{
name: "OUTPUT_LENGTH"
data_type: TYPE_INT32
dims: [ -1 ]
}
]
parameters [
{
key: "engine_path"
value: { string_value: "/engines" }
},
{
key: "mapping_path"
value: { string_value: "/models/tacotron2waveglow/mapping.txt" }
},
{
key: "use_denoiser"
value: { string_value: "yes" }
}
]

View file

@ -0,0 +1,149 @@
# sequence-number symbol
0 _
1 -
2 !
3 '
4 (
5 )
6 ,
7 .
8 :
9 ;
10 ?
11
38 A
39 B
40 C
41 D
42 E
43 F
44 G
45 H
46 I
47 J
48 K
49 L
50 M
51 N
52 O
53 P
54 Q
55 R
56 S
57 T
58 U
59 V
60 W
61 X
62 Y
63 Z
38 a
39 b
40 c
41 d
42 e
43 f
44 g
45 h
46 i
47 j
48 k
49 l
50 m
51 n
52 o
53 p
54 q
55 r
56 s
57 t
58 u
59 v
60 w
61 x
62 y
63 z
64 @AA
65 @AA0
66 @AA1
67 @AA2
68 @AE
69 @AE0
70 @AE1
71 @AE2
72 @AH
73 @AH0
74 @AH1
75 @AH2
76 @AO
77 @AO0
78 @AO1
79 @AO2
80 @AW
81 @AW0
82 @AW1
83 @AW2
84 @AY
85 @AY0
86 @AY1
87 @AY2
88 @B
89 @CH
90 @D
91 @DH
92 @EH
93 @EH0
94 @EH1
95 @EH2
96 @ER
97 @ER0
98 @ER1
99 @ER2
100 @EY
101 @EY0
102 @EY1
103 @EY2
104 @F
105 @G
106 @HH
107 @IH
108 @IH0
109 @IH1
110 @IH2
111 @IY
112 @IY0
113 @IY1
114 @IY2
115 @JH
116 @K
117 @L
118 @M
119 @N
120 @NG
121 @OW
122 @OW0
123 @OW1
124 @OW2
125 @OY
126 @OY0
127 @OY1
128 @OY2
129 @P
130 @R
131 @S
132 @SH
133 @T
134 @TH
135 @UH
136 @UH0
137 @UH1
138 @UH2
139 @UW
140 @UW0
141 @UW1
142 @UW2
143 @V
144 @W
145 @Y
146 @Z
147 @ZH

View file

@ -0,0 +1,45 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
IMAGE_NAME="trt-tacotron2-waveglow.trtis"
NVIDIA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-all}"
nvidia-docker run \
--rm \
--shm-size=1g \
--ulimit memlock=-1 \
--ulimit stack=67108864 \
-p8000:8000 \
-p8001:8001 \
-p8002:8002 \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
-e "LD_LIBRARY_PATH=/opt/tensorrtserver/lib" \
"${IMAGE_NAME}" trtserver --model-store=/models

View file

@ -0,0 +1,123 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
MODEL_DIR="/models/"
ENGINE_DIR="/engines/"
TACOTRON2_ID="1c5ZTuT7J08wLUoVZ2KkUs_VdZuJ86ZqA"
WAVEGLOW_ID="1WsibBTsuRg_SF2Z6L6NFRTT-NjEy1oTx"
TACOTRON2_PT="${MODEL_DIR}/tacotron2.pt"
WAVEGLOW_PT="${MODEL_DIR}/waveglow.pt"
TACOTRON2_JSON="${MODEL_DIR}/tacotron2.json"
WAVEGLOW_ONNX="${MODEL_DIR}/waveglow.onnx"
DENOISER_JSON="${MODEL_DIR}/denoiser.json"
HELPER_DIR="src/trt/helpers"
BIN_DIR="./build/bin"
BENCHMARK_BIN="${BIN_DIR}/benchmark"
MAX_BATCH_SIZE=32
SCRIPT_DIR="$(dirname "${0}")"
ENGINE_BUILD_SCRIPT="${SCRIPT_DIR}/build_engines.sh"
die() {
echo "ERROR: ${@}" 1>&2
exit 1
}
download_gfile() {
which curl &> /dev/null || die "Failed to find 'curl'."
# download file from google drive
local GOID="${1}"
local filename="${2}"
local GURL='https://drive.google.com/uc?export=download'
local cookie="$(mktemp)"
curl -sc "${cookie}" "${GURL}&id=${GOID}"
local getcode="$(awk '/_warning_/ {print $NF}' "${cookie}")"
curl -Lb "${cookie}" "${GURL}&confirm=${getcode}&id=${GOID}" -o "${filename}"
rm "${cookie}"
}
mkdir -p "${ENGINE_DIR}" "${MODEL_DIR}"
apt-get update -qy
apt-get install -y libsndfile1 || die "Failed to install libsndfile"
apt-get clean
git clone --depth=1 https://github.com/NVIDIA/DeepLearningExamples
TACO2_DIR="./DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/"
# install required packages
pip3 install "torch==1.3" onnx scipy librosa || die "Failed while installing python packages."
# test packages
python3 -c "import torch; import onnx; import scipy; import numpy; import librosa" || die "Python packages fail to import"
## build tacotron2 engine
# download model
download_gfile "${TACOTRON2_ID}" "${TACOTRON2_PT}" || die "Failed to get tacotron2.pt"
# convert model to importable format
${HELPER_DIR}/tacotron2_to_json.py "${TACOTRON2_PT}" "${TACOTRON2_JSON}" || die "Failed to export tacotron2 to json."
rm -v "${TACOTRON2_PT}"
## build wave glow engine
# download model
download_gfile "${WAVEGLOW_ID}" "${WAVEGLOW_PT}" || die "Failed to get waveglow.pt"
# convert model to importable format
${HELPER_DIR}/waveglow_to_onnx.py \
-w "${WAVEGLOW_PT}" \
-W "${TACO2_DIR}" \
-o "${WAVEGLOW_ONNX}" \
--length_mels=160 || die "Failed to export waveglow to onnx."
## build denoiser engine
${HELPER_DIR}/denoiser_to_json.py "${TACO2_DIR}" "${WAVEGLOW_PT}" "${DENOISER_JSON}" || die "Failed to export denoiser to json."
# wait to remove wave glow until after denoiser is finished
rm -v "${WAVEGLOW_PT}"
rm -rvf "./DeepLearningExamples"
pip3 uninstall -qy torch onnx scipy
apt-get purge -y libsndfile1
"${ENGINE_BUILD_SCRIPT}" || die "Failed to build engines"

View file

@ -0,0 +1,91 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
MODEL_DIR="/models/"
ENGINE_DIR="/engines/"
TACOTRON2_JSON="${MODEL_DIR}/tacotron2.json"
WAVEGLOW_ONNX="${MODEL_DIR}/waveglow.onnx"
DENOISER_JSON="${MODEL_DIR}/denoiser.json"
TACOTRON2_ENG="${ENGINE_DIR}/tacotron2.eng"
WAVEGLOW_ENG="${ENGINE_DIR}/waveglow_chunk160_fp16.eng"
DENOISER_ENG="${ENGINE_DIR}/denoiser.eng"
BIN_DIR="./build/bin"
BENCHMARK_BIN="${BIN_DIR}/benchmark"
BUILD_TACOTRON2_BIN="${BIN_DIR}/build_tacotron2"
BUILD_WAVEGLOW_BIN="${BIN_DIR}/build_waveglow"
MAX_BATCH_SIZE=32
die() {
echo "ERROR: ${@}" 1>&2
exit 1
}
AMP="amp"
if [[ "${#}" == "1" ]]; then
if [[ "${1}" == "0" || "${1}" == "no" ]]; then
AMP="fp32"
elif [[ "${1}" == "1" || "${1}" == "yes" ]]; then
AMP="amp"
else
echo "Invalid arguments."
exit 1
fi
fi
echo
echo "Building with -F${AMP}"
echo
## build tacotron2 engine
./build/bin/build_tacotron2 "${TACOTRON2_JSON}" "${TACOTRON2_ENG}" -B ${MAX_BATCH_SIZE} -I 400 -F${AMP} || die "Failed to build tacotron2 engine."
rm -v "${TACOTRON2_JSON}"
## build wave glow engine
./build/bin/build_waveglow "${WAVEGLOW_ONNX}" "${WAVEGLOW_ENG}" -B ${MAX_BATCH_SIZE} -F${AMP} || die "Failed to build waveglow engine."
rm -v "${WAVEGLOW_ONNX}"
## build denoiser engine
./build/bin/build_denoiser "${DENOISER_JSON}" "${DENOISER_ENG}" -B ${MAX_BATCH_SIZE} -F${AMP} || die "Failed to build denoiser engine."
rm -v "${DENOISER_JSON}"
ls "${TACOTRON2_ENG}" "${WAVEGLOW_ENG}" "${DENOISER_ENG}" || die "Unable to access built engines."
echo "Successfully built '${TACOTRON2_ENG}', '${WAVEGLOW_ENG}', and '${DENOISER_ENG}'"

View file

@ -0,0 +1,104 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import json
import torch
import sys
import os
from scipy.signal import get_window
import librosa.util as librosa_util
WAVEGLOW_CONFIG = {
"n_mel_channels": 80,
"n_flows": 12,
"n_group": 8,
"n_early_every": 4,
"n_early_size": 2,
"WN_config": {
"n_layers": 8,
"kernel_size": 3,
"n_channels": 256
}
}
def gen_win_sq(
denoiser):
window = denoiser.stft.window
win_length = denoiser.stft.win_length
n_fft = denoiser.stft.filter_length
# Compute the squared window at the desired length
win_sq = get_window(window, win_length, fftbins=True)
win_sq = librosa_util.normalize(win_sq, norm=None)**2
win_sq = librosa_util.pad_center(win_sq, n_fft)
return win_sq
if len(sys.argv) < 4 or len(sys.argv) > 5:
print("USAGE:")
print(
"\t%s <tacotron2 directory> <waveglow checkpoint> <json output> [strength, default=0.1]" % sys.argv[0])
sys.exit(1)
json_path = sys.argv[3]
sys.path.append(sys.argv[1])
# must be imported after path is modified
from import_utils import load_waveglow
from waveglow.denoiser import Denoiser
strength = 0.1
if len(sys.argv) == 5:
strength = float(sys.argv[4])
print("Building denoiser")
waveglow = load_waveglow(sys.argv[2], WAVEGLOW_CONFIG)
denoiser = Denoiser(waveglow).cuda()
statedict = {}
statedict["denoiser.stft.forward_basis"] = denoiser.stft.forward_basis.cpu(
).numpy().tolist()
statedict["denoiser.stft.inverse_basis"] = denoiser.stft.inverse_basis.cpu(
).numpy().tolist()
statedict["denoiser.stft.win_sq"] = gen_win_sq(denoiser).tolist()
statedict["denoiser.bias_spec"] = (
denoiser.bias_spec*strength).cpu().numpy().tolist()
with open(json_path, "w") as fout:
json.dump(statedict, fout, indent=2)
print("Wrote to '%s'" % json_path)

View file

@ -0,0 +1,51 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import sys
if len(sys.argv) != 3:
print("Must specify path to PyTorch Tacotron2 containing 'text' module o load and text file to write")
sys.exit(1)
modulePath = sys.argv[1]
outputPath = sys.argv[2]
sys.path.append(modulePath)
from text import symbols
i=0
with open(outputPath, "w") as fout:
print("# sequence-number symbol", file=fout)
for s in symbols:
print("%d %s" % (i, s), file=fout)
i+=1
print("Successfully wrote %d symbols to '%s'." % (i, outputPath))

View file

@ -0,0 +1,30 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
from .waveglow import load_waveglow

View file

@ -0,0 +1,98 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import pickle
import torch
from waveglow.model import WaveGlow
def split_cond_layers(model):
for WN in model.WN:
if hasattr(WN, "cond_layer"):
n_layers = len(WN.res_skip_layers)
conv_weights = WN.cond_layer.weight
conv_bias = WN.cond_layer.bias
conv_stride = WN.cond_layer.stride
conv_dilation = WN.cond_layer.dilation
conv_padding = WN.cond_layer.padding
num_in_channels = conv_weights.size(1)
num_out_channels = conv_weights.size(0)//n_layers
kernel_size = conv_weights.size(2)
WN.cond_layers = []
for i in range(n_layers):
layer = torch.nn.Conv1d(
in_channels=num_in_channels,
out_channels=num_out_channels,
kernel_size=kernel_size,
stride=conv_stride,
padding=conv_padding,
dilation=conv_dilation)
layer.weight.data[:, :, :] = conv_weights.data[
i*num_out_channels:(i+1)*num_out_channels, :, :]
layer.bias.data[:] = conv_bias.data[
i*num_out_channels:(i+1)*num_out_channels]
layer = torch.nn.utils.weight_norm(layer, name='weight')
WN.cond_layers.append(layer)
return model
def load_waveglow(filename, waveglow_config):
class RenamingUnpickler(pickle.Unpickler):
def find_class(self, module, name):
if module == 'glow':
module = 'waveglow.model'
return super().find_class(module, name)
class RenamingPickleModule:
def load(self, f, *args, **kw_args):
return self.Unpickler(f, *args, **kw_args).load()
def Unpickler(self, f, **pickle_load_args):
return RenamingUnpickler(f, **pickle_load_args)
pickle_module = RenamingPickleModule()
blob = torch.load(filename, pickle_module=pickle_module)
if 'state_dict' in blob:
waveglow = WaveGlow(**waveglow_config).cuda()
state_dict = {}
for key, value in blob["state_dict"].items():
newKey = key
if key.startswith("module."):
newKey = key[len("module."):]
state_dict[newKey] = value
waveglow.load_state_dict(state_dict)
else:
waveglow = blob['model']
waveglow = split_cond_layers(waveglow)
waveglow = waveglow.remove_weightnorm(waveglow)
waveglow.cuda().eval()
return waveglow

View file

@ -0,0 +1,56 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import json
import torch
import sys
if len(sys.argv) != 3:
print("Must specify statedict to load and json to write")
sys.exit(1)
statedict_path = sys.argv[1]
json_path = sys.argv[2]
print("Reading from '%s' and writing to '%s'." % (statedict_path, json_path))
statedict = dict(torch.load(statedict_path)["state_dict"])
outdict = {}
for k, v in dict(statedict).items():
if k.startswith("module."):
k = k[len("module."):]
print(k)
outdict[k] = v.cpu().numpy().tolist()
with open(json_path, "w") as fout:
json.dump(outdict, fout)
print("Wrote to '%s'" % json_path)

View file

@ -0,0 +1,329 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import json
import sys
import onnx
import numpy as np
from scipy.io.wavfile import write
import argparse
import torch
args = None
def convert_conv_1d_to_2d(conv1d):
conv2d = torch.nn.Conv2d(conv1d.weight.size(1),
conv1d.weight.size(0),
(conv1d.weight.size(2), 1),
stride=(conv1d.stride[0], 1),
dilation=(conv1d.dilation[0], 1),
padding=(conv1d.padding[0], 0))
conv2d.weight.data[:, :, :, 0] = conv1d.weight.data
conv2d.bias.data = conv1d.bias.data
return conv2d
def convert_WN_1d_to_2d_(WN):
"""
Modifies the WaveNet-like affine coupling layer in-place to use 2-d convolutions
"""
WN.start = convert_conv_1d_to_2d(WN.start)
WN.end = convert_conv_1d_to_2d(WN.end)
for i in range(len(WN.in_layers)):
WN.in_layers[i] = convert_conv_1d_to_2d(WN.in_layers[i])
for i in range(len(WN.res_skip_layers)):
WN.res_skip_layers[i] = convert_conv_1d_to_2d(WN.res_skip_layers[i])
for i in range(len(WN.res_skip_layers)):
WN.cond_layers[i] = convert_conv_1d_to_2d(WN.cond_layers[i])
def convert_convinv_1d_to_2d(convinv):
"""
Takes an invertible 1x1 1-d convolution and returns a 2-d convolution that does
the inverse
"""
conv2d = torch.nn.Conv2d(convinv.W_inverse.size(1),
convinv.W_inverse.size(0),
1, bias=False)
conv2d.weight.data[:, :, :, 0] = convinv.W_inverse.data
return conv2d
def convert_1d_to_2d_(glow):
"""
Caffe2 and TensorRT don't seem to support 1-d convolutions or properly
convert ONNX exports with 1d convolutions to 2d convolutions yet, so we
do the conversion to 2-d convolutions before ONNX export
"""
# Convert upsample to 2d
upsample = torch.nn.ConvTranspose2d(glow.upsample.weight.size(0),
glow.upsample.weight.size(1),
(glow.upsample.weight.size(2), 1),
stride=(glow.upsample.stride[0], 1))
upsample.weight.data[:, :, :, 0] = glow.upsample.weight.data
upsample.bias.data = glow.upsample.bias.data
glow.upsample = upsample
# Convert WN to 2d
for WN in glow.WN:
convert_WN_1d_to_2d_(WN)
# Convert invertible conv to 2d
for i in range(len(glow.convinv)):
glow.convinv[i] = convert_convinv_1d_to_2d(glow.convinv[i])
def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
in_act = input_a+input_b
in_left = in_act[:, 0:n_channels, :, :]
in_right = in_act[:, n_channels:2*n_channels, :, :]
t_act = torch.tanh(in_left)
s_act = torch.sigmoid(in_right)
acts = t_act * s_act
return acts
def WN_forward(self, forward_input):
"""
This is a forward replacement for the WN forward. It is required because
the code was written for 1-d convolutions, which are not yet supported by
ONNX export.
"""
audio, spect = forward_input
audio = self.start(audio)
for i in range(self.n_layers):
acts = fused_add_tanh_sigmoid_multiply(
self.in_layers[i](audio),
self.cond_layers[i](spect),
self.n_channels)
res_skip_acts = self.res_skip_layers[i](acts)
if i < self.n_layers - 1:
audio = res_skip_acts[:, 0:self.n_channels, :, :] + audio
skip_acts = res_skip_acts[:,
self.n_channels:2*self.n_channels, :, :]
else:
skip_acts = res_skip_acts
if i == 0:
output = skip_acts
else:
output = skip_acts + output
return self.end(output)
def infer_o(self, spect, z):
"""
In order for the trace to work when running through ONNX with 2-d convolutions,
we need to overwrite the forward method. All shape information is
pre-calculated so that ONNX doesn't export "Dynamic" outputs, which are not yet
supported by TensorRT.
"""
spect = self.upsample(spect)
spect = torch.squeeze(spect, 3)
spect = spect.view(self.view_size_1)
spect = spect.permute(0, 2, 1, 3)
spect = spect.contiguous()
spect = spect.view(self.view_size_2)
spect = spect.permute(0, 2, 1)
spect = spect.reshape([
self.batch_size,
self.upsample_weight_size*self.n_group,
self.length_spect_group,
1])
audio = z[:, 0:self.n_remaining_channels, :, :]
z = z[:, self.n_remaining_channels:self.n_group, :, :]
for k in reversed(range(self.n_flows)):
n_half = self.n_halves[k]
audio_0 = audio[:, 0:n_half, :, :]
audio_1 = audio[:, n_half:2*n_half, :, :]
output = self.WN[k]((audio_0, spect))
s = output[:, n_half:2*n_half, :, :]
b = output[:, 0:n_half, :, :]
audio_1 = (audio_1 - b)/torch.exp(s)
audio = torch.cat([audio_0, audio_1], 1)
audio = self.convinv[k](audio)
if k % self.n_early_every == 0 and k > 0:
audio = torch.cat((z[:, 0:self.n_early_size, :, :], audio), 1)
z = z[:, self.n_early_size:self.n_group -
self.n_remaining_channels, :, :]
audio = torch.squeeze(audio, 3)
audio = audio.permute(0, 2, 1).contiguous().view(
1, (self.length_spect_group * self.n_group))
return audio
def main(waveglow_path, output_path, batch_size, length_mels):
"""
Takes a WaveGlow model, a batch size, and a length in mels, and outputs a
static ONNX representation using 2-d convolutions.
"""
torch.manual_seed(0)
model = load_waveglow(waveglow_path, waveglow_config)
model.batch_size = batch_size
length_spect = length_mels
length_samples = 768 + 256*length_spect
model.upsample_weight_size = model.upsample.weight.size(0)
spect = torch.cuda.FloatTensor(
batch_size, model.upsample_weight_size, length_spect).normal_()
spect = torch.autograd.Variable(spect.cuda(), requires_grad=False)
# Run inference because it forces inverses to be calculated
with torch.no_grad():
test_out1 = model.infer(spect)
assert(length_samples % model.n_group == 0)
model.length_spect_group = int(length_samples / model.n_group)
# Pre-calculating the sizes of noise to use so it's not dynamic
n_halves = []
n_half = int(model.n_remaining_channels/2)
for k in reversed(range(model.n_flows)):
n_halves.append(n_half)
if k % model.n_early_every == 0 and k > 0:
n_half = n_half + int(model.n_early_size/2)
n_halves.reverse()
model.n_halves = n_halves
model.view_size_1 = torch.Size(
[model.batch_size, model.upsample_weight_size, model.length_spect_group, model.n_group])
model.view_size_2 = torch.Size(
[model.batch_size, model.length_spect_group, model.upsample_weight_size*model.n_group])
# Replace old forward with inference
glow.WaveGlow.forward = infer_o
glow.WN.forward = WN_forward
# Convert whole model to 2d convolutions
convert_1d_to_2d_(model)
model.cuda()
spect = torch.cuda.FloatTensor(
batch_size, model.upsample.weight.size(0), length_spect, 1).normal_()
z = torch.cuda.FloatTensor(
1, model.n_group, model.length_spect_group, 1).normal_()
spect = torch.autograd.Variable(spect.cuda(), requires_grad=False)
z = torch.autograd.Variable(z, requires_grad=False)
# Get output for comparison with Caffe2
with torch.no_grad():
test_out2 = model(spect, z)
# Export model
torch.onnx.export(model, (spect, z),
output_path,
dynamic_axes={'spect': [0], 'z': [0]},
input_names=['spect', 'z'],
output_names=['audio'],
opset_version=10,
verbose=True)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-w', '--waveglow_path',
help='Path to waveglow decoder checkpoint with model',
required=True)
parser.add_argument('-W', '--tacotron2_home', help='Path to DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2 directory.',
required=True)
parser.add_argument('-o', "--onnx_path",
help="Path to output ONNX file", required=True)
parser.add_argument("--batch_size", default=1, type=int)
parser.add_argument("--length_mels", default=160, type=int)
# add wave glow arguments
waveglow = parser.add_argument_group("WaveGlow parameters")
waveglow.add_argument('--n-mel-channels', default=80, type=int,
help='Number of bins in mel-spectrograms')
# glow parameters
waveglow.add_argument('--flows', default=12, type=int,
help='Number of steps of flow')
waveglow.add_argument('--groups', default=8, type=int,
help='Number of samples in a group processed by the steps of flow')
waveglow.add_argument('--early-every', default=4, type=int,
help='Determines how often (i.e., after how many coupling layers) \
a number of channels (defined by --early-size parameter) are output\
to the loss function')
waveglow.add_argument('--early-size', default=2, type=int,
help='Number of channels output to the loss function')
waveglow.add_argument('--sigma', default=1.0, type=float,
help='Standard deviation used for sampling from Gaussian')
waveglow.add_argument('--segment-length', default=4000, type=int,
help='Segment length (audio samples) processed per iteration')
# wavenet parameters
wavenet = waveglow.add_argument_group('WaveNet parameters')
wavenet.add_argument('--wn-kernel-size', default=3, type=int,
help='Kernel size for dilated convolution in the affine coupling layer (WN)')
wavenet.add_argument('--wn-channels', default=256, type=int,
help='Number of channels in WN')
wavenet.add_argument('--wn-layers', default=8, type=int,
help='Number of layers in WN')
args = parser.parse_args()
# do imports as needed
sys.path.append(args.tacotron2_home)
import waveglow.model as glow
from import_utils import load_waveglow
global waveglow_config
waveglow_config = {
"n_mel_channels": args.n_mel_channels,
"n_flows": args.flows,
"n_group": args.groups,
"n_early_every": args.early_every,
"n_early_size": args.early_size,
"WN_config": {
"n_layers": args.wn_layers,
"kernel_size": args.wn_kernel_size,
"n_channels": args.wn_channels
}
}
main(args.waveglow_path, args.onnx_path, args.batch_size, args.length_mels)

View file

@ -0,0 +1,43 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# include headers in current directory
include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
# sub-pieces
add_subdirectory("trt")
add_subdirectory("bin")
# build trtis
if (DEFINED BUILD_TRTIS AND NOT BUILD_TRTIS EQUAL 0)
message("Building TRTIS backend")
add_subdirectory("trtis")
endif()
# build tests
add_subdirectory("test")

View file

@@ -0,0 +1,48 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
function(add_binary bin_file)
get_filename_component(bin_name "${bin_file}" NAME_WE)
add_executable(${bin_name} ${bin_file})
target_link_libraries(${bin_name} tt2i)
target_include_directories(${bin_name} PRIVATE
../trt/
../trt/util
../trt/tacotron2
../trt/waveglow
../trt/denoiser
../trt/common
)
set_property(TARGET ${bin_name} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
endfunction()
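# add_binary() above gives every .cpp file in this directory its own
# executable, named after the source file (without extension), linked against
# the tt2i library and placed in ${CMAKE_BINARY_DIR}/bin.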
# build benchmark executables
file(GLOB binaries *.cpp)
foreach (file ${binaries})
add_binary(${file})
endforeach()

View file

@@ -0,0 +1,189 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cudaUtils.h"
#include "denoiserBuilder.h"
#include "engineCache.h"
#include "jsonModelImporter.h"
#include "logging.h"
#include "NvInfer.h"
#include <iostream>
#include <memory>
using namespace nvinfer1;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
bool matches(const std::string& arg, const std::string& flag)
{
return arg.length() >= flag.length() && arg.substr(0, flag.length()) == flag;
}
int parseNumFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
int value;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
value = std::stol(arg.substr(flag.length()));
} else if (*i + 1 < argc) {
++(*i);
value = std::stol(argv[*i]);
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
return value;
}
int parseAmpFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
std::string str;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
str = arg.substr(flag.length());
} else if (*i + 1 < argc) {
++(*i);
str = argv[*i];
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
int value;
if (str == "fp32") {
value = 0;
} else if (str == "amp") {
value = 1;
} else {
throw std::runtime_error(
"Invalid argument for precision (amp|fp32): " + str);
}
return value;
}
void usage(const std::string& binName)
{
std::cerr << "usage: " << std::endl;
std::cerr << " " << binName << " <model file> <engine file> [options]\n";
std::cerr << "options:" << std::endl;
std::cerr << " -B<batch size>" << std::endl;
std::cerr << " -F<precision (fp32|amp)>" << std::endl;
std::cerr << " -h" << std::endl;
}
void parseArgs(
const int argc,
const char** const argv,
std::string* model,
std::string* enginePath,
int* batchSize,
int* useAMP)
{
bool modelSet = false;
bool enginePathSet = false;
for (int i = 1; i < argc; ++i) {
const std::string arg(argv[i]);
if (matches(arg, "-B")) {
*batchSize = parseNumFlag(argc, argv, "-B", &i);
} else if (matches(arg, "-F")) {
*useAMP = parseAmpFlag(argc, argv, "-F", &i);
} else if (matches(arg, "-h")) {
usage(argv[0]);
exit(0);
} else {
if (!modelSet) {
*model = arg;
modelSet = true;
} else if (!enginePathSet) {
*enginePath = arg;
enginePathSet = true;
} else {
throw std::runtime_error("Unknown extra argument '" + arg + "'.");
}
}
}
}
/******************************************************************************
* MAIN ***********************************************************************
*****************************************************************************/
int main(int argc, const char* argv[])
{
std::string denoiserModelPath;
std::string enginePath;
int batchSize = 1;
int useFP16 = true;
parseArgs(argc, argv, &denoiserModelPath, &enginePath, &batchSize, &useFP16);
if (denoiserModelPath.empty() || enginePath.empty()) {
usage(argv[0]);
return 1;
}
CudaUtils::printDeviceInformation();
try {
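// Build flow: create a TensorRT builder, cap the batch size, request FP16
// kernels unless '-Ffp32' was passed (FP16 is the default), reserve a 1 GiB
// workspace, load the denoiser weights from the exported JSON file, build the
// engine with DenoiserBuilder, and serialize it to <engine file> via
// EngineCache.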
std::shared_ptr<Logger> logger(new Logger(ILogger::Severity::kERROR));
TRTPtr<IBuilder> builder(createInferBuilder(*logger));
builder->setMaxBatchSize(batchSize);
TRTPtr<IBuilderConfig> config(builder->createBuilderConfig());
config->setMaxWorkspaceSize(1ULL << 30);
uint32_t flags = 0;
if (useFP16) {
flags |= (1U << static_cast<int>(BuilderFlag::kFP16));
}
config->setFlags(flags);
EngineCache cache(logger);
JSONModelImporter importer(denoiserModelPath);
const int denoiserWindowSize = 2 << 13;
DenoiserBuilder denoiserBuilder(denoiserWindowSize);
const TRTPtr<ICudaEngine> engine
= denoiserBuilder.build(importer, *builder, batchSize, useFP16);
cache.save(*engine, enginePath);
} catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return 1;
}
return 0;
}

View file

@@ -0,0 +1,183 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cudaUtils.h"
#include "engineCache.h"
#include "logging.h"
#include "tacotron2Builder.h"
#include "NvInfer.h"
#include <iostream>
#include <memory>
#include <vector>
using namespace nvinfer1;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
bool matches(const std::string& arg, const std::string& flag)
{
return arg.length() >= flag.length() && arg.substr(0, flag.length()) == flag;
}
int parseNumFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
int value;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
value = std::stol(arg.substr(flag.length()));
} else if (*i + 1 < argc) {
++(*i);
value = std::stol(argv[*i]);
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
return value;
}
int parseAmpFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
std::string str;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
str = arg.substr(flag.length());
} else if (*i + 1 < argc) {
++(*i);
str = argv[*i];
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
int value;
if (str == "fp32") {
value = 0;
} else if (str == "amp") {
value = 1;
} else {
throw std::runtime_error(
"Invalid argument for precision (amp|fp32): " + str);
}
return value;
}
void usage(const std::string& binName)
{
std::cerr << "usage: " << std::endl;
std::cerr << " " << binName << " <model file> <engine file> [options]\n";
std::cerr << "options:" << std::endl;
std::cerr << " -I<max input length>" << std::endl;
std::cerr << " -B<batch size>" << std::endl;
std::cerr << " -F<precision (fp32|amp)>" << std::endl;
std::cerr << " -h" << std::endl;
}
void parseArgs(
const int argc,
const char** const argv,
std::string* model,
std::string* enginePath,
int* batchSize,
int* inputLength,
int* useAMP)
{
bool modelSet = false;
bool enginePathSet = false;
for (int i = 1; i < argc; ++i) {
const std::string arg(argv[i]);
if (matches(arg, "-I")) {
*inputLength = parseNumFlag(argc, argv, "-I", &i);
} else if (matches(arg, "-B")) {
*batchSize = parseNumFlag(argc, argv, "-B", &i);
} else if (matches(arg, "-F")) {
*useAMP = parseAmpFlag(argc, argv, "-F", &i);
} else if (matches(arg, "-h")) {
usage(argv[0]);
exit(0);
} else {
if (!modelSet) {
*model = arg;
modelSet = true;
} else if (!enginePathSet) {
*enginePath = arg;
enginePathSet = true;
} else {
throw std::runtime_error("Unknown extra argument '" + arg + "'.");
}
}
}
}
/******************************************************************************
* MAIN ***********************************************************************
*****************************************************************************/
int main(int argc, const char* argv[])
{
std::string tacotron2ModelPath;
std::string enginePath;
int batchSize = 1;
int inputLength = 400;
int useFP16 = true;
parseArgs(
argc,
argv,
&tacotron2ModelPath,
&enginePath,
&batchSize,
&inputLength,
&useFP16);
if (tacotron2ModelPath.empty() || enginePath.empty()) {
usage(argv[0]);
return 1;
}
CudaUtils::printDeviceInformation();
try {
std::shared_ptr<Logger> logger(new Logger(ILogger::Severity::kERROR));
TRTPtr<IBuilder> builder(createInferBuilder(*logger));
EngineCache cache(logger);
Tacotron2Builder tacotron2Builder(tacotron2ModelPath);
const std::vector<TRTPtr<ICudaEngine>> engines
= tacotron2Builder.build(inputLength, *builder, batchSize, useFP16);
cache.save(engines, enginePath);
} catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return 1;
}
return 0;
}

View file

@@ -0,0 +1,174 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cudaUtils.h"
#include "engineCache.h"
#include "logging.h"
#include "waveGlowBuilder.h"
#include "NvInfer.h"
#include <iostream>
#include <memory>
using namespace nvinfer1;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
bool matches(const std::string& arg, const std::string& flag)
{
return arg.length() >= flag.length() && arg.substr(0, flag.length()) == flag;
}
int parseNumFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
int value;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
value = std::stol(arg.substr(flag.length()));
} else if (*i + 1 < argc) {
++(*i);
value = std::stol(argv[*i]);
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
return value;
}
int parseAmpFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
std::string str;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
str = arg.substr(flag.length());
} else if (*i + 1 < argc) {
++(*i);
str = argv[*i];
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
int value;
if (str == "fp32") {
value = 0;
} else if (str == "amp") {
value = 1;
} else {
throw std::runtime_error(
"Invalid argument for precision (amp|fp32): " + str);
}
return value;
}
void usage(const std::string& binName)
{
std::cerr << "usage: " << std::endl;
std::cerr << " " << binName << " <model file> <engine file> [options]\n";
std::cerr << "options:" << std::endl;
std::cerr << " -B<batch size>" << std::endl;
std::cerr << " -F<precision (fp32|amp)>" << std::endl;
std::cerr << " -h" << std::endl;
}
void parseArgs(
const int argc,
const char** const argv,
std::string* model,
std::string* enginePath,
int* batchSize,
int* useAMP)
{
bool modelSet = false;
bool enginePathSet = false;
for (int i = 1; i < argc; ++i) {
const std::string arg(argv[i]);
if (matches(arg, "-B")) {
*batchSize = parseNumFlag(argc, argv, "-B", &i);
} else if (matches(arg, "-F")) {
*useAMP = parseAmpFlag(argc, argv, "-F", &i);
} else if (matches(arg, "-h")) {
usage(argv[0]);
exit(0);
} else {
if (!modelSet) {
*model = arg;
modelSet = true;
} else if (!enginePathSet) {
*enginePath = arg;
enginePathSet = true;
} else {
throw std::runtime_error("Unknown extra argument '" + arg + "'.");
}
}
}
}
/******************************************************************************
* MAIN ***********************************************************************
*****************************************************************************/
int main(int argc, const char* argv[])
{
std::string waveglowModelPath;
std::string enginePath;
int batchSize = 1;
int useFP16 = true;
parseArgs(argc, argv, &waveglowModelPath, &enginePath, &batchSize, &useFP16);
if (waveglowModelPath.empty() || enginePath.empty()) {
usage(argv[0]);
return 1;
}
CudaUtils::printDeviceInformation();
try {
std::shared_ptr<Logger> logger(new Logger(ILogger::Severity::kERROR));
TRTPtr<IBuilder> builder(createInferBuilder(*logger));
EngineCache cache(logger);
WaveGlowBuilder waveglowBuilder(waveglowModelPath, logger);
const TRTPtr<ICudaEngine> wgEng
= waveglowBuilder.build(*builder, batchSize, useFP16);
cache.save(*wgEng, enginePath);
} catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return 1;
}
return 0;
}

View file

@@ -0,0 +1,130 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "blending.h"
#include "cudaMemory.h"
#include <vector>
using namespace tts;
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
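// Note on the Blending::linear() calls below (inferred from the expectations
// in these tests rather than from the Blending API itself): the arguments
// appear to be batch size, device input samples, device output buffer, chunk
// size, overlap length, per-batch output stride, output sample offset, and
// CUDA stream. With zero overlap and zero offset the chunk is copied through
// unchanged; with an offset of one chunk it is written chunkSize samples into
// each batch item's output, leaving the preceding samples untouched.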
TEST(noOverlapNoOffsetBatchSize1)
{
const int chunkSize = 4000;
const int batchSize = 1;
std::vector<float> samplesHost(chunkSize * batchSize);
for (size_t i = 0; i < samplesHost.size(); ++i) {
samplesHost[i] = static_cast<float>(i % 1001) / 1000.0f;
}
CudaMemory<float> samplesDevice(samplesHost);
CudaMemory<float> outDevice(samplesHost.size());
Blending::linear(
batchSize,
samplesDevice.data(),
outDevice.data(),
chunkSize,
0,
chunkSize,
0,
0);
const std::vector<float> outHost = outDevice.toHost();
for (size_t i = 0; i < samplesHost.size(); ++i) {
EXPECT_NEAR(samplesHost[i], outHost[i], 1e-6f);
}
}
TEST(noOverlapNoOffsetBatchSize4)
{
const int chunkSize = 4000;
const int batchSize = 4;
std::vector<float> samplesHost(chunkSize * batchSize);
for (size_t i = 0; i < samplesHost.size(); ++i) {
samplesHost[i] = static_cast<float>(i % 1001) / 1000.0f;
}
CudaMemory<float> samplesDevice(samplesHost);
CudaMemory<float> outDevice(samplesHost.size());
Blending::linear(
batchSize,
samplesDevice.data(),
outDevice.data(),
chunkSize,
0,
chunkSize,
0,
0);
const std::vector<float> outHost = outDevice.toHost();
for (size_t i = 0; i < samplesHost.size(); ++i) {
EXPECT_NEAR(samplesHost[i], outHost[i], 1e-6f);
}
}
TEST(noOverlapOneOffsetBatchSize4)
{
const int chunkSize = 4000;
const int batchSize = 4;
std::vector<float> samplesHost(chunkSize * batchSize);
for (size_t i = 0; i < samplesHost.size(); ++i) {
samplesHost[i] = static_cast<float>(i % 1001) / 1000.0f;
}
CudaMemory<float> samplesDevice(samplesHost);
CudaMemory<float> outDevice(samplesHost.size() * 2);
outDevice.zero();
Blending::linear(
batchSize,
samplesDevice.data(),
outDevice.data(),
chunkSize,
0,
2 * chunkSize,
chunkSize,
0);
const std::vector<float> outHost = outDevice.toHost();
for (int b = 0; b < batchSize; ++b) {
for (int i = 0; i < chunkSize; ++i) {
const int j = b * (chunkSize * 2) + i;
EXPECT_EQ(0.0f, outHost[j]) << "i = " << i;
}
for (int i = chunkSize; i < chunkSize * 2; ++i) {
const int j = b * (chunkSize * 2) + i;
const int k = b * chunkSize + (i - chunkSize);
EXPECT_NEAR(samplesHost[k], outHost[j], 1e-6f) << "i = " << i;
}
}
}

View file

@@ -0,0 +1,57 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
function(add_unit_test test_file)
get_filename_component(test_name "${test_file}" NAME_WE)
add_executable(${test_name} ${test_file} UnitTest.cpp)
target_link_libraries(${test_name} tt2i)
add_test(NAME ${test_name}
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/${test_name}"
WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}")
endfunction()
include_directories(
../extra
../trt/plugins/taco2AttentionPlugin/
../trt/plugins/taco2DenoiseTransformPlugin/
../trt/plugins/taco2LSTMCellPlugin/
../trt/plugins/taco2ModulationRemovalPlugin/
../trt/plugins/taco2PrenetPlugin/
../trt/plugins/taco2ProjectionPlugin/
../trt/plugins/common/
../trt/
../trt/util
../trt/tacotron2
../trt/waveglow
../trt/denoiser
../trt/common
)
file(GLOB tests *_test.cpp)
foreach (file ${tests})
add_unit_test(${file})
endforeach()

View file

@@ -0,0 +1,56 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "CharacterMappingReader.hpp"
#include "UnitTest.hpp"
#include "characterMapping.h"
#include <fstream>
using namespace tts;
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
TEST(LoadFromFile)
{
{
std::ofstream fout("test.txt");
fout << "# ignored header" << std::endl;
fout << "0 long" << std::endl;
fout << "1 s" << std::endl;
fout << "2 " << std::endl;
fout << "3 " << std::endl;
}
CharacterMapping mapping = CharacterMappingReader::loadFromFile("test.txt");
EXPECT_EQ(mapping.get("long"), 0);
EXPECT_EQ(mapping.get("s"), 1);
EXPECT_EQ(mapping.get(" "), 2);
EXPECT_EQ(mapping.get(" "), 3);
}

View file

@@ -0,0 +1,80 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "characterMapping.h"
using namespace tts;
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
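// The expected sequences below imply that the default mapping assigns
// lowercase letters ids of 38 + (letter - 'a') (e.g. 'a' -> 38, 'p' -> 53,
// 'y' -> 62), with ',' -> 6, '.' -> 7 and ' ' -> 11, while ARPAbet tokens
// written as {@...} map to their own ids (e.g. AE0 -> 69, UW -> 139,
// AO -> 76). These values are read off the test data here, not taken from the
// CharacterMapping implementation itself.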
TEST(MapAsciiTest)
{
const std::string text(
"printing, in the only sense with which we are at present concerned, differs "
"from most if not from all the arts and crafts represented in the exhibition in "
"being comparatively modern.");
CharacterMapping cm = CharacterMapping::defaultMapping();
const std::vector<int32_t> sequence = cm.map(text);
const std::vector<int32_t> expSequence{
53, 55, 46, 51, 57, 46, 51, 44, 6 , 11, 46, 51, 11, 57, 45, 42, 11, 52, 51, 49,
62, 11, 56, 42, 51, 56, 42, 11, 60, 46, 57, 45, 11, 60, 45, 46, 40, 45, 11, 60,
42, 11, 38, 55, 42, 11, 38, 57, 11, 53, 55, 42, 56, 42, 51, 57, 11, 40, 52, 51,
40, 42, 55, 51, 42, 41, 6, 11, 41, 46, 43, 43, 42, 55, 56, 11, 43, 55, 52, 50,
11, 50, 52, 56, 57, 11, 46, 43, 11, 51, 52, 57, 11, 43, 55, 52, 50, 11, 38, 49,
49, 11, 57, 45, 42, 11, 38, 55, 57, 56, 11, 38, 51, 41, 11, 40, 55, 38, 43, 57,
56, 11, 55, 42, 53, 55, 42, 56, 42, 51, 57, 42, 41, 11, 46, 51, 11, 57, 45, 42,
11, 42, 61, 45, 46, 39, 46, 57, 46, 52, 51, 11, 46, 51, 11, 39, 42, 46, 51, 44,
11, 40, 52, 50, 53, 38, 55, 38, 57, 46, 59, 42, 49, 62, 11, 50, 52, 41, 42, 55,
51, 7 };
ASSERT_EQ(sequence.size(), expSequence.size());
for (size_t i = 0; i < expSequence.size(); ++i) {
EXPECT_EQ(expSequence[i], sequence[i]);
}
}
TEST(MapArpabetTest)
{
const std::string text("Hello {@AE0}ther {@UW}{@AO}rld.");
CharacterMapping cm = CharacterMapping::defaultMapping();
const std::vector<int32_t> sequence = cm.map(text);
const std::vector<int32_t> expSequence{
45, 42, 49, 49, 52, 11, 69, 57, 45, 42, 55, 11, 139, 76, 55, 49, 41, 7};
ASSERT_EQ(sequence.size(), expSequence.size());
for (size_t i = 0; i < expSequence.size(); ++i) {
EXPECT_EQ(expSequence[i], sequence[i]);
}
}

View file

@@ -0,0 +1,92 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "cudaMemory.h"
#include "dataShuffler.h"
#include <vector>
using namespace tts;
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
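// Layout checked by this test (derived from the expected values below): the
// decoder output is a row-major matrix of shape
// [chunkSize x batchSize*(numChannels+1)], where each batch item contributes
// numChannels mel values followed by one gate value per timestep.
// parseDecoderOutput is expected to transpose this into mel output of shape
// [batchSize x numChannels x chunkSize] and gate output of shape
// [batchSize x chunkSize], copying the gate values through unchanged.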
TEST(parseDecoderOutput)
{
const int chunkSize = 89;
const int batchSize = 3;
const int numChannels = 80;
const int rows = chunkSize;
const int cols = (numChannels + 1) * batchSize;
std::vector<float> mat(rows * cols);
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
mat[i * cols + j] = static_cast<float>(i * cols + j);
if ((j % (numChannels + 1)) == numChannels) {
// gate
mat[i * cols + j] *= -1.0f;
}
}
}
CudaMemory<float> matInDev(mat);
CudaMemory<float> matOutDev(chunkSize * numChannels * batchSize);
CudaMemory<float> gateOutDev(chunkSize * batchSize);
DataShuffler::parseDecoderOutput(
matInDev.data(),
matOutDev.data(),
gateOutDev.data(),
batchSize,
chunkSize,
numChannels,
0);
const std::vector<float> act = matOutDev.toHost();
for (int i = 0; i < numChannels * batchSize; ++i) {
for (int j = 0; j < chunkSize; ++j) {
EXPECT_EQ(
act[i * chunkSize + j],
static_cast<float>(j * cols + (i + (i / numChannels))))
<< "i = " << i << " j = " << j;
}
}
const std::vector<float> actGate = gateOutDev.toHost();
for (int i = 0; i < batchSize; ++i) {
for (int j = 0; j < chunkSize; ++j) {
EXPECT_EQ(
actGate[i * chunkSize + j],
-static_cast<float>(
((i + 1) * numChannels + i)
+ (j * (numChannels + 1) * batchSize)))
<< "i = " << i << " j = " << j;
}
}
}

View file

@@ -0,0 +1,90 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "jsonModelImporter.h"
#include <fstream>
using namespace tts;
/******************************************************************************
* UNIT TESTS *****************************************************************
*****************************************************************************/
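// These tests assume the exported JSON maps dot-separated parameter paths
// ("<module>.<layer>.<name>") to nested arrays or scalars of weights.
// getWeights() is called with the path prefix split into its components, and
// the returned LayerData exposes each trailing name via get(), with the
// values flattened into a float array and count giving the number of
// elements.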
TEST(ImportArraysTest)
{
std::ofstream fout("test.json");
fout << "{" << std::endl;
fout << "\"test.layer.weight\" :" << std::endl;
fout << "[[[1.0, 3.0, -5.0], [2.0, 1.0, 0.0]]]," << std::endl;
fout << "\"test.layer.bias\" :" << std::endl;
fout << "[[2.0, -3.0, 1.0]]" << std::endl;
fout << "}" << std::endl;
fout.flush();
fout.close();
JSONModelImporter importer("test.json");
const LayerData * data = importer.getWeights({"test", "layer"});
ASSERT_TRUE(data != nullptr);
ASSERT_EQ(data->get("weight").count, 6);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[0], 1.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[1], 3.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[2], -5.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[3], 2.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[4], 1.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[5], 0.0f);
ASSERT_EQ(data->get("bias").count, 3);
EXPECT_EQ(static_cast<const float*>(data->get("bias").values)[0], 2.0f);
EXPECT_EQ(static_cast<const float*>(data->get("bias").values)[1], -3.0f);
EXPECT_EQ(static_cast<const float*>(data->get("bias").values)[2], 1.0f);
}
TEST(ImportScalarTest)
{
std::ofstream fout("test.json");
fout << "{" << std::endl;
fout << "\"test.layer.some_value\" :" << std::endl;
fout << "3" << std::endl;
fout << "}" << std::endl;
fout.flush();
fout.close();
JSONModelImporter importer("test.json");
const LayerData * data = importer.getWeights({"test", "layer"});
ASSERT_TRUE(data != nullptr);
ASSERT_EQ(data->get("some_value").count, 1);
EXPECT_EQ(static_cast<const float*>(data->get("some_value").values)[0], 3.0f);
}

View file

@@ -0,0 +1,229 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "cudaMemory.h"
#include "taco2DenoiseTransformLayerPlugin.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-1.0, 1.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
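// The CPU reference in both tests treats each input column as an STFT frame
// whose first filterLength/2 rows are real parts and whose remaining rows are
// imaginary parts. For every bin y it computes
//   mag  = sqrt(re^2 + im^2),  phase = atan2(im, re),
//   mag' = max(0, mag - weights[y]),
// and writes back mag'*cos(phase) and mag'*sin(phase); the plugin output is
// expected to match this to within 1e-6.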
TEST(CPUCompareTestBatch1)
{
std::mt19937 rng(0);
const int filterLength = 1026;
const int inputLength = 935;
const int inputWidth = filterLength / 2;
std::vector<float> weightsHost = genVec(inputWidth, rng);
Taco2DenoiseTransformLayerPlugin layer(
TRTUtils::toWeights(weightsHost), filterLength, inputLength);
std::vector<float> inputHost = genVec(filterLength * inputLength, rng);
CudaMemory<float> inputDevice(inputHost);
std::vector<Dims> inputDims{Dims4(1, 1, filterLength, inputLength)};
const std::vector<Dims> outputDims{Dims4(1, 1, filterLength, inputLength)};
const std::vector<DataType> dataTypes{DataType::kFLOAT};
const bool broadcast[] = {false};
layer.configurePlugin(
inputDims.data(),
static_cast<int>(inputDims.size()),
outputDims.data(),
static_cast<int>(outputDims.size()),
dataTypes.data(),
dataTypes.data(),
broadcast,
broadcast,
#if NV_TENSORRT_MAJOR < 6
PluginFormat::kNCHW,
#else
PluginFormat::kLINEAR,
#endif
1);
layer.initialize();
std::vector<const float*> inputs{inputDevice.data()};
CudaMemory<float> outputDevice(filterLength * inputLength);
std::vector<float*> outputs{outputDevice.data()};
layer.enqueue(
1,
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
nullptr,
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> expOutput(filterLength * inputLength);
for (int y = 0; y < inputWidth; ++y) {
for (int x = 0; x < inputLength; ++x) {
const int idx = y * inputLength + x;
const int idx2 = (y + inputWidth) * inputLength + x;
const float real = inputHost[idx];
const float img = inputHost[idx2];
float mag = std::sqrt(real * real + img * img);
const float phase = std::atan2(img, real);
mag = std::max(0.0f, mag - weightsHost[y]);
expOutput[idx] = mag * std::cos(phase);
expOutput[idx2] = mag * std::sin(phase);
}
}
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size(), actOutput.size());
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i], 1e-6) << "i = " << i;
}
}
TEST(CPUCompareTestBatch4)
{
std::mt19937 rng(0);
const int filterLength = 1026;
const int inputLength = 935;
const int inputWidth = filterLength / 2;
const int batchSize = 9;
std::vector<float> weightsHost = genVec(inputWidth, rng);
Taco2DenoiseTransformLayerPlugin layer(
TRTUtils::toWeights(weightsHost), filterLength, inputLength);
// duplicate vector
std::vector<float> inputHostBase = genVec(filterLength * inputLength, rng);
std::vector<float> inputHost;
for (int i = 0; i < batchSize; ++i) {
inputHost.insert(
inputHost.end(), inputHostBase.begin(), inputHostBase.end());
}
CudaMemory<float> inputDevice(inputHost);
std::vector<Dims> inputDims{Dims4(1, 1, filterLength, inputLength)};
const std::vector<Dims> outputDims{Dims4(1, 1, filterLength, inputLength)};
const std::vector<DataType> dataTypes{DataType::kFLOAT};
const bool broadcast[] = {false};
layer.configurePlugin(
inputDims.data(),
static_cast<int>(inputDims.size()),
outputDims.data(),
static_cast<int>(outputDims.size()),
dataTypes.data(),
dataTypes.data(),
broadcast,
broadcast,
PluginFormat::kLINEAR,
batchSize);
layer.initialize();
std::vector<const float*> inputs{inputDevice.data()};
CudaMemory<float> outputDevice(filterLength * inputLength * batchSize);
std::vector<float*> outputs{outputDevice.data()};
layer.enqueue(
batchSize,
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
nullptr,
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> expOutput(filterLength * inputLength);
for (int y = 0; y < inputWidth; ++y) {
for (int x = 0; x < inputLength; ++x) {
const int idx = y * inputLength + x;
const int idx2 = (y + inputWidth) * inputLength + x;
const float real = inputHost[idx];
const float img = inputHost[idx2];
float mag = std::sqrt(real * real + img * img);
const float phase = std::atan2(img, real);
mag = std::max(0.0f, mag - weightsHost[y]);
expOutput[idx] = mag * std::cos(phase);
expOutput[idx2] = mag * std::sin(phase);
}
}
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size() * batchSize, actOutput.size());
for (int b = 0; b < batchSize; ++b) {
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i + (b * expOutput.size())], 1e-6)
<< "i = " << i << " b " << b;
}
}
}

View file

@@ -0,0 +1,944 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "binding.h"
#include "cudaMemory.h"
#include "cudaUtils.h"
#include "logging.h"
#include "taco2LSTMCellLayerPlugin.h"
#include "trtUtils.h"
#include "utils.h"
#include "NvInfer.h"
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-10.0, 10.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TESTS *****************************************************************
*****************************************************************************/
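// The CPU reference in each test below computes a standard LSTM cell over the
// concatenated input (inputFirst followed by inputSecond): with
//   z = W_input*x + b_input + W_hidden*h + b_hidden
// split into four blocks of numDimensions values,
//   i = sigmoid(z[0]), f = sigmoid(z[1]), g = tanh(z[2]), o = sigmoid(z[3]),
//   c' = f*c + i*g,    h' = o*tanh(c'),
// and the plugin's hidden/cell outputs are compared against h' and c' within
// the stated tolerances.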
TEST(CPUCompareFP32I256Test)
{
std::mt19937 rng(0);
const int inputLengthFirst = 256;
const int inputLengthSecond = 512;
const int inputLength = inputLengthFirst + inputLengthSecond;
const int numDimensions = 1024;
// weights
std::vector<float> inputWeight = genVec(inputLength * numDimensions * 4, rng);
const std::vector<float> inputBias = genVec(numDimensions * 4, rng);
std::vector<float> hiddenWeight
= genVec(numDimensions * numDimensions * 4, rng);
const std::vector<float> hiddenBias = genVec(numDimensions * 4, rng);
Taco2LSTMCellLayerPlugin layer(
TRTUtils::toWeights(inputWeight),
TRTUtils::toWeights(hiddenWeight),
TRTUtils::toWeights(inputBias),
TRTUtils::toWeights(hiddenBias),
inputLength,
numDimensions,
false);
const std::vector<float> inputFirst = genVec(inputLengthFirst, rng);
const std::vector<float> inputSecond = genVec(inputLengthSecond, rng);
const std::vector<float> hiddenState = genVec(numDimensions, rng);
const std::vector<float> cellState = genVec(numDimensions, rng);
CudaMemory<float> inputFirstDevice(inputFirst);
CudaMemory<float> inputSecondDevice(inputSecond);
CudaMemory<float> hiddenStateDevice(hiddenState);
CudaMemory<float> cellStateDevice(cellState);
const std::vector<Dims> inputDims{Dims2(1, inputLengthFirst),
Dims4(1, inputLengthSecond, 1, 1),
Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<DataType> dataTypes(4, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDesc{
{// INPUT_FIRST_INDEX
{Dims2(-1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthFirst),
Dims2(1, inputLengthFirst)},
{// INPUT_SECOND_INDEX
{Dims4(-1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthSecond),
Dims2(1, inputLengthSecond)},
{// HIDDEN_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDesc{{// HIDDEN
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
layer.configurePlugin(
inDesc.data(), inDesc.size(), outDesc.data(), outDesc.size());
layer.initialize();
const std::vector<const float*> inputs{inputFirstDevice.data(),
inputSecondDevice.data(),
hiddenStateDevice.data(),
cellStateDevice.data()};
CudaMemory<float> hiddenStateOutDevice(hiddenState.size());
CudaMemory<float> cellStateOutDevice(hiddenState.size());
std::vector<float*> outputs{hiddenStateOutDevice.data(),
cellStateOutDevice.data()};
const std::vector<PluginTensorDesc> inConf{{// INPUT_FIRST_INDEX
Dims2(1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// INPUT_SECOND_INDEX
Dims4(1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// HIDDEN_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
const std::vector<PluginTensorDesc> outConf{{// HIDDEN
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inConf.data(),
static_cast<int>(inConf.size()),
outConf.data(),
static_cast<int>(outConf.size())));
layer.enqueue(
inConf.data(),
outConf.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> prod1(4 * numDimensions, 0);
std::vector<float> prod2(4 * numDimensions, 0);
std::vector<float> prod3(4 * numDimensions, 0);
std::vector<float> prod(4 * numDimensions, 0);
// perform input MV
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthFirst); ++j) {
val += inputWeight[i * inputLength + j] * inputFirst[j];
}
prod[i] += val;
}
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthSecond); ++j) {
val += inputWeight[i * inputLength + j + inputLengthFirst]
* inputSecond[j];
}
prod[i] += val;
}
for (size_t i = 0; i < hiddenBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < hiddenState.size(); ++j) {
val += hiddenWeight[i * hiddenState.size() + j] * hiddenState[j];
}
prod[i] += val;
}
// add biases
for (size_t i = 0; i < inputBias.size(); ++i) {
prod[i] += inputBias[i] + hiddenBias[i];
}
std::vector<float> expHiddenOut(hiddenState);
std::vector<float> expCellOut(cellState);
// perform reduction
for (int row = 0; row < numDimensions; ++row) {
const float c = cellState[row];
const float i = Utils::sigmoid(prod[row]);
const float f = Utils::sigmoid(prod[row + numDimensions]);
const float g = tanh(prod[row + numDimensions * 2]);
const float o = Utils::sigmoid(prod[row + numDimensions * 3]);
const float cPrime = f * c + i * g;
const float hPrime = o * tanh(cPrime);
expHiddenOut[row] = hPrime;
expCellOut[row] = cPrime;
}
// copy back to host
const std::vector<float> actHiddenOut = hiddenStateOutDevice.toHost();
const std::vector<float> actCellOut = cellStateOutDevice.toHost();
ASSERT_EQ(expHiddenOut.size(), actHiddenOut.size());
for (size_t i = 0; i < expHiddenOut.size(); ++i) {
EXPECT_NEAR(expHiddenOut[i], actHiddenOut[i], 7.5e-4) << "i = " << i;
}
ASSERT_EQ(expCellOut.size(), actCellOut.size());
for (size_t i = 0; i < expCellOut.size(); ++i) {
EXPECT_NEAR(expCellOut[i], actCellOut[i], 5e-3) << "i = " << i;
}
}
TEST(CPUCompareFP32I1024Test)
{
std::mt19937 rng(0);
const int inputLengthFirst = 1024;
const int inputLengthSecond = 512;
const int inputLength = inputLengthFirst + inputLengthSecond;
const int numDimensions = 1024;
// weights
std::vector<float> inputWeight = genVec(inputLength * numDimensions * 4, rng);
const std::vector<float> inputBias = genVec(numDimensions * 4, rng);
std::vector<float> hiddenWeight
= genVec(numDimensions * numDimensions * 4, rng);
const std::vector<float> hiddenBias = genVec(numDimensions * 4, rng);
Taco2LSTMCellLayerPlugin layer(
TRTUtils::toWeights(inputWeight),
TRTUtils::toWeights(hiddenWeight),
TRTUtils::toWeights(inputBias),
TRTUtils::toWeights(hiddenBias),
inputLength,
numDimensions,
false);
const std::vector<float> inputFirst = genVec(inputLengthFirst, rng);
const std::vector<float> inputSecond = genVec(inputLengthSecond, rng);
const std::vector<float> hiddenState = genVec(numDimensions, rng);
const std::vector<float> cellState = genVec(numDimensions, rng);
CudaMemory<float> inputFirstDevice(inputFirst);
CudaMemory<float> inputSecondDevice(inputSecond);
CudaMemory<float> hiddenStateDevice(hiddenState);
CudaMemory<float> cellStateDevice(cellState);
const std::vector<Dims> inputDims{Dims2(1, inputLengthFirst),
Dims4(1, inputLengthSecond, 1, 1),
Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<DataType> dataTypes(4, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDesc{
{// INPUT_FIRST_INDEX
{Dims2(-1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthFirst),
Dims2(1, inputLengthFirst)},
{// INPUT_SECOND_INDEX
{Dims4(-1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthSecond),
Dims2(1, inputLengthSecond)},
{// HIDDEN_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDesc{{// HIDDEN
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
layer.configurePlugin(
inDesc.data(), inDesc.size(), outDesc.data(), outDesc.size());
layer.initialize();
const std::vector<const float*> inputs{inputFirstDevice.data(),
inputSecondDevice.data(),
hiddenStateDevice.data(),
cellStateDevice.data()};
CudaMemory<float> hiddenStateOutDevice(hiddenState.size());
CudaMemory<float> cellStateOutDevice(hiddenState.size());
std::vector<float*> outputs{hiddenStateOutDevice.data(),
cellStateOutDevice.data()};
const std::vector<PluginTensorDesc> inConf{{// INPUT_FIRST_INDEX
Dims2(1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// INPUT_SECOND_INDEX
Dims4(1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// HIDDEN_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
const std::vector<PluginTensorDesc> outConf{{// HIDDEN
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inConf.data(),
static_cast<int>(inConf.size()),
outConf.data(),
static_cast<int>(outConf.size())));
layer.enqueue(
inConf.data(),
outConf.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> prod1(4 * numDimensions, 0);
std::vector<float> prod2(4 * numDimensions, 0);
std::vector<float> prod3(4 * numDimensions, 0);
std::vector<float> prod(4 * numDimensions, 0);
// perform input MV
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthFirst); ++j) {
val += inputWeight[i * inputLength + j] * inputFirst[j];
}
prod[i] += val;
}
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthSecond); ++j) {
val += inputWeight[i * inputLength + j + inputLengthFirst]
* inputSecond[j];
}
prod[i] += val;
}
for (size_t i = 0; i < hiddenBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < hiddenState.size(); ++j) {
val += hiddenWeight[i * hiddenState.size() + j] * hiddenState[j];
}
prod[i] += val;
}
// add biases
for (size_t i = 0; i < inputBias.size(); ++i) {
prod[i] += inputBias[i] + hiddenBias[i];
}
std::vector<float> expHiddenOut(hiddenState);
std::vector<float> expCellOut(cellState);
// perform reduction
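// `prod` holds the 4*numDimensions gate pre-activations as consecutive
// blocks [i | f | g | o]; the cell is then updated element-wise as
//   i = sigmoid(.), f = sigmoid(.), g = tanh(.), o = sigmoid(.),
//   c' = f * c + i * g,  h' = o * tanh(c')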
for (int row = 0; row < numDimensions; ++row) {
const float c = cellState[row];
const float i = Utils::sigmoid(prod[row]);
const float f = Utils::sigmoid(prod[row + numDimensions]);
const float g = tanh(prod[row + numDimensions * 2]);
const float o = Utils::sigmoid(prod[row + numDimensions * 3]);
const float cPrime = f * c + i * g;
const float hPrime = o * tanh(cPrime);
expHiddenOut[row] = hPrime;
expCellOut[row] = cPrime;
}
// copy back to host
const std::vector<float> actHiddenOut = hiddenStateOutDevice.toHost();
const std::vector<float> actCellOut = cellStateOutDevice.toHost();
ASSERT_EQ(expHiddenOut.size(), actHiddenOut.size());
for (size_t i = 0; i < expHiddenOut.size(); ++i) {
EXPECT_NEAR(expHiddenOut[i], actHiddenOut[i], 7.5e-4) << "i = " << i;
}
ASSERT_EQ(expCellOut.size(), actCellOut.size());
for (size_t i = 0; i < expCellOut.size(); ++i) {
EXPECT_NEAR(expCellOut[i], actCellOut[i], 5e-3) << "i = " << i;
}
}
TEST(CPUCompareFP16I256Test)
{
std::mt19937 rng(0);
const int inputLengthFirst = 256;
const int inputLengthSecond = 512;
const int inputLength = inputLengthFirst + inputLengthSecond;
const int numDimensions = 1024;
// weights
std::vector<float> inputWeight = genVec(inputLength * numDimensions * 4, rng);
const std::vector<float> inputBias = genVec(numDimensions * 4, rng);
std::vector<float> hiddenWeight
= genVec(numDimensions * numDimensions * 4, rng);
const std::vector<float> hiddenBias = genVec(numDimensions * 4, rng);
Taco2LSTMCellLayerPlugin layer(
TRTUtils::toWeights(inputWeight),
TRTUtils::toWeights(hiddenWeight),
TRTUtils::toWeights(inputBias),
TRTUtils::toWeights(hiddenBias),
inputLength,
numDimensions,
true);
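// the trailing 'true' presumably enables the plugin's FP16 path (suggested
// by the test name and the looser tolerances below); the test inputs and
// the CPU reference stay in FP32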
const std::vector<float> inputFirst = genVec(inputLengthFirst, rng);
const std::vector<float> inputSecond = genVec(inputLengthSecond, rng);
const std::vector<float> hiddenState = genVec(numDimensions, rng);
const std::vector<float> cellState = genVec(numDimensions, rng);
CudaMemory<float> inputFirstDevice(inputFirst);
CudaMemory<float> inputSecondDevice(inputSecond);
CudaMemory<float> hiddenStateDevice(hiddenState);
CudaMemory<float> cellStateDevice(cellState);
const std::vector<Dims> inputDims{Dims2(1, inputLengthFirst),
Dims4(1, inputLengthSecond, 1, 1),
Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<DataType> dataTypes(4, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDesc{
{// INPUT_FIRST_INDEX
{Dims2(-1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthFirst),
Dims2(1, inputLengthFirst)},
{// INPUT_SECOND_INDEX
{Dims4(-1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthSecond),
Dims2(1, inputLengthSecond)},
{// HIDDEN_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDesc{{// HIDDEN
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
layer.configurePlugin(
inDesc.data(), inDesc.size(), outDesc.data(), outDesc.size());
layer.initialize();
const std::vector<const float*> inputs{inputFirstDevice.data(),
inputSecondDevice.data(),
hiddenStateDevice.data(),
cellStateDevice.data()};
CudaMemory<float> hiddenStateOutDevice(hiddenState.size());
CudaMemory<float> cellStateOutDevice(hiddenState.size());
std::vector<float*> outputs{hiddenStateOutDevice.data(),
cellStateOutDevice.data()};
const std::vector<PluginTensorDesc> inConf{{// INPUT_FIRST_INDEX
Dims2(1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// INPUT_SECOND_INDEX
Dims4(1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// HIDDEN_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
const std::vector<PluginTensorDesc> outConf{{// HIDDEN
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inConf.data(),
static_cast<int>(inConf.size()),
outConf.data(),
static_cast<int>(outConf.size())));
layer.enqueue(
inConf.data(),
outConf.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> prod(4 * numDimensions, 0);
// perform input MV
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthFirst); ++j) {
val += inputWeight[i * inputLength + j] * inputFirst[j];
}
prod[i] += val;
}
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthSecond); ++j) {
val += inputWeight[i * inputLength + j + inputLengthFirst]
* inputSecond[j];
}
prod[i] += val;
}
for (size_t i = 0; i < hiddenBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < hiddenState.size(); ++j) {
val += hiddenWeight[i * hiddenState.size() + j] * hiddenState[j];
}
prod[i] += val;
}
// add biases
for (size_t i = 0; i < inputBias.size(); ++i) {
prod[i] += inputBias[i] + hiddenBias[i];
}
std::vector<float> expHiddenOut(hiddenState);
std::vector<float> expCellOut(cellState);
// perform reduction
for (int row = 0; row < numDimensions; ++row) {
const float c = cellState[row];
const float i = Utils::sigmoid(prod[row]);
const float f = Utils::sigmoid(prod[row + numDimensions]);
const float g = tanh(prod[row + numDimensions * 2]);
const float o = Utils::sigmoid(prod[row + numDimensions * 3]);
const float cPrime = f * c + i * g;
const float hPrime = o * tanh(cPrime);
expHiddenOut[row] = hPrime;
expCellOut[row] = cPrime;
}
// copy back to host
const std::vector<float> actHiddenOut = hiddenStateOutDevice.toHost();
const std::vector<float> actCellOut = cellStateOutDevice.toHost();
ASSERT_EQ(expHiddenOut.size(), actHiddenOut.size());
for (size_t i = 0; i < expHiddenOut.size(); ++i) {
EXPECT_NEAR(expHiddenOut[i], actHiddenOut[i], 4.5e-1) << "i = " << i;
}
ASSERT_EQ(expCellOut.size(), actCellOut.size());
for (size_t i = 0; i < expCellOut.size(); ++i) {
EXPECT_NEAR(expCellOut[i], actCellOut[i], 4.5e-1) << "i = " << i;
}
}
TEST(CPUCompareFP16I1024Test)
{
std::mt19937 rng(0);
const int inputLengthFirst = 1024;
const int inputLengthSecond = 512;
const int inputLength = inputLengthFirst + inputLengthSecond;
const int numDimensions = 1024;
// weights
std::vector<float> inputWeight = genVec(inputLength * numDimensions * 4, rng);
const std::vector<float> inputBias = genVec(numDimensions * 4, rng);
std::vector<float> hiddenWeight
= genVec(numDimensions * numDimensions * 4, rng);
const std::vector<float> hiddenBias = genVec(numDimensions * 4, rng);
Taco2LSTMCellLayerPlugin layer(
TRTUtils::toWeights(inputWeight),
TRTUtils::toWeights(hiddenWeight),
TRTUtils::toWeights(inputBias),
TRTUtils::toWeights(hiddenBias),
inputLength,
numDimensions,
true);
const std::vector<float> inputFirst = genVec(inputLengthFirst, rng);
const std::vector<float> inputSecond = genVec(inputLengthSecond, rng);
const std::vector<float> hiddenState = genVec(numDimensions, rng);
const std::vector<float> cellState = genVec(numDimensions, rng);
CudaMemory<float> inputFirstDevice(inputFirst);
CudaMemory<float> inputSecondDevice(inputSecond);
CudaMemory<float> hiddenStateDevice(hiddenState);
CudaMemory<float> cellStateDevice(cellState);
const std::vector<Dims> inputDims{Dims2(1, inputLengthFirst),
Dims4(1, inputLengthSecond, 1, 1),
Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<DataType> dataTypes(4, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDesc{
{// INPUT_FIRST_INDEX
{Dims2(-1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthFirst),
Dims2(1, inputLengthFirst)},
{// INPUT_SECOND_INDEX
{Dims4(-1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthSecond),
Dims2(1, inputLengthSecond)},
{// HIDDEN_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDesc{{// HIDDEN
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
layer.configurePlugin(
inDesc.data(), inDesc.size(), outDesc.data(), outDesc.size());
layer.initialize();
const std::vector<const float*> inputs{inputFirstDevice.data(),
inputSecondDevice.data(),
hiddenStateDevice.data(),
cellStateDevice.data()};
CudaMemory<float> hiddenStateOutDevice(hiddenState.size());
CudaMemory<float> cellStateOutDevice(hiddenState.size());
std::vector<float*> outputs{hiddenStateOutDevice.data(),
cellStateOutDevice.data()};
const std::vector<PluginTensorDesc> inConf{{// INPUT_FIRST_INDEX
Dims2(1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// INPUT_SECOND_INDEX
Dims4(1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// HIDDEN_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
const std::vector<PluginTensorDesc> outConf{{// HIDDEN
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inConf.data(),
static_cast<int>(inConf.size()),
outConf.data(),
static_cast<int>(outConf.size())));
layer.enqueue(
inConf.data(),
outConf.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> prod(4 * numDimensions, 0);
// perform input MV
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthFirst); ++j) {
val += inputWeight[i * inputLength + j] * inputFirst[j];
}
prod[i] += val;
}
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthSecond); ++j) {
val += inputWeight[i * inputLength + j + inputLengthFirst]
* inputSecond[j];
}
prod[i] += val;
}
for (size_t i = 0; i < hiddenBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < hiddenState.size(); ++j) {
val += hiddenWeight[i * hiddenState.size() + j] * hiddenState[j];
}
prod[i] += val;
}
// add biases
for (size_t i = 0; i < inputBias.size(); ++i) {
prod[i] += inputBias[i] + hiddenBias[i];
}
std::vector<float> expHiddenOut(hiddenState);
std::vector<float> expCellOut(cellState);
// perform reduction
for (int row = 0; row < numDimensions; ++row) {
const float c = cellState[row];
const float i = Utils::sigmoid(prod[row]);
const float f = Utils::sigmoid(prod[row + numDimensions]);
const float g = tanh(prod[row + numDimensions * 2]);
const float o = Utils::sigmoid(prod[row + numDimensions * 3]);
const float cPrime = f * c + i * g;
const float hPrime = o * tanh(cPrime);
expHiddenOut[row] = hPrime;
expCellOut[row] = cPrime;
}
// copy back to host
const std::vector<float> actHiddenOut = hiddenStateOutDevice.toHost();
const std::vector<float> actCellOut = cellStateOutDevice.toHost();
ASSERT_EQ(expHiddenOut.size(), actHiddenOut.size());
for (size_t i = 0; i < expHiddenOut.size(); ++i) {
EXPECT_NEAR(expHiddenOut[i], actHiddenOut[i], 4.5e-1) << "i = " << i;
}
ASSERT_EQ(expCellOut.size(), actCellOut.size());
for (size_t i = 0; i < expCellOut.size(); ++i) {
EXPECT_NEAR(expCellOut[i], actCellOut[i], 4.5e-1) << "i = " << i;
}
}

View file

@ -0,0 +1,241 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "cudaMemory.h"
#include "taco2ModulationRemovalLayerPlugin.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <cfloat>
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-1.0, 1.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
TEST(CPUCompareTestBatch1)
{
std::mt19937 rng(0);
const int numFrames = 250;
const int filterLength = 1024;
const int hopLength = 256;
const int inputLength = numFrames * hopLength;
std::vector<float> weightsHost = genVec(filterLength, rng);
std::fill(weightsHost.begin(), weightsHost.end(), 1.0f);
Taco2ModulationRemovalLayerPlugin layer(
TRTUtils::toWeights(weightsHost), inputLength, filterLength, hopLength);
std::vector<float> inputHost = genVec(inputLength, rng);
std::fill(inputHost.begin(), inputHost.end(), 1.0f);
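// both the window weights and the input are overwritten with 1.0f, so the
// expected output is determined entirely by the window-sum normalization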
CudaMemory<float> inputDevice(inputHost);
std::vector<Dims> inputDims{Dims3(1, 1, inputLength)};
const std::vector<Dims> outputDims{Dims3(1, 1, inputLength)};
const std::vector<DataType> dataTypes{DataType::kFLOAT};
const bool broadcast[] = {false};
layer.configurePlugin(
inputDims.data(),
static_cast<int>(inputDims.size()),
outputDims.data(),
static_cast<int>(outputDims.size()),
dataTypes.data(),
dataTypes.data(),
broadcast,
broadcast,
#if NV_TENSORRT_MAJOR < 6
PluginFormat::kNCHW,
#else
PluginFormat::kLINEAR,
#endif
1);
layer.initialize();
std::vector<const float*> inputs{inputDevice.data()};
CudaMemory<float> outputDevice(inputLength - filterLength);
std::vector<float*> outputs{outputDevice.data()};
layer.enqueue(
1,
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
nullptr,
0);
CudaUtils::sync(0);
// perform operations on cpu
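// reference: sum the overlapping window weights at each sample position
// (hop-shifted copies of the filter), divide the signal by that sum where
// it is non-zero, rescale by filterLength/hopLength, and trim half a
// filter length from both ends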
std::vector<float> windowSum(inputLength, 0);
for (int i = 0; i < inputLength; i += hopLength) {
for (int j = 0; j < filterLength; ++j) {
const int idx = i + j;
if (idx < inputLength) {
windowSum[idx] += weightsHost[j];
}
}
}
std::vector<float> expOutput(inputLength, 0);
for (int x = 0; x < inputLength; ++x) {
float val = inputHost[x];
if (windowSum[x] > FLT_MIN) {
val /= windowSum[x];
}
val *= static_cast<float>(filterLength) / static_cast<float>(hopLength);
expOutput[x] = val;
}
expOutput.erase(expOutput.begin(), expOutput.begin() + (filterLength / 2));
expOutput.erase(expOutput.end() - (filterLength / 2), expOutput.end());
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size(), actOutput.size());
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i], 1e-6) << "i = " << i;
}
}
TEST(CPUCompareTestBatch4)
{
std::mt19937 rng(0);
const int batchSize = 2;
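// NOTE: despite the test name, this test uses a batch size of 2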
const int numFrames = 250;
const int filterLength = 1024;
const int hopLength = 256;
const int inputLength = numFrames * hopLength;
std::vector<float> weightsHost = genVec(filterLength, rng);
std::fill(weightsHost.begin(), weightsHost.end(), 1.0f);
Taco2ModulationRemovalLayerPlugin layer(
TRTUtils::toWeights(weightsHost), inputLength, filterLength, hopLength);
std::vector<float> inputHost = genVec(batchSize * inputLength, rng);
std::fill(inputHost.begin(), inputHost.end(), 1.0f);
CudaMemory<float> inputDevice(inputHost);
std::vector<Dims> inputDims{Dims3(1, 1, inputLength)};
const std::vector<Dims> outputDims{Dims3(1, 1, inputLength)};
const std::vector<DataType> dataTypes{DataType::kFLOAT};
const bool broadcast[] = {false};
layer.configurePlugin(
inputDims.data(),
static_cast<int>(inputDims.size()),
outputDims.data(),
static_cast<int>(outputDims.size()),
dataTypes.data(),
dataTypes.data(),
broadcast,
broadcast,
PluginFormat::kLINEAR,
batchSize);
layer.initialize();
std::vector<const float*> inputs{inputDevice.data()};
CudaMemory<float> outputDevice((inputLength - filterLength) * batchSize);
std::vector<float*> outputs{outputDevice.data()};
layer.enqueue(
batchSize,
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
nullptr,
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> windowSum(inputLength, 0);
for (int i = 0; i < inputLength; i += hopLength) {
for (int j = 0; j < filterLength; ++j) {
const int idx = i + j;
if (idx < inputLength) {
windowSum[idx] += weightsHost[j];
}
}
}
std::vector<float> expOutput(inputLength, 0);
for (int x = 0; x < inputLength; ++x) {
float val = inputHost[x];
if (windowSum[x] > FLT_MIN) {
val /= windowSum[x];
}
val *= static_cast<float>(filterLength) / static_cast<float>(hopLength);
expOutput[x] = val;
}
expOutput.erase(expOutput.begin(), expOutput.begin() + (filterLength / 2));
expOutput.erase(expOutput.end() - (filterLength / 2), expOutput.end());
// match outputs -- across entire batch
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size() * batchSize, actOutput.size());
for (int b = 0; b < batchSize; ++b) {
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i + b * expOutput.size()], 1e-6)
<< "i = " << i << ", b = " << b;
}
}
}

View file

@ -0,0 +1,185 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "binding.h"
#include "cudaMemory.h"
#include "cudaUtils.h"
#include "logging.h"
#include "taco2PrenetLayerPlugin.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-1.0, 1.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TESTS *****************************************************************
*****************************************************************************/
TEST(CPUCompareTest)
{
std::mt19937 rng(0);
const int inputLength = 80;
const int numDimensions = 256;
// weights
std::vector<float> weight1 = genVec(inputLength * numDimensions, rng);
std::vector<float> weight2 = genVec(numDimensions * numDimensions, rng);
Taco2PrenetLayerPlugin layer(
TRTUtils::toWeights(weight1),
TRTUtils::toWeights(weight2),
inputLength,
numDimensions);
const std::vector<float> inputHost = genVec(numDimensions, rng);
const std::vector<float> dropoutHost(numDimensions, 1.0f);
CudaMemory<float> inputDevice(inputHost);
CudaMemory<float> dropoutDevice(dropoutHost);
std::vector<Dims> inputDims{Dims3(1, 1, inputLength),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims3(1, 1, numDimensions)};
const std::vector<DataType> dataTypes(2, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDynDesc{
{{Dims3(-1, 1, inputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, inputLength),
Dims3(1, 1, inputLength)},
{{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDynDesc{
{{Dims3(-1, 1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, numDimensions),
Dims3(1, 1, numDimensions)}};
layer.configurePlugin(
inDynDesc.data(), inDynDesc.size(), outDynDesc.data(), outDynDesc.size());
layer.initialize();
std::vector<const float*> inputs{inputDevice.data(), dropoutDevice.data()};
CudaMemory<float> outputDevice(numDimensions);
std::vector<float*> outputs{outputDevice.data()};
const std::vector<PluginTensorDesc> inDesc{
{Dims3(1, 1, inputLength), DataType::kFLOAT, TensorFormat::kLINEAR, 1.0f},
{Dims2(1, numDimensions), DataType::kFLOAT, TensorFormat::kLINEAR, 1.0f},
};
const std::vector<PluginTensorDesc> outDesc{{Dims3(1, 1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inDesc.data(),
static_cast<int>(inDesc.size()),
outDesc.data(),
static_cast<int>(outDesc.size())));
layer.enqueue(
inDesc.data(),
outDesc.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
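// reference prenet: two bias-free linear layers, each followed by ReLU and
// an element-wise multiply with the dropout mask (all ones in this test)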
std::vector<float> expOutput(numDimensions);
std::vector<float> intermediate(numDimensions);
for (int i = 0; i < numDimensions; ++i) {
float v = 0.0f;
for (int j = 0; j < inputLength; ++j) {
v += inputHost[j] * weight1[i * inputLength + j];
}
intermediate[i] = v;
}
for (int i = 0; i < numDimensions; ++i) {
intermediate[i] = std::max(0.0f, intermediate[i]) * dropoutHost[i];
}
for (int i = 0; i < numDimensions; ++i) {
float v = 0.0f;
for (int j = 0; j < numDimensions; ++j) {
v += intermediate[j] * weight2[i * numDimensions + j];
}
expOutput[i] = v;
}
for (int i = 0; i < numDimensions; ++i) {
expOutput[i] = std::max(0.0f, expOutput[i]) * dropoutHost[i];
}
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size(), actOutput.size());
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i], 1e-4) << "i = " << i;
}
}

View file

@ -0,0 +1,205 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "binding.h"
#include "cudaMemory.h"
#include "cudaUtils.h"
#include "logging.h"
#include "taco2ProjectionLayerPlugin.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-1.0, 1.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TESTS *****************************************************************
*****************************************************************************/
TEST(CPUCompareTest)
{
std::mt19937 rng(0);
const int hiddenInputLength = 1024;
const int contextInputLength = 512;
const int numChannelDimensions = 80;
const int numGateDimensions = 1;
const int inputLength = hiddenInputLength + contextInputLength;
const int numDimensions = numChannelDimensions + numGateDimensions;
// weights
std::vector<float> weightChannel
= genVec(inputLength * numChannelDimensions, rng);
std::vector<float> weightGate = genVec(inputLength * numGateDimensions, rng);
std::vector<float> biasChannel = genVec(numChannelDimensions, rng);
std::vector<float> biasGate = genVec(numGateDimensions, rng);
Taco2ProjectionLayerPlugin layer(
TRTUtils::toWeights(weightChannel),
TRTUtils::toWeights(weightGate),
TRTUtils::toWeights(biasChannel),
TRTUtils::toWeights(biasGate),
hiddenInputLength,
contextInputLength,
numChannelDimensions,
numGateDimensions);
std::vector<float> inputHidden = genVec(hiddenInputLength, rng);
std::vector<float> inputContext = genVec(contextInputLength, rng);
CudaMemory<float> inputHiddenDevice(inputHidden);
CudaMemory<float> inputContextDevice(inputContext);
std::vector<Dims> inputDims{Dims3(1, 1, hiddenInputLength),
Dims3(1, 1, contextInputLength)};
const std::vector<Dims> outputDims{Dims3(1, 1, numDimensions)};
const std::vector<DataType> dataTypes(2, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDynDesc{
{{Dims3(-1, 1, hiddenInputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, hiddenInputLength),
Dims3(1, 1, hiddenInputLength)},
{{Dims3(-1, 1, contextInputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, contextInputLength),
Dims3(1, 1, contextInputLength)}};
const std::vector<DynamicPluginTensorDesc> outDynDesc{
{{Dims3(-1, 1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, numDimensions),
Dims3(1, 1, numDimensions)}};
layer.configurePlugin(
inDynDesc.data(), inDynDesc.size(), outDynDesc.data(), outDynDesc.size());
layer.initialize();
std::vector<const float*> inputs{inputHiddenDevice.data(),
inputContextDevice.data()};
CudaMemory<float> outputDevice(numDimensions);
std::vector<float*> outputs{outputDevice.data()};
const std::vector<PluginTensorDesc> inDesc{
{Dims3(1, 1, hiddenInputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{Dims3(1, 1, contextInputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
};
const std::vector<PluginTensorDesc> outDesc{{Dims3(1, 1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inDesc.data(),
static_cast<int>(inDesc.size()),
outDesc.data(),
static_cast<int>(outDesc.size())));
layer.enqueue(
inDesc.data(),
outDesc.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
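// reference: one linear layer over the concatenation [hidden | context];
// the first numChannelDimensions output rows use weightChannel/biasChannel
// and the remaining numGateDimensions rows use weightGate/biasGate (in
// Tacotron 2 these would correspond to the mel channels and the stop gate)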
std::vector<float> expOutput(numDimensions);
for (int i = 0; i < numChannelDimensions; ++i) {
float v = 0.0f;
for (int j = 0; j < hiddenInputLength; ++j) {
v += inputHidden[j] * weightChannel[i * inputLength + j];
}
for (int j = 0; j < contextInputLength; ++j) {
v += inputContext[j]
* weightChannel[i * inputLength + j + hiddenInputLength];
}
expOutput[i] = v + biasChannel[i];
}
for (int i = 0; i < numGateDimensions; ++i) {
float v = 0.0f;
for (int j = 0; j < hiddenInputLength; ++j) {
v += inputHidden[j] * weightGate[i * inputLength + j];
}
for (int j = 0; j < contextInputLength; ++j) {
v += inputContext[j]
* weightGate[i * inputLength + j + hiddenInputLength];
}
expOutput[i + numChannelDimensions] = v + biasGate[i];
}
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size(), actOutput.size());
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i], 1e-4) << "i = " << i;
}
}

View file

@ -0,0 +1,125 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include <exception>
#include <iostream>
namespace
{
std::vector<UnitTest*>* s_tests = nullptr;
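// allocated lazily in registerTest(), presumably so that tests registering
// themselves from global constructors in other translation units do not
// depend on static initialization order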
}
/******************************************************************************
* PUBLIC STATIC METHODS ******************************************************
*****************************************************************************/
bool UnitTest::runAll()
{
size_t numPassed = 0;
size_t numTests = 0;
if (s_tests) {
numTests = s_tests->size();
for (UnitTest* const test : *s_tests) {
try {
test->run();
if (test->passed()) {
std::cout << "Test: " << test->fullname() << " passed." << std::endl;
++numPassed;
continue;
}
} catch (const TestException&) {
// assertion failed
} catch (const std::exception& e) {
std::cout << "Unhandled excpetion: " << e.what() << std::endl;
}
std::cout << "Test: " << test->fullname() << " failed." << std::endl;
}
}
std::cout << numPassed << " / " << numTests << " passed." << std::endl;
return numPassed == numTests;
}
void UnitTest::registerTest(UnitTest* const test)
{
if (!s_tests) {
s_tests = new std::vector<UnitTest*>(0);
}
s_tests->emplace_back(test);
}
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
UnitTest::UnitTest(const std::string& filename, const std::string& name)
: m_nullStream(), m_passed(true), m_filename(filename), m_name(name)
{
registerTest(this);
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
std::string UnitTest::fullname() const
{
return m_filename + "__" + m_name;
}
bool UnitTest::passed() const
{
return m_passed;
}
/******************************************************************************
* PROTECTED METHODS **********************************************************
*****************************************************************************/
void UnitTest::failure()
{
m_passed = false;
}
/******************************************************************************
* MAIN ***********************************************************************
*****************************************************************************/
int main(int /*argc*/, char** /*argv*/)
{
if (UnitTest::runAll()) {
return 0;
} else {
return 1;
}
}

View file

@ -0,0 +1,285 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_UNITTEST_HPP
#define TT2I_UNITTEST_HPP
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#define _TEST(test_name, name) \
class test_name : public UnitTest \
{ \
public: \
test_name() : UnitTest(__FILE__, #name){}; \
void run() override; \
}; \
test_name test_name##_instance; \
void test_name::run()
#define TEST(name) _TEST(test_##__FILE__##__##name, name)
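// Hypothetical usage sketch: TEST(name) declares a UnitTest subclass plus a
// global instance that registers itself, and the block following the macro
// becomes the body of run():
//
//   TEST(MyExampleTest)
//   {
//       ASSERT_EQ(2 + 2, 4);
//       EXPECT_NEAR(3.14f, 3.1415f, 1e-2f) << "pi approximation";
//   }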
#define ASSERT_TRUE(x) \
do { \
if (!(x)) { \
std::cerr << "ASSERT_TRUE: " << #x << "(" << (x) << ") is false at " \
<< __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_EQ(x, y) \
do { \
if (!((x) == (y))) { \
std::cerr << "ASSERT_EQ: " << #x << "(" << (x) << ") != " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_LT(x, y) \
do { \
if (!areComparable((x), (y)) || !((x) < (y))) { \
std::cerr << "ASSERT_LT: " << #x << "(" << (x) << ") !< " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_LE(x, y) \
do { \
if (!areComparable((x), (y)) || !((x) <= (y))) { \
std::cerr << "ASSERT_LE: " << #x << "(" << (x) << ") !<= " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_GT(x, y) \
do { \
if (!areComparable((x), (y)) || !((x) > (y))) { \
std::cerr << "ASSERT_GT: " << #x << "(" << (x) << ") !> " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_GE(x, y) \
do { \
if (!areComparable((x), (y)) || !((x) >= (y))) { \
std::cerr << "ASSERT_GE: " << #x << "(" << (x) << ") !>= " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define EXPECT_TRUE(x) \
[&]() { \
if (!(x)) { \
std::cerr << "EXPECT_TRUE: " << #x << "(" << (x) << ") is false at " \
<< __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_EQ(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) == (y))) { \
std::cerr << "EXPECT_EQ: " << #x << "(" << (x) << ") != " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_LT(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) < (y))) { \
std::cerr << "EXPECT_LT: " << #x << "(" << (x) << ") !< " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_LE(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) <= (y))) { \
std::cerr << "EXPECT_LE: " << #x << "(" << (x) << ") !<= " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_GT(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) > (y))) { \
std::cerr << "EXPECT_GT: " << #x << "(" << (x) << ") !> " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_GE(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) >= (y))) { \
std::cerr << "EXPECT_GE: " << #x << "(" << (x) << ") !>= " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_NEAR(x, y, t) \
[&]() { \
auto diff = std::abs((x) - (y)); \
if (!areComparable((x), (y)) || diff > (t)) { \
std::cerr << "EXPECT_NEAR: " << #x << "(" << (x) << ") !~= " << #y \
<< "(" << (y) << ") " \
<< " within (" << diff << "/" #t << ") at " << __FILE__ << ":" \
<< __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
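// CheckOutput is the temporary returned by the EXPECT_* macros: text
// streamed into it (e.g. << "i = " << i) is buffered and printed from the
// destructor only when the check failed; on success it is discarded.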
class CheckOutput
{
public:
CheckOutput(bool output) : m_displayOutput(output), m_output()
{
}
CheckOutput(CheckOutput&& other)
: m_displayOutput(other.m_displayOutput),
m_output(std::move(other.m_output))
{
other.m_displayOutput = false;
}
~CheckOutput()
{
if (m_displayOutput && !m_output.str().empty()) {
std::cerr << m_output.str() << std::endl;
}
}
template <typename T>
CheckOutput& operator<<(const T& obj)
{
m_output << obj;
return *this;
}
private:
bool m_displayOutput;
std::ostringstream m_output;
};
class TestException : public std::runtime_error
{
public:
TestException() : std::runtime_error("TestFailed"){};
};
class UnitTest
{
public:
static bool runAll();
static void registerTest(UnitTest* test);
UnitTest(const std::string& filename, const std::string& name);
virtual ~UnitTest() = default;
virtual void run() = 0;
std::string fullname() const;
bool passed() const;
protected:
void failure();
template <
typename T,
typename std::enable_if<std::is_floating_point<T>::value, int>::type = 0>
bool areComparable(T x, T y) const
{
return !std::isnan(x) && !std::isnan(y) &&
(!std::isinf(x) || !std::isinf(y));
}
template <
typename T,
typename std::enable_if<!std::is_floating_point<T>::value, int>::type = 0>
bool areComparable(T, T) const
{
return true;
}
std::ostringstream m_nullStream;
private:
bool m_passed;
std::string m_filename;
std::string m_name;
};
#endif

View file

@ -0,0 +1,77 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
file(GLOB TACOTRON2WAVEGLOW_SOURCES
speechSynthesizer.cpp
denoiser/*.cpp
layers/*.cpp
tacotron2/*.cpp
tacotron2/*.cu
util/*.cpp
util/*.cu
waveglow/*.cpp
waveglow/*.cu
plugins/taco2AttentionPlugin/*.cpp
plugins/taco2AttentionPlugin/*.cu
plugins/taco2DenoiseTransformPlugin/*.cpp
plugins/taco2DenoiseTransformPlugin/*.cu
plugins/taco2LSTMCellPlugin/*.cpp
plugins/taco2LSTMCellPlugin/*.cu
plugins/taco2ModulationRemovalPlugin/*.cpp
plugins/taco2ModulationRemovalPlugin/*.cu
plugins/taco2PrenetPlugin/*.cpp
plugins/taco2PrenetPlugin/*.cu
plugins/taco2ProjectionPlugin/*.cpp
plugins/taco2ProjectionPlugin/*.cu
)
set(TARGET_NAME "tt2i")
## library
add_library(${TARGET_NAME} ${TACOTRON2WAVEGLOW_SOURCES})
target_link_libraries(${TARGET_NAME}
cublas
nvinfer
nvonnxparser
)
set_property(TARGET ${TARGET_NAME} PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
set_property(TARGET ${TARGET_NAME} PROPERTY ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
include_directories(
./
./denoiser/
./layers/
./tacotron2/
./util/
./waveglow/
./common/
./plugins/taco2AttentionPlugin/
./plugins/taco2DenoiseTransformPlugin/
./plugins/taco2LSTMCellPlugin/
./plugins/taco2ModulationRemovalPlugin/
./plugins/taco2PrenetPlugin/
./plugins/taco2ProjectionPlugin/
)

View file

@ -0,0 +1,505 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mOutput(stream)
, mPrefix(prefix)
, mShouldLog(shouldLog)
{
}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
: mOutput(other.mOutput)
, mPrefix(std::move(other.mPrefix))
, mShouldLog(std::move(other.mShouldLog))
{
}
~LogStreamConsumerBuffer()
{
// std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the output sequence
// if the pointer to the beginning is not equal to the pointer to the current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr())
{
putOutput();
}
}
// synchronizes the stream buffer and returns 0 on success
// synchronizing the stream buffer consists of inserting the buffer contents into the stream,
// resetting the buffer and flushing the stream
virtual int sync()
{
putOutput();
return 0;
}
void putOutput()
{
if (mShouldLog)
{
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
std::cout << "[";
// tm_mon is zero-based, so add 1 to print the calendar month
std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
// insert the buffer contents, prefixed with the appropriate severity tag, into the stream
mOutput << mPrefix << str();
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
}
void setShouldLog(bool shouldLog)
{
mShouldLog = shouldLog;
}
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog;
};
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase
{
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mBuffer(stream, prefix, shouldLog)
{
}
protected:
LogStreamConsumerBuffer mBuffer;
};
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
{
public:
//! \brief Creates a LogStreamConsumer which logs messages with level severity.
//! Reportable severity determines if the messages are severe enough to be logged.
LogStreamConsumer(Severity reportableSeverity, Severity severity)
: LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(severity <= reportableSeverity)
, mSeverity(severity)
{
}
LogStreamConsumer(LogStreamConsumer&& other)
: LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(other.mShouldLog)
, mSeverity(other.mSeverity)
{
}
void setReportableSeverity(Severity reportableSeverity)
{
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
private:
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
bool mShouldLog;
Severity mSeverity;
};
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.
class Logger : public nvinfer1::ILogger
{
public:
Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity)
{
}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult
{
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger()
{
return *this;
}
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
void log(Severity severity, const char* msg) override
{
LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of this level or higher.
//!
void setReportableSeverity(Severity severity)
{
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom
{
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started)
, mName(name)
, mCmdline(cmdline)
{
}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting with
//! "TensorRT" and containing dot-separated strings containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, const std::string& cmdline)
{
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
{
auto cmdline = genCmdlineString(argc, argv);
return defineTest(name, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom)
{
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(const TestAtom& testAtom, TestResult result)
{
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int reportPass(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int reportFail(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int reportWaive(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int reportTest(const TestAtom& testAtom, bool pass)
{
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const
{
return mReportableSeverity;
}
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the given severity
//!
static const char* severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message with the given result
//!
static const char* testResultString(TestResult result)
{
switch (result)
{
case TestResult::kRUNNING: return "RUNNING";
case TestResult::kPASSED: return "PASSED";
case TestResult::kFAILED: return "FAILED";
case TestResult::kWAIVED: return "WAIVED";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
//!
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(const TestAtom& testAtom, TestResult result)
{
severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int argc, char const* const* argv)
{
std::stringstream ss;
for (int i = 0; i < argc; i++)
{
if (i > 0)
ss << " ";
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
};
namespace
{
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
// ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
#endif // TENSORRT_LOGGING_H
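For orientation, a minimal usage sketch of the Logger and test-atom API above (illustrative only, not part of this commit; the sample name and build flow are placeholders):

#include "logging.h"

#include "NvInfer.h"

int runSample(int argc, char** argv)
{
    Logger logger(nvinfer1::ILogger::Severity::kINFO);

    // register the test so results are reported in the "&&&& ..." format
    auto testAtom = Logger::defineTest("TensorRT.sample_denoiser", argc, argv);
    Logger::reportTestStart(testAtom);

    // getTRTLogger() is the forward-compatible way to hand the logger to TensorRT
    nvinfer1::IBuilder* const builder = nvinfer1::createInferBuilder(logger.getTRTLogger());
    if (!builder)
    {
        LOG_ERROR(logger) << "failed to create builder" << std::endl;
        return Logger::reportFail(testAtom);
    }
    LOG_INFO(logger) << "builder created" << std::endl;

    builder->destroy();
    return Logger::reportPass(testAtom);
}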

View file

@ -0,0 +1,2 @@
4ac2851cbf9ad438581845e5594f591b data/tacotron2.pt
418ba5c243fbacd88fb271b080c1dc0a data/waveglow.pt

View file

@ -0,0 +1,161 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "denoiserBuilder.h"
#include "denoiserStreamingInstance.h"
#include "pluginBuilder.h"
#include "trtUtils.h"
#include <iostream>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const char* const INPUT_NAME = DenoiserStreamingInstance::INPUT_NAME;
constexpr const char* const OUTPUT_NAME = DenoiserStreamingInstance::OUTPUT_NAME;
} // namespace
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
DenoiserBuilder::DenoiserBuilder(int sampleLength, int filterLength, int numOverlap, int winLength)
: mChunkSize(sampleLength)
, mFilterLength(filterLength)
, mHopLength(filterLength / numOverlap)
, mWinLength(winLength)
{
// do nothing
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
TRTPtr<ICudaEngine> DenoiserBuilder::build(
IModelImporter& importer,
IBuilder& builder,
const int maxBatchSize,
const bool useFP16)
{
TRTPtr<INetworkDefinition> network(builder.createNetworkV2(0));
network->setName("Denoiser");
const int cutoff = mFilterLength / 2 + 1;
const LayerData* const stftData = importer.getWeights({"denoiser", "stft"});
const LayerData* const denoiserData = importer.getWeights({"denoiser"});
ITensor* const input = network->addInput(
INPUT_NAME, DataType::kFLOAT, Dims4(1, 1, 1, mChunkSize));
// forward transform
#if NV_TENSORRT_MAJOR < 7
IConvolutionLayer* const convLayer = network->addConvolution(
*input, cutoff * 2, DimsHW(1, mFilterLength), stftData->get("forward_basis"), Weights{});
convLayer->setPadding(DimsHW(0, mFilterLength / 2));
convLayer->setStride(DimsHW(1, mHopLength));
#else
IConvolutionLayer* const convLayer = network->addConvolutionNd(
*input, cutoff * 2, Dims2(1, mFilterLength), stftData->get("forward_basis"), Weights{});
convLayer->setPaddingNd(Dims2(0, mFilterLength / 2));
convLayer->setStrideNd(Dims2(1, mHopLength));
#endif
convLayer->setName("forward_transform_layer");
// use plugin to compute magnitude and phase
PluginBuilder denoiseTransformBuilder("Taco2DenoiseTransform", "0.1.0");
denoiseTransformBuilder.setField(
"InputLength", static_cast<int32_t>(TRTUtils::getTensorSize(*convLayer->getOutput(0)) / (cutoff * 2)));
denoiseTransformBuilder.setField("FilterLength", cutoff * 2);
denoiseTransformBuilder.setField("Weights", denoiserData->get("bias_spec"));
TRTPtr<IPluginV2> denoise = denoiseTransformBuilder.make("denoise_layer");
std::vector<ITensor*> denoiseInputs{convLayer->getOutput(0)};
ILayer* const denoiseLayer
= network->addPluginV2(denoiseInputs.data(), static_cast<int>(denoiseInputs.size()), *denoise);
// inverse transform
#if NV_TENSORRT_MAJOR < 7
IDeconvolutionLayer* const deconvLayer = network->addDeconvolution(
*denoiseLayer->getOutput(0), 1, DimsHW(1, mFilterLength), stftData->get("inverse_basis"), {});
deconvLayer->setStride(DimsHW(1, mHopLength));
#else
IDeconvolutionLayer* const deconvLayer = network->addDeconvolutionNd(
*denoiseLayer->getOutput(0), 1, Dims2(1, mFilterLength), stftData->get("inverse_basis"), {});
deconvLayer->setStrideNd(Dims2(1, mHopLength));
#endif
deconvLayer->setName("inverse_transform_layer");
// apply windowing
PluginBuilder modulationRemovalBuilder("Taco2ModulationRemoval", "0.1.0");
modulationRemovalBuilder.setField(
"InputLength", static_cast<int32_t>(TRTUtils::getTensorSize(*deconvLayer->getOutput(0))));
modulationRemovalBuilder.setField("FilterLength", static_cast<int32_t>(mFilterLength));
modulationRemovalBuilder.setField("HopLength", static_cast<int32_t>(mHopLength));
modulationRemovalBuilder.setField("Weights", stftData->get("win_sq"));
TRTPtr<IPluginV2> modRemoval
= modulationRemovalBuilder.make("modulation_removal_layer");
std::vector<ITensor*> modRemovalInputs{deconvLayer->getOutput(0)};
ILayer* const modRemovalLayer
= network->addPluginV2(modRemovalInputs.data(), static_cast<int>(modRemovalInputs.size()), *modRemoval);
ITensor* const output = modRemovalLayer->getOutput(0);
output->setName(OUTPUT_NAME);
network->markOutput(*output);
assert(TRTUtils::getTensorSize(*output) == static_cast<size_t>(mChunkSize));
// build engine
TRTPtr<IBuilderConfig> config(builder.createBuilderConfig());
config->setMaxWorkspaceSize(1ULL << 29); // 512 MB
if (useFP16)
{
config->setFlag(BuilderFlag::kFP16);
}
builder.setMaxBatchSize(maxBatchSize);
TRTPtr<ICudaEngine> engine(
builder.buildEngineWithConfig(*network, *config));
if (!engine)
{
throw std::runtime_error("Failed to build Denoiser engine.");
}
return engine;
}
} // namespace tts

View file

@ -0,0 +1,82 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_DENOISER_H
#define TT2I_DENOISER_H
#include "IModelImporter.h"
#include "trtPtr.h"
#include <memory>
namespace nvinfer1
{
class ICudaEngine;
class IBuilder;
} // namespace nvinfer1
namespace tts
{
class DenoiserBuilder
{
public:
/**
* @brief Create a new denoiser.
*
* @param sampleLength The number of samples.
* @param filterLength The filter length.
* @param numOverlap The number of overlapping filters.
* @param winLength The length of the window.
*/
DenoiserBuilder(int sampleLength, int filterLength = 1024, int numOverlap = 4, int winLength = 1024);
/**
* @brief Create a new Denoiser engine.
*
* @param importer The weight importer.
* @param builder The builder.
* @param maxBatchSize The maximum batch size to support.
* @param useFP16 Whether or not to allow FP16 calculations.
*
* @return The built engine.
*/
TRTPtr<nvinfer1::ICudaEngine> build(
IModelImporter& importer,
nvinfer1::IBuilder& builder,
const int maxBatchSize,
const bool useFP16);
private:
int mChunkSize;
int mFilterLength;
int mHopLength;
int mWinLength;
};
} // namespace tts
#endif
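A rough sketch of driving DenoiserBuilder directly (illustrative, not part of this commit); the 2 << 13 chunk size and the JSONModelImporter weight importer mirror DenoiserLoader::load() further below:

#include "denoiserBuilder.h"
#include "jsonModelImporter.h"

#include "NvInfer.h"

#include <string>

namespace tts
{
// hypothetical helper: build a denoiser engine from exported JSON weights
inline void buildDenoiserEngine(nvinfer1::IBuilder& builder, const std::string& weightsJson)
{
    DenoiserBuilder denoiserBuilder(2 << 13); // 16384-sample chunks
    JSONModelImporter importer(weightsJson);  // IModelImporter over the JSON weight dump
    const auto engine = denoiserBuilder.build(importer, builder, /* maxBatchSize */ 1, /* useFP16 */ true);
    // the engine can now be serialized to disk or wrapped in a DenoiserInstance
    (void) engine;
}
} // namespace tts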

View file

@ -0,0 +1,118 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "denoiserInstance.h"
#include "cudaUtils.h"
#include "dataShuffler.h"
#include <stdexcept>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
DenoiserInstance::DenoiserInstance(TRTPtr<ICudaEngine>&& engine) :
TimedObject("DenoiserInstance::infer()"),
mStreamingInstance(std::move(engine)),
mInBufferDevice(
mStreamingInstance.getChunkSize()
* mStreamingInstance.getMaxBatchSize()),
mOutBufferDevice(
mStreamingInstance.getChunkSize()
* mStreamingInstance.getMaxBatchSize())
{
// do nothing
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
void DenoiserInstance::infer(const int batchSize, const float* const inputDevice, const int inputSpacing,
const int* const inputLength, float* outputDevice)
{
startTiming();
cudaStream_t stream;
if (cudaStreamCreate(&stream) != cudaSuccess)
{
throw std::runtime_error("Failed to create stream.");
}
const int chunkSize = mStreamingInstance.getChunkSize();
int maxNumSamples = 0;
for (int i = 0; i < batchSize; ++i)
{
if (inputLength[i] > maxNumSamples)
{
maxNumSamples = inputLength[i];
}
}
mStreamingInstance.startInference();
for (int pos = 0; pos < maxNumSamples; pos += chunkSize)
{
DataShuffler::frameTransfer(
inputDevice,
mInBufferDevice.data(),
inputSpacing,
pos,
chunkSize,
batchSize,
chunkSize,
0,
stream);
mStreamingInstance.inferNext(
batchSize, mInBufferDevice.data(), mOutBufferDevice.data(), stream);
DataShuffler::frameTransfer(
mOutBufferDevice.data(),
outputDevice,
chunkSize,
0,
chunkSize,
batchSize,
inputSpacing,
pos,
stream);
}
CudaUtils::sync(stream);
cudaStreamDestroy(stream);
stopTiming();
}
} // namespace tts

View file

@ -0,0 +1,81 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_DENOISERINSTANCE_H
#define TT2I_DENOISERINSTANCE_H
#include "cudaMemory.h"
#include "denoiserStreamingInstance.h"
#include "timedObject.h"
namespace nvinfer1
{
class ICudaEngine;
} // namespace nvinfer1
namespace tts
{
class DenoiserInstance : public TimedObject
{
public:
/**
     * @brief Create a new DenoiserInstance from a built denoiser engine.
     *
     * @param engine The TensorRT engine to perform denoising with.
*/
DenoiserInstance(TRTPtr<nvinfer1::ICudaEngine>&& engine);
/**
* @brief Perform inference using the denoiser.
*
* @param batchSize The number of items in the batch.
* @param inputDevice The input tensor on the device.
* @param inputSpacing The spacing between the start of items in the batch.
* @param inputLength The length of each input.
* @param outputDevice The output tensor on the device.
*/
void infer(
const int batchSize,
const float* inputDevice,
int inputSpacing,
const int* inputLength,
float* outputDevice);
private:
DenoiserStreamingInstance mStreamingInstance;
CudaMemory<float> mInBufferDevice;
CudaMemory<float> mOutBufferDevice;
};
} // namespace tts
#endif
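A hypothetical call site for DenoiserInstance::infer() (not part of this commit), assuming the batch is packed back-to-back on the device and every item is full length:

#include "denoiserInstance.h"

#include <vector>

namespace tts
{
// hypothetical helper: denoise a batch of waveforms already resident on the GPU
inline void denoiseBatch(DenoiserInstance& denoiser,
    const float* const audioDevice, // batchSize * inputSpacing floats on the device
    float* const cleanDevice,       // output buffer of the same size on the device
    const int batchSize,
    const int inputSpacing)
{
    // treat every item as spanning the full spacing
    const std::vector<int> lengths(batchSize, inputSpacing);
    denoiser.infer(batchSize, audioDevice, inputSpacing, lengths.data(), cleanDevice);
}
} // namespace tts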

View file

@ -0,0 +1,84 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "denoiserLoader.h"
#include "denoiserBuilder.h"
#include "engineCache.h"
#include "jsonModelImporter.h"
#include "utils.h"
#include "NvInfer.h"
#include <stdexcept>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* PUBLIC STATIC METHODS ******************************************************
*****************************************************************************/
std::shared_ptr<DenoiserInstance> DenoiserLoader::load(
EngineCache& cache, IBuilder& builder, const std::string& filename, const bool fp16, const int batchSize)
{
TRTPtr<ICudaEngine> engine;
    if (Utils::hasExtension(filename, ".json"))
    {
DenoiserBuilder denoiserBuilder(2 << 13);
JSONModelImporter importer(filename);
engine = denoiserBuilder.build(importer, builder, batchSize, fp16);
// save generated engine
const std::string engFilename(filename + ".eng");
cache.save(*engine, engFilename);
}
else if (Utils::hasExtension(filename, ".eng"))
{
engine = cache.load(filename);
if (engine->getMaxBatchSize() < batchSize)
{
throw std::runtime_error(
"Engine " + filename
+ " does not support "
" the requested batch size: "
+ std::to_string(engine->getMaxBatchSize()) + " / "
+ std::to_string(batchSize)
+ "."
"Rebuild the engine with the larger batch size.");
}
}
else
{
throw std::runtime_error("Unknown model file type: " + filename);
}
return std::make_shared<DenoiserInstance>(std::move(engine));
}
} // namespace tts

View file

@ -0,0 +1,67 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_DENOISERLOADER_H
#define TT2I_DENOISERLOADER_H
#include "denoiserInstance.h"
#include <memory>
#include <string>
namespace nvinfer1
{
class IBuilder;
}
namespace tts
{
class EngineCache;
class DenoiserLoader
{
public:
/**
* @brief Load a new DenoiserInstance from an engine file or a json file.
*
* @param cache The engine cache.
* @param builder The TensorRT Engine Builder.
* @param filename The name of the engine/json file.
* @param fp16 If building an engine from a json file, whether or not to
* allow fp16 operations. If loading an engine file, this input is ignored.
* @param batchSize If building an engine from a json file, the maximum batch
* size to support. If loading an engine file, this input is ignored.
*
* @return The newly created DenoiserInstance.
*/
static std::shared_ptr<DenoiserInstance> load(EngineCache& cache, nvinfer1::IBuilder& builder,
const std::string& filename, bool fp16 = true, int batchSize = 8);
};
} // namespace tts
#endif
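A hypothetical call site for DenoiserLoader::load() (not part of this commit); the EngineCache and TensorRT builder are assumed to be created elsewhere:

#include "denoiserLoader.h"

namespace tts
{
// hypothetical helper: a ".json" path builds (and caches) a new engine,
// while a ".eng" path loads the previously cached engine directly
inline std::shared_ptr<DenoiserInstance> loadDenoiser(
    EngineCache& cache, nvinfer1::IBuilder& builder, const std::string& modelPath)
{
    return DenoiserLoader::load(cache, builder, modelPath, /* fp16 */ true, /* batchSize */ 1);
}
} // namespace tts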

View file

@ -0,0 +1,78 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "denoiserStreamingInstance.h"
#include "trtUtils.h"
#include <stdexcept>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
DenoiserStreamingInstance::DenoiserStreamingInstance(
TRTPtr<ICudaEngine>&& engine) :
TimedObject("DenoiserStreamingInstance::infer()"),
EngineDriver(std::move(engine)),
mBinding(),
mContext(getEngine().createExecutionContext()),
mChunkSize(TRTUtils::getBindingSize(getEngine(), INPUT_NAME))
{
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
void DenoiserStreamingInstance::startInference()
{
// do nothing
}
void DenoiserStreamingInstance::inferNext(
const int batchSize, const float* const inputDevice, float* outputDevice, cudaStream_t stream)
{
startTiming();
const ICudaEngine& engine = mContext->getEngine();
mBinding.setBinding(engine, OUTPUT_NAME, outputDevice);
mBinding.setBinding(engine, INPUT_NAME, inputDevice);
if (!mContext->enqueue(batchSize, mBinding.getBindings(), stream, nullptr))
{
throw std::runtime_error("Failed to run encoding.");
}
stopTiming();
}
} // namespace tts

View file

@ -0,0 +1,102 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_DENOISERSTREAMINGINSTANCE_H
#define TT2I_DENOISERSTREAMINGINSTANCE_H
#include "binding.h"
#include "engineDriver.h"
#include "timedObject.h"
namespace nvinfer1
{
class ICudaEngine;
class IExecutionContext;
} // namespace nvinfer1
namespace tts
{
class DenoiserStreamingInstance : public TimedObject, public EngineDriver
{
public:
/**
* @brief Tensor of shape {1 x INPUT_LENGTH}
*/
static constexpr const char* const INPUT_NAME = "input_denoiser";
/**
* @brief Tensor of shape {1 x OUTPUT_LENGTH}
*/
static constexpr const char* const OUTPUT_NAME = "output_denoiser";
/**
     * @brief Create a new streaming denoiser from a built denoiser engine.
     *
     * @param engine The TensorRT engine to perform denoising with.
*/
DenoiserStreamingInstance(TRTPtr<nvinfer1::ICudaEngine>&& engine);
/**
* @brief Start a new session for performing streaming inference. This
* method should be called before the first call to `inferNext()`.
*/
void startInference();
/**
* @brief Perform inference on a chunk of input.
*
* @param batchSize The size of the batch to process.
* @param inputDevice The input tensor on the device.
* @param outputDevice The output tensor on the device.
* @param stream The stream to operate on.
*/
void inferNext(const int batchSize, const float* inputDevice, float* outputDevice, cudaStream_t stream);
/**
* @brief Get the size of the chunk the denoiser will process.
*
* @return The size of the chunk.
*/
int getChunkSize() const
{
return mChunkSize;
}
private:
Binding mBinding;
TRTPtr<nvinfer1::IExecutionContext> mContext;
int mChunkSize;
};
} // namespace tts
#endif

View file

@ -0,0 +1,122 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "attentionLayerCreator.h"
#include "dims5.h"
#include "layerData.h"
#include "NvInfer.h"
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
ILayer* AttentionLayerCreator::addLocation(INetworkDefinition& network, ITensor* const input, const int attentionDim,
const int numFilters, const int kernelSize, const LayerData& convData, const LayerData& linearData,
const std::string& name)
{
// conv layer
const int padding = (kernelSize - 1) / 2;
#if NV_TENSORRT_MAJOR < 7
IConvolutionLayer* const convLayer = network.addConvolution(
*input, numFilters, DimsHW{kernelSize, 1}, convData.get("weight"), {DataType::kFLOAT, nullptr, 0});
convLayer->setPadding({padding, 0});
#else
IConvolutionLayer* const convLayer = network.addConvolutionNd(
*input, numFilters, Dims2(kernelSize, 1), convData.get("weight"), {DataType::kFLOAT, nullptr, 0});
convLayer->setPaddingNd(Dims2(padding, 0));
#endif
convLayer->setName((name + ".conv_layer").c_str());
    // need to transpose
IShuffleLayer* const transLayer = network.addShuffle(*convLayer->getOutput(0));
transLayer->setFirstTranspose({0, 2, 1, 3});
transLayer->setReshapeDimensions(Dims5{1, convLayer->getOutput(0)->getDimensions().d[2],
convLayer->getOutput(0)->getDimensions().d[1], 1, convLayer->getOutput(0)->getDimensions().d[3]});
transLayer->setName((name + ".transpose").c_str());
// fully connected layer
ILayer* const linearLayer = network.addFullyConnected(
*transLayer->getOutput(0), attentionDim, linearData.get("weight"), Weights{DataType::kFLOAT, 0, 0});
linearLayer->setName((name + ".linear_layer").c_str());
return linearLayer;
}
ILayer* AttentionLayerCreator::addEnergy(INetworkDefinition& network, ITensor* const input1, ITensor* const input2,
ITensor* const input3, const LayerData& linearData, const std::string& name)
{
// summation
ILayer* const add1Layer = network.addElementWise(*input1, *input2, ElementWiseOperation::kSUM);
add1Layer->setName((name + ".0.elementwise_sum").c_str());
ILayer* const add2Layer = network.addElementWise(*add1Layer->getOutput(0), *input3, ElementWiseOperation::kSUM);
add2Layer->setName((name + ".1.elementwise_sum").c_str());
// activation
ILayer* const actLayer = network.addActivation(*add2Layer->getOutput(0), ActivationType::kTANH);
actLayer->setName((name + ".tanh").c_str());
// fully connected layer
ILayer* const linearLayer = network.addFullyConnected(
*actLayer->getOutput(0), 1, linearData.get("weight"), Weights{DataType::kFLOAT, 0, 0});
linearLayer->setName((name + ".linear_layer").c_str());
return linearLayer;
}
ILayer* AttentionLayerCreator::addPaddedSoftMax(INetworkDefinition& network, ITensor* const input,
ITensor* const inputMask, ITensor* const inputSegments, const std::string& name)
{
// make our inputs 2 dimensional
IShuffleLayer* const maskShuffleLayer = network.addShuffle(*inputMask);
maskShuffleLayer->setReshapeDimensions(Dims2{1, -1});
maskShuffleLayer->setName((name + ".mask_reshape").c_str());
IShuffleLayer* const inputShuffleLayer = network.addShuffle(*input);
inputShuffleLayer->setReshapeDimensions(Dims2{1, -1});
inputShuffleLayer->setName((name + ".input_reshape").c_str());
// perform softmax over non-padding elements
ILayer* const softMaxLayer = network.addRaggedSoftMax(*inputShuffleLayer->getOutput(0), *inputSegments);
softMaxLayer->setName((name + ".ragged_softmax").c_str());
// zero padding
ILayer* const maskLayer = network.addElementWise(
*softMaxLayer->getOutput(0), *maskShuffleLayer->getOutput(0), ElementWiseOperation::kPROD);
maskLayer->setName((name + ".mask").c_str());
// return three dimensional output
IShuffleLayer* const outShuffle = network.addShuffle(*maskLayer->getOutput(0));
outShuffle->setReshapeDimensions(Dims3{-1, 1, 1});
outShuffle->setName((name + ".transpose").c_str());
return outShuffle;
}
} // namespace tts

View file

@ -0,0 +1,98 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_COMPOSITELAYERS_H
#define TT2I_COMPOSITELAYERS_H
#include <string>
#include <vector>
namespace nvinfer1
{
class INetworkDefinition;
class ITensor;
class ILayer;
} // namespace nvinfer1
namespace tts
{
class LayerData;
class AttentionLayerCreator
{
public:
/**
* @brief Add a location layer to the given network.
*
* @param network The network to add to.
* @param input The input tensor.
* @param attentionDim The number of dimensions.
* @param numFilters The number of filters
* @param kernelSize The size of each kernel.
* @param convData The convolution data.
* @param linearData The linear data for the fully connected layer.
* @param name The name to prefix the layers with.
*
* @return The last of the newly added layers.
*/
static nvinfer1::ILayer* addLocation(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input,
int attentionDim, int numFilters, int kernelSize, const LayerData& convData, const LayerData& linearData,
const std::string& name);
/**
* @brief Add an energy layer to the given network.
*
* @param network The network.
* @param input1 The first input to be summed.
* @param input2 The second input to be summed.
* @param input3 The third input to be summed.
* @param linearData The data for the fully connected layer.
* @param name The name to prefix layers with.
*
* @return The last layer of the newly added layers.
*/
static nvinfer1::ILayer* addEnergy(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input1,
nvinfer1::ITensor* input2, nvinfer1::ITensor* input3, const LayerData& linearData, const std::string& name);
/**
* @brief Perform a softmax on padded input.
*
* @param network The network being built.
* @param input The padded input.
* @param inputMask The mask.
* @param inputSegments The length of the input.
* @param name The name to prefix the layers with.
*
* @return The last layer.
*/
static nvinfer1::ILayer* addPaddedSoftMax(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input,
nvinfer1::ITensor* inputMask, nvinfer1::ITensor* inputSegments, const std::string& name);
};
} // namespace tts
#endif
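Stated as a formula, addPaddedSoftMax() computes, for energies $e$, valid length $L$, and binary mask $m$ (a restatement of the comments above, not taken from the source):

$$\alpha_j \;=\; m_j \cdot \frac{\exp(e_j)}{\sum_{k=1}^{L} \exp(e_k)}$$

so the attention weights are normalized over the un-padded positions only and explicitly zeroed over the padding.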

View file

@ -0,0 +1,167 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "convBatchNormCreator.h"
#include "layerData.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <cmath>
#include <stdexcept>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const float EPS = 1e-5f;
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
ILayer* ConvBatchNormCreator::add(INetworkDefinition& network, ITensor* const input, const LayerData& convData,
const LayerData& normData, const std::string& activation, const std::string& name)
{
    // determine the number of channels from the output size of the batch norm
const int numChannels = static_cast<int>(normData.get("bias").count);
// CONVOLUTION //////////////////////////////////////////////////////////////
const std::vector<float>& convWeight = newVector(static_cast<const float*>(convData.get("weight").values),
static_cast<const float*>(convData.get("weight").values) + convData.get("weight").count);
const std::vector<float>& convBias = newVector(static_cast<const float*>(convData.get("bias").values),
static_cast<const float*>(convData.get("bias").values) + convData.get("bias").count);
#if NV_TENSORRT_MAJOR < 7
IConvolutionLayer* const convLayer = network.addConvolution(
*input, numChannels, DimsHW(5, 1), TRTUtils::toWeights(convWeight), TRTUtils::toWeights(convBias));
convLayer->setPadding({2, 0});
#else
IConvolutionLayer* const convLayer = network.addConvolutionNd(
*input, numChannels, Dims2(5, 1), TRTUtils::toWeights(convWeight), TRTUtils::toWeights(convBias));
convLayer->setPaddingNd(Dims2(2, 0));
#endif
convLayer->setName((name + ".conv_layer").c_str());
ITensor* const batchInput = convLayer->getOutput(0);
// BATCH NORM ///////////////////////////////////////////////////////////////
// create vectors
std::vector<float>& negativeMeanWeights = newVector(static_cast<const float*>(normData.get("running_mean").values),
static_cast<const float*>(normData.get("running_mean").values) + normData.get("running_mean").count);
std::vector<float>& scaleWeights = newVector(static_cast<const float*>(normData.get("weight").values),
static_cast<const float*>(normData.get("weight").values) + normData.get("weight").count);
const std::vector<float>& normBias = newVector(static_cast<const float*>(normData.get("bias").values),
static_cast<const float*>(normData.get("bias").values) + normData.get("bias").count);
const Weights emptyWeights{DataType::kFLOAT, nullptr, 0};
// check input
if (negativeMeanWeights.size() != scaleWeights.size())
{
throw std::runtime_error("Mismatch between 'running_mean' and 'weight' sizes: "
+ std::to_string(negativeMeanWeights.size()) + " " + std::to_string(scaleWeights.size()) + ".");
}
if (static_cast<size_t>(normData.get("running_var").count) != scaleWeights.size())
{
throw std::runtime_error("Size of 'running_var' does not match 'running_mean':"
+ std::to_string(normData.get("running_var").count) + " vs. " + std::to_string(scaleWeights.size()));
}
// create negative mean values
for (float& val : negativeMeanWeights)
{
val = -val;
}
// compute scaling matrix
// weight / sqrt(var(x) + eps)
const float* varWeights = static_cast<const float*>(normData.get("running_var").values);
for (size_t i = 0; i < scaleWeights.size(); ++i)
{
const float den = std::sqrt(varWeights[i] + EPS);
scaleWeights[i] /= den;
}
// x - mean(x)
ILayer* const shiftedLayer = network.addScale(
*batchInput, ScaleMode::kCHANNEL, TRTUtils::toWeights(negativeMeanWeights), emptyWeights, emptyWeights);
shiftedLayer->setName((name + ".shift").c_str());
// ((x - mean(x)) / sqrt(var(x) + eps)) * weight + bias
ILayer* const scaleLayer = network.addScale(*shiftedLayer->getOutput(0), ScaleMode::kCHANNEL,
TRTUtils::toWeights(normBias), TRTUtils::toWeights(scaleWeights), emptyWeights);
scaleLayer->setName((name + ".scale").c_str());
ITensor* const actInput = scaleLayer->getOutput(0);
// ACTIVATION ///////////////////////////////////////////////////////////////
ILayer* outputLayer;
if (activation == "relu")
{
outputLayer = network.addActivation(*actInput, ActivationType::kRELU);
outputLayer->setName((name + ".relu").c_str());
}
else if (activation == "tanh")
{
outputLayer = network.addActivation(*actInput, ActivationType::kTANH);
outputLayer->setName((name + ".tanh").c_str());
}
else if (activation == "none")
{
outputLayer = scaleLayer;
}
else
{
throw std::runtime_error("Unknown activation '" + activation + "'.");
}
return outputLayer;
}
/******************************************************************************
* PRIVATE METHODS ************************************************************
*****************************************************************************/
std::vector<float>& ConvBatchNormCreator::newVector(const float* const begin, const float* const end)
{
mData.emplace_back(new std::vector<float>(begin, end));
return *mData.back().get();
}
} // namespace tts
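For reference, the shift/scale pair above is the standard inference-time folding of batch normalization (a restatement of the in-code comments, with $\mu$ = running_mean, $\sigma^2$ = running_var, and $w$, $b$ the batch-norm weight and bias):

$$z \;=\; \frac{x - \mu}{\sqrt{\sigma^{2} + \epsilon}} \cdot w + b \;=\; \bigl(x + (-\mu)\bigr) \cdot \frac{w}{\sqrt{\sigma^{2} + \epsilon}} + b, \qquad \epsilon = 10^{-5}$$

The shift layer applies $x + (-\mu)$ per channel, and the scale layer applies the precomputed $w / \sqrt{\sigma^{2} + \epsilon}$ and $b$.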

View file

@ -0,0 +1,93 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_CONVBATCHNORMCREATOR_H
#define TT2I_CONVBATCHNORMCREATOR_H
#include <memory>
#include <string>
#include <vector>
namespace nvinfer1
{
class INetworkDefinition;
class ILayer;
class ITensor;
} // namespace nvinfer1
namespace tts
{
class LayerData;
class ConvBatchNormCreator
{
public:
/**
* @brief Add a 1d-convolution plus batch normalization followed by
     * activation to the network, where the convolution has a kernel size of 5
     * and padding of 2 (to preserve shape).
* ```
* y = conv(x)
* z = ( (y-Mean[y]) / sqrt(Var[y]+eps) ) * weight + bias
* ```
*
     * WARNING: This sets pointers from the network to this object's members,
     * so this object must not be destroyed or moved until the lifetime of the
     * network has ended.
*
* @param network The network to add to.
* @param input The input tensor.
* @param convData The LayerData object that has `weight` and `bias` for the
* convolution.
* @param normData The LayerData object that has `running_mean`,
* `running_var`, `weight`, and `bias` entries for the batch norm.
* @param activation May be "relu", "tanh", or "none".
* @param name The name to prefix the layers with.
*
* @return The last of the newly added layers.
*/
nvinfer1::ILayer* add(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input, const LayerData& convData,
const LayerData& normData, const std::string& activation, const std::string& name);
private:
std::vector<std::unique_ptr<std::vector<float>>> mData{};
/**
* @brief Create a new vector to be stored inside of this object.
*
* @param begin The starting iterator to initialize with.
* @param end The ending iterator to initialize with.
*
* @return The vector.
*/
std::vector<float>& newVector(const float* begin, const float* end);
};
} // namespace tts
#endif

View file

@ -0,0 +1,221 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "lstm.h"
#include "NvInfer.h"
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* PUBLIC STATIC METHODS ******************************************************
*****************************************************************************/
ILayer* LSTM::addPaddedBidirectional(INetworkDefinition* const network, ITensor* const input,
ITensor* const inputLength, const int numDimensions, const LayerData& lstmData)
{
// build LSTM
const int hiddenSize = numDimensions / 2;
IRNNv2Layer* lstm = network->addRNNv2(*input, 1, hiddenSize, input->getDimensions().d[1], RNNOperation::kLSTM);
lstm->setDirection(RNNDirection::kBIDIRECTION);
lstm->setSequenceLengths(*inputLength);
{
const int64_t inputBlockSize = numDimensions * hiddenSize;
// pytorch weights are stored in "weight_ih_l0" = {W_ii|W_if|W_ig|W_io}
const float* inputWeights = (const float*) lstmData.get("weight_ih_l0").values;
Weights wii{DataType::kFLOAT, (void*) (inputWeights), inputBlockSize};
Weights wif{DataType::kFLOAT, (void*) (inputWeights + inputBlockSize), inputBlockSize};
Weights wig{DataType::kFLOAT, (void*) (inputWeights + 2 * inputBlockSize), inputBlockSize};
Weights wio{DataType::kFLOAT, (void*) (inputWeights + 3 * inputBlockSize), inputBlockSize};
lstm->setWeightsForGate(0, RNNGateType::kINPUT, true, wii);
lstm->setWeightsForGate(0, RNNGateType::kCELL, true, wig);
lstm->setWeightsForGate(0, RNNGateType::kFORGET, true, wif);
lstm->setWeightsForGate(0, RNNGateType::kOUTPUT, true, wio);
const float* inputBias = (const float*) lstmData.get("bias_ih_l0").values;
Weights bii{DataType::kFLOAT, (void*) (inputBias), hiddenSize};
Weights bif{DataType::kFLOAT, (void*) (inputBias + hiddenSize), hiddenSize};
Weights big{DataType::kFLOAT, (void*) (inputBias + 2 * hiddenSize), hiddenSize};
Weights bio{DataType::kFLOAT, (void*) (inputBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(0, RNNGateType::kINPUT, true, bii);
lstm->setBiasForGate(0, RNNGateType::kCELL, true, big);
lstm->setBiasForGate(0, RNNGateType::kFORGET, true, bif);
lstm->setBiasForGate(0, RNNGateType::kOUTPUT, true, bio);
const int64_t hiddenBlockSize = hiddenSize * hiddenSize;
// pytorch weights are stored in "weight_hh_l0" = {W_hi|W_hf|W_hg|W_ho}
const float* hiddenWeights = (const float*) lstmData.get("weight_hh_l0").values;
Weights whi{DataType::kFLOAT, (void*) (hiddenWeights), hiddenBlockSize};
Weights whf{DataType::kFLOAT, (void*) (hiddenWeights + hiddenBlockSize), hiddenBlockSize};
Weights whg{DataType::kFLOAT, (void*) (hiddenWeights + 2 * hiddenBlockSize), hiddenBlockSize};
Weights who{DataType::kFLOAT, (void*) (hiddenWeights + 3 * hiddenBlockSize), hiddenBlockSize};
lstm->setWeightsForGate(0, RNNGateType::kINPUT, false, whi);
lstm->setWeightsForGate(0, RNNGateType::kCELL, false, whg);
lstm->setWeightsForGate(0, RNNGateType::kFORGET, false, whf);
lstm->setWeightsForGate(0, RNNGateType::kOUTPUT, false, who);
const float* hiddenBias = (const float*) lstmData.get("bias_hh_l0").values;
Weights bhi{DataType::kFLOAT, (void*) (hiddenBias), hiddenSize};
Weights bhf{DataType::kFLOAT, (void*) (hiddenBias + hiddenSize), hiddenSize};
Weights bhg{DataType::kFLOAT, (void*) (hiddenBias + 2 * hiddenSize), hiddenSize};
Weights bho{DataType::kFLOAT, (void*) (hiddenBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(0, RNNGateType::kINPUT, false, bhi);
lstm->setBiasForGate(0, RNNGateType::kCELL, false, bhg);
lstm->setBiasForGate(0, RNNGateType::kFORGET, false, bhf);
lstm->setBiasForGate(0, RNNGateType::kOUTPUT, false, bho);
}
{
const int64_t inputBlockSize = numDimensions * hiddenSize;
// pytorch weights are stored in "weight_ih_l0" = {W_ii|W_if|W_ig|W_io}
const float* inputWeights = (const float*) lstmData.get("weight_ih_l0_reverse").values;
Weights wii{DataType::kFLOAT, (void*) (inputWeights), inputBlockSize};
Weights wif{DataType::kFLOAT, (void*) (inputWeights + inputBlockSize), inputBlockSize};
Weights wig{DataType::kFLOAT, (void*) (inputWeights + 2 * inputBlockSize), inputBlockSize};
Weights wio{DataType::kFLOAT, (void*) (inputWeights + 3 * inputBlockSize), inputBlockSize};
lstm->setWeightsForGate(1, RNNGateType::kINPUT, true, wii);
lstm->setWeightsForGate(1, RNNGateType::kCELL, true, wig);
lstm->setWeightsForGate(1, RNNGateType::kFORGET, true, wif);
lstm->setWeightsForGate(1, RNNGateType::kOUTPUT, true, wio);
const float* inputBias = (const float*) lstmData.get("bias_ih_l0_reverse").values;
Weights bii{DataType::kFLOAT, (void*) (inputBias), hiddenSize};
Weights bif{DataType::kFLOAT, (void*) (inputBias + hiddenSize), hiddenSize};
Weights big{DataType::kFLOAT, (void*) (inputBias + 2 * hiddenSize), hiddenSize};
Weights bio{DataType::kFLOAT, (void*) (inputBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(1, RNNGateType::kINPUT, true, bii);
lstm->setBiasForGate(1, RNNGateType::kCELL, true, big);
lstm->setBiasForGate(1, RNNGateType::kFORGET, true, bif);
lstm->setBiasForGate(1, RNNGateType::kOUTPUT, true, bio);
const int64_t hiddenBlockSize = hiddenSize * hiddenSize;
// pytorch weights are stored in "weight_hh_l0" = {W_hi|W_hf|W_hg|W_ho}
const float* hiddenWeights = (const float*) lstmData.get("weight_hh_l0_reverse").values;
Weights whi{DataType::kFLOAT, (void*) (hiddenWeights), hiddenBlockSize};
Weights whf{DataType::kFLOAT, (void*) (hiddenWeights + hiddenBlockSize), hiddenBlockSize};
Weights whg{DataType::kFLOAT, (void*) (hiddenWeights + 2 * hiddenBlockSize), hiddenBlockSize};
Weights who{DataType::kFLOAT, (void*) (hiddenWeights + 3 * hiddenBlockSize), hiddenBlockSize};
lstm->setWeightsForGate(1, RNNGateType::kINPUT, false, whi);
lstm->setWeightsForGate(1, RNNGateType::kCELL, false, whg);
lstm->setWeightsForGate(1, RNNGateType::kFORGET, false, whf);
lstm->setWeightsForGate(1, RNNGateType::kOUTPUT, false, who);
const float* hiddenBias = (const float*) lstmData.get("bias_hh_l0_reverse").values;
Weights bhi{DataType::kFLOAT, (void*) (hiddenBias), hiddenSize};
Weights bhf{DataType::kFLOAT, (void*) (hiddenBias + hiddenSize), hiddenSize};
Weights bhg{DataType::kFLOAT, (void*) (hiddenBias + 2 * hiddenSize), hiddenSize};
Weights bho{DataType::kFLOAT, (void*) (hiddenBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(1, RNNGateType::kINPUT, false, bhi);
lstm->setBiasForGate(1, RNNGateType::kCELL, false, bhg);
lstm->setBiasForGate(1, RNNGateType::kFORGET, false, bhf);
lstm->setBiasForGate(1, RNNGateType::kOUTPUT, false, bho);
}
return lstm;
}
ILayer* LSTM::addUnidirectionalCell(INetworkDefinition* const network, ITensor* const input,
ITensor* const hiddenStatesIn, ITensor* const cellStatesIn, const int numDimensions, const LayerData& lstmData)
{
// build LSTM
const int hiddenSize = numDimensions;
const int inputLength = input->getDimensions().d[2];
IRNNv2Layer* lstm = network->addRNNv2(*input, 1, hiddenSize, input->getDimensions().d[1], RNNOperation::kLSTM);
lstm->setDirection(RNNDirection::kUNIDIRECTION);
const int64_t inputBlockSize = inputLength * hiddenSize;
// pytorch weights are stored in "weight_ih" = {W_ii|W_if|W_ig|W_io}
const float* inputWeights = (const float*) lstmData.get("weight_ih").values;
Weights wii{DataType::kFLOAT, (void*) (inputWeights), inputBlockSize};
Weights wif{DataType::kFLOAT, (void*) (inputWeights + inputBlockSize), inputBlockSize};
Weights wig{DataType::kFLOAT, (void*) (inputWeights + 2 * inputBlockSize), inputBlockSize};
Weights wio{DataType::kFLOAT, (void*) (inputWeights + 3 * inputBlockSize), inputBlockSize};
lstm->setWeightsForGate(0, RNNGateType::kINPUT, true, wii);
lstm->setWeightsForGate(0, RNNGateType::kCELL, true, wig);
lstm->setWeightsForGate(0, RNNGateType::kFORGET, true, wif);
lstm->setWeightsForGate(0, RNNGateType::kOUTPUT, true, wio);
const float* inputBias = (const float*) lstmData.get("bias_ih").values;
Weights bii{DataType::kFLOAT, (void*) (inputBias), hiddenSize};
Weights bif{DataType::kFLOAT, (void*) (inputBias + hiddenSize), hiddenSize};
Weights big{DataType::kFLOAT, (void*) (inputBias + 2 * hiddenSize), hiddenSize};
Weights bio{DataType::kFLOAT, (void*) (inputBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(0, RNNGateType::kINPUT, true, bii);
lstm->setBiasForGate(0, RNNGateType::kCELL, true, big);
lstm->setBiasForGate(0, RNNGateType::kFORGET, true, bif);
lstm->setBiasForGate(0, RNNGateType::kOUTPUT, true, bio);
const int64_t hiddenBlockSize = hiddenSize * hiddenSize;
// pytorch weights are stored in "weight_hh" = {W_hi|W_hf|W_hg|W_ho}
const float* hiddenWeights = (const float*) lstmData.get("weight_hh").values;
Weights whi{DataType::kFLOAT, (void*) (hiddenWeights), hiddenBlockSize};
Weights whf{DataType::kFLOAT, (void*) (hiddenWeights + hiddenBlockSize), hiddenBlockSize};
Weights whg{DataType::kFLOAT, (void*) (hiddenWeights + 2 * hiddenBlockSize), hiddenBlockSize};
Weights who{DataType::kFLOAT, (void*) (hiddenWeights + 3 * hiddenBlockSize), hiddenBlockSize};
lstm->setWeightsForGate(0, RNNGateType::kINPUT, false, whi);
lstm->setWeightsForGate(0, RNNGateType::kCELL, false, whg);
lstm->setWeightsForGate(0, RNNGateType::kFORGET, false, whf);
lstm->setWeightsForGate(0, RNNGateType::kOUTPUT, false, who);
const float* hiddenBias = (const float*) lstmData.get("bias_hh").values;
Weights bhi{DataType::kFLOAT, (void*) (hiddenBias), hiddenSize};
Weights bhf{DataType::kFLOAT, (void*) (hiddenBias + hiddenSize), hiddenSize};
Weights bhg{DataType::kFLOAT, (void*) (hiddenBias + 2 * hiddenSize), hiddenSize};
Weights bho{DataType::kFLOAT, (void*) (hiddenBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(0, RNNGateType::kINPUT, false, bhi);
lstm->setBiasForGate(0, RNNGateType::kCELL, false, bhg);
lstm->setBiasForGate(0, RNNGateType::kFORGET, false, bhf);
lstm->setBiasForGate(0, RNNGateType::kOUTPUT, false, bho);
lstm->setHiddenState(*hiddenStatesIn);
lstm->setCellState(*cellStatesIn);
return lstm;
}
} // namespace tts

View file

@@ -0,0 +1,84 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_LSTM_H
#define TT2I_LSTM_H
#include "layerData.h"
namespace nvinfer1
{
class INetworkDefinition;
class ITensor;
class ILayer;
} // namespace nvinfer1
namespace tts
{
class LSTM
{
public:
/**
* @brief Add a new bidirectional LSTM layer to the network with padding at the
* end of the sequence, and with a hidden size equal to half the number of
* output dimensions.
*
* @param network The network to add to.
* @param input The input tensor.
* @param inputLength The length of each input sequence.
* @param numDimensions The number of output dimensions of the LSTM.
* @param lstmData The LSTM weights (must be in
* scope until the network is finished building).
* @param name The name to prefix the layers with.
*
* @return The last of the newly added layers.
*/
static nvinfer1::ILayer* addPaddedBidirectional(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor* input,
nvinfer1::ITensor* inputLength, int numDimensions, const LayerData& lstmData);
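/**
* Example (hypothetical sketch, not part of the original header): wiring the
* encoder LSTM after the convolution stack, assuming `network`, `convOutput`,
* `inputLengths`, and `encoderLstmData` already exist and the encoder output
* width is 512.
*
*   nvinfer1::ILayer* lstm = LSTM::addPaddedBidirectional(
*       network, convOutput, inputLengths, 512, encoderLstmData);
*   nvinfer1::ITensor* encoded = lstm->getOutput(0);
*/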
/**
* @brief Add a new unidirectional LSTM cell to the network, with a hidden size
* equal to the number of output dimensions.
*
* @param network The network to add to.
* @param input The input tensor.
* @param hiddenStatesIn The input hidden states.
* @param cellStatesIn The input cell states.
* @param numDimensions The number of output dimensions of the LSTM.
* @param lstmData The LSTM weights (must be in
* scope until the network is finished building).
*
* @return The last of the newly added layers.
*/
static nvinfer1::ILayer* addUnidirectionalCell(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor* input,
nvinfer1::ITensor* hiddenStatesIn, nvinfer1::ITensor* cellStatesIn, int numDimensions,
const LayerData& lstmData);
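/**
* Example (hypothetical sketch, not part of the original header): a single
* decoder LSTM cell step, assuming `network`, `cellInput`, `hiddenIn`,
* `cellIn`, and `decoderLstmData` already exist and the cell width is 1024.
* IRNNv2Layer exposes the sequence output, final hidden state, and final cell
* state as outputs 0, 1, and 2 respectively.
*
*   nvinfer1::ILayer* cell = LSTM::addUnidirectionalCell(
*       network, cellInput, hiddenIn, cellIn, 1024, decoderLstmData);
*   nvinfer1::ITensor* hiddenOut = cell->getOutput(1);
*   nvinfer1::ITensor* cellOut = cell->getOutput(2);
*/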
};
} // namespace tts
#endif

View file

@@ -0,0 +1,18 @@
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
file(GLOB SRCS *.cpp *.cu)
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)

View file

@@ -0,0 +1,440 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "taco2AttentionLayerKernel.h"
#include "taco2Utils.h"
using namespace tts;
namespace nvinfer1
{
namespace plugin
{
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const int ENERGY_BLOCK_SIZE = 128;
constexpr const int CONV_BLOCK_SIZE = 128;
constexpr const int QUERY_NUM_COLS = 1024;
constexpr const int QUERY_COL_SIZE = 128;
constexpr const int WARP_SIZE = 32;
static_assert(QUERY_NUM_COLS % QUERY_COL_SIZE == 0, "QUERY_NUM_COLS must be a multiple of QUERY_COL_SIZE");
} // namespace
const float Taco2AttentionLayerKernel::ONE = 1.0f;
const float Taco2AttentionLayerKernel::ZERO = 0.0f;
/******************************************************************************
* CUDA KERNELS ***************************************************************
*****************************************************************************/
template <typename T, int NUM_THREADS>
__device__ inline T warpSum(T const initVal)
{
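// shuffle-down tree reduction across NUM_THREADS lanes of a warp;
// assumes NUM_THREADS is a power of two no larger than WARP_SIZE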
constexpr const uint32_t mask = 0xffffffff >> (WARP_SIZE - NUM_THREADS);
T val = initVal;
#pragma unroll
for (int d = NUM_THREADS / 2; d > 0; d /= 2)
{
val += __shfl_down_sync(mask, val, d, NUM_THREADS);
}
return val;
}
template <typename T, int BLOCK_SIZE>
__device__ T cooperativeSum(T const initVal, T* const buffer)
{
// first all warps reduce to single value
assert(BLOCK_SIZE % WARP_SIZE == 0);
assert(BLOCK_SIZE <= WARP_SIZE * WARP_SIZE);
T val = warpSum<T, WARP_SIZE>(initVal);
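// then stage the per-warp partial sums in shared memory and let the
// first warp reduce them (only needed when the block spans multiple warps)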
if (BLOCK_SIZE > WARP_SIZE)
{
if (threadIdx.x % WARP_SIZE == 0)
{
buffer[threadIdx.x / WARP_SIZE] = val;
}
__syncthreads();
if (threadIdx.x < (BLOCK_SIZE / WARP_SIZE))
{
val = warpSum<T, BLOCK_SIZE / WARP_SIZE>(buffer[threadIdx.x]);
}
}
return val;
}
__global__ void attentionQueryGemvKernel(const float* const weights, const float* const input, float* const output,
const int inputLength, const int outputLength)
{
__shared__ float shared[QUERY_COL_SIZE];
assert(gridDim.x == outputLength);
assert(inputLength == QUERY_NUM_COLS);
// perform mat vec
float v = 0.0f;
for (int col = threadIdx.x; col < QUERY_NUM_COLS; col += QUERY_COL_SIZE)
{
// load chunk
v += input[col] * weights[blockIdx.x * QUERY_NUM_COLS + col];
}
v = cooperativeSum<float, QUERY_COL_SIZE>(v, shared);
// add bias and write
if (threadIdx.x == 0)
{
output[blockIdx.x] = v;
}
}
__global__ void attentionEnergyKernel(const float* const query, const float* const processedMemory,
const float* const location, const float* const weights, const int inputLength, float* const blockSums)
{
// first every thread must load their 'query' cell
const float q = query[threadIdx.x];
// shared memory scratch for the block-wide reduction (ENERGY_BLOCK_SIZE floats)
__shared__ float summation[ENERGY_BLOCK_SIZE];
// iterate over rows to create sums and perform tanh
const int gIdx = blockIdx.x * ENERGY_BLOCK_SIZE + threadIdx.x;
const float v = q + processedMemory[gIdx] + location[gIdx];
float val = tanh(v) * weights[threadIdx.x];
val = cooperativeSum<float, ENERGY_BLOCK_SIZE>(val, summation);
// perform simplistic reduction
if (threadIdx.x == 0)
{
// write the exponentiated energy for this position to the global scratch buffer
blockSums[blockIdx.x] = exp(val);
}
}
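// attentionEnergyKernel (above) writes exp(e_i) for each input position into
// blockSums; attentionNormalizeAndSumKernel (below) divides by their total to
// obtain the attention weights and accumulates them into the second weight
// channel used by the location convolution on the next decoder step.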
__global__ void attentionNormalizeAndSumKernel(
const float* const elemAccumsIn, float* const elems, const int numElems, const float* const blockSums)
{
__shared__ float sums[ENERGY_BLOCK_SIZE];
__shared__ float invSum;
// each block sums up the blockSums on its own
float v = 0;
for (int i = threadIdx.x; i < gridDim.x; i += blockDim.x)
{
v += blockSums[i];
}
v = cooperativeSum<float, ENERGY_BLOCK_SIZE>(v, sums);
if (threadIdx.x == 0)
{
invSum = 1.0f / v;
}
__syncthreads();
// normalize and sum
float* const elemAccumsOut = elems + numElems;
for (int i = threadIdx.x + (blockIdx.x * blockDim.x); i < numElems; i += gridDim.x * blockDim.x)
{
const float val = blockSums[i] * invSum;
elems[i] = val;
elemAccumsOut[i] = val + elemAccumsIn[i];
}
}
__global__ void attentionConvolutionKernel(const float* const convWeights, const float* const attWeights,
float* const output, const int inputLength, const int kernelSize)
{
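// each block computes one filter across the whole sequence: threadIdx.y picks
// one of the two input channels (current and accumulated attention weights),
// and the loop slides a CONV_BLOCK_SIZE window with a 32-element halo through
// shared memory, summing the two channels before writing the result out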
__shared__ float kernels[32 * 2];
__shared__ float input[(CONV_BLOCK_SIZE + 32) * 2];
__shared__ float sum[CONV_BLOCK_SIZE * 2];
const int halfKernel = (kernelSize - 1) / 2;
const int inputOffset = 32 - halfKernel;
// all threads work to populate the shared memory kernels
if (threadIdx.x < kernelSize)
{
kernels[threadIdx.x + threadIdx.y * 32]
= convWeights[blockIdx.x * (kernelSize * 2) + (threadIdx.x + threadIdx.y * kernelSize)];
}
// set initial input zero for second half
if (threadIdx.x < 32)
{
if (threadIdx.x < halfKernel || threadIdx.x - halfKernel >= inputLength)
{
input[CONV_BLOCK_SIZE + threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)] = 0;
}
else
{
input[CONV_BLOCK_SIZE + threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)]
= attWeights[threadIdx.x - halfKernel + threadIdx.y * inputLength];
}
}
__syncthreads();
for (int i = 0; i < inputLength; i += CONV_BLOCK_SIZE)
{
// shift second half into first half
if (threadIdx.x < 32)
{
input[threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)]
= input[CONV_BLOCK_SIZE + threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)];
}
__syncthreads();
// copy in second half
float v = 0;
if (i + threadIdx.x + inputOffset < inputLength)
{
v = attWeights[i + threadIdx.x + inputOffset + threadIdx.y * inputLength];
}
input[32 + threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)] = v;
__syncthreads();
// multiply with kernel
float a = 0.0f;
for (int j = 0; j < kernelSize; ++j)
{
const int k = threadIdx.x + j + threadIdx.y * (CONV_BLOCK_SIZE + 32);
a += input[k] * kernels[j + threadIdx.y * 32];
}
sum[threadIdx.x + threadIdx.y * CONV_BLOCK_SIZE] = a;
__syncthreads();
// write to global memory
if (threadIdx.y == 0 && threadIdx.x + i < inputLength)
{
output[(blockIdx.x * inputLength) + i + threadIdx.x]
= sum[threadIdx.x] + sum[threadIdx.x + CONV_BLOCK_SIZE];
}
}
}
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
Taco2AttentionLayerKernel::Taco2AttentionLayerKernel(
const std::vector<float>& queryWeightsHost,
const std::vector<float>& convWeightsHost,
const std::vector<float>& locationWeightsHost,
const std::vector<float>& energyWeightsHost,
const int encLength,
const int numQueryDimension,
const int numFilters,
const int convKernelSize,
const int numAttentionDimension) :
mNumEncodingDimension(encLength),
mNumQueryDimension(numQueryDimension),
mNumFilters(numFilters),
mConvKernelSize(convKernelSize),
mNumAttentionDimension(numAttentionDimension),
mQueryWeightsDevice(),
mConvWeightsDevice(),
mLocationWeightsDevice(),
mEnergyWeightsDevice(),
mCublasHandle{}
{
const size_t numExpectedQueryWeights = mNumAttentionDimension * mNumQueryDimension;
const size_t numExpectedConvWeights = mNumFilters * mConvKernelSize * 2;
const size_t numExpectedLocationWeights = mNumAttentionDimension * mNumFilters;
const size_t numExpectedEnergyWeights = mNumAttentionDimension;
if (queryWeightsHost.size() != numExpectedQueryWeights)
{
throw std::runtime_error("Expected " + std::to_string(numExpectedQueryWeights) + " query weights but got "
+ std::to_string(queryWeightsHost.size()) + " instead.");
}
else if (convWeightsHost.size() != numExpectedConvWeights)
{
throw std::runtime_error("Expected " + std::to_string(numExpectedConvWeights) + " convolution weights but got "
+ std::to_string(convWeightsHost.size()) + " instead.");
}
else if (locationWeightsHost.size() != numExpectedLocationWeights)
{
throw std::runtime_error("Expected " + std::to_string(numExpectedLocationWeights) + " location weights but got "
+ std::to_string(locationWeightsHost.size()) + " instead.");
}
else if (energyWeightsHost.size() != numExpectedEnergyWeights)
{
throw std::runtime_error("Expected " + std::to_string(numExpectedEnergyWeights) + " energy weights but got "
+ std::to_string(energyWeightsHost.size()) + " instead.");
}
// copy up weights to GPU
// keep in row major [128x1024]
mQueryWeightsDevice = CudaMemory<float>(queryWeightsHost);
// convolution has [32x2x31] weights (filters x channels x kernel size).
mConvWeightsDevice = CudaMemory<float>(convWeightsHost);
// transpose from column major [32x128] to column major [128x32]
std::vector<float> transLocationWeights(locationWeightsHost.size());
for (int j = 0; j < mNumAttentionDimension; ++j)
{
for (int i = 0; i < mNumFilters; ++i)
{
transLocationWeights[i * mNumAttentionDimension + j] = locationWeightsHost[j * mNumFilters + i];
}
}
mLocationWeightsDevice = CudaMemory<float>(transLocationWeights);
// energy FC is [1x128]
mEnergyWeightsDevice = CudaMemory<float>(energyWeightsHost);
// initialize cublas
if (cublasCreate(&mCublasHandle) != CUBLAS_STATUS_SUCCESS)
{
throw std::runtime_error("Failed to create cublas handle.");
}
}
Taco2AttentionLayerKernel::~Taco2AttentionLayerKernel()
{
cublasDestroy(mCublasHandle);
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
void Taco2AttentionLayerKernel::execute(const float* const memoryDevice, const float* const processedMemoryDevice,
const float* const weightsDevice, const float* const attentionHiddenDevice, float* const outputContextDevice,
float* const outputWeightsDevice, const int inputLength, float* const workspace, cudaStream_t stream)
{
float* const queryOutput = workspace;
float* const convOutput = queryOutput + mNumAttentionDimension;
float* const elemSum = convOutput + (inputLength * mNumFilters);
float* const energyScratch = elemSum + (inputLength * mNumAttentionDimension);
cublasSetStream(mCublasHandle, stream);
// launch fully connected layer to parse LSTM hidden states -
// multiplying 128x1024 weights with 1024 inputs, to get 128 outputs
{
const dim3 grid(mNumAttentionDimension);
const dim3 block(QUERY_COL_SIZE);
attentionQueryGemvKernel<<<grid, block, 0, stream>>>(
mQueryWeightsDevice.data(),
attentionHiddenDevice,
queryOutput,
mNumQueryDimension,
mNumAttentionDimension);
}
// perform convolution
{
const dim3 grid(mNumFilters);
const dim3 block(CONV_BLOCK_SIZE, 2);
// only works for 2 channels
assert(mConvKernelSize <= CONV_BLOCK_SIZE);
attentionConvolutionKernel<<<grid, block, 0, stream>>>(
mConvWeightsDevice.data(),
weightsDevice,
convOutput,
inputLength,
mConvKernelSize);
}
// location linear layer: [128 x 32] location weights times the [32 x L] transposed convolution output
cublasStatus_t err = cublasSgemm(
mCublasHandle,
CUBLAS_OP_N,
CUBLAS_OP_T,
mNumAttentionDimension,
inputLength,
mNumFilters,
&ONE,
mLocationWeightsDevice.data(),
mNumAttentionDimension,
convOutput,
inputLength,
&ZERO,
elemSum,
mNumAttentionDimension);
if (err != CUBLAS_STATUS_SUCCESS)
{
throw std::runtime_error("Location layer failed in cublas.");
}
// perform energy calculation
{
const int numBlocks = inputLength;
if (ENERGY_BLOCK_SIZE != mNumAttentionDimension)
{
throw std::runtime_error("mNumAttentionDimension must be " + std::to_string(ENERGY_BLOCK_SIZE));
}
const dim3 grid(numBlocks);
const dim3 block(ENERGY_BLOCK_SIZE);
attentionEnergyKernel<<<grid, block, 0, stream>>>(
queryOutput,
processedMemoryDevice,
elemSum,
mEnergyWeightsDevice.data(),
inputLength,
energyScratch);
attentionNormalizeAndSumKernel<<<grid, block, 0, stream>>>(
weightsDevice + inputLength, outputWeightsDevice, inputLength, energyScratch);
}
// finally perform mmLayer
err = cublasSgemv(mCublasHandle, CUBLAS_OP_N, mNumEncodingDimension, inputLength, &ONE, memoryDevice,
mNumEncodingDimension, outputWeightsDevice, 1, &ZERO, outputContextDevice, 1);
if (err != CUBLAS_STATUS_SUCCESS)
{
throw std::runtime_error("Matrix multiply layer failed in cublas.");
}
}
} // namespace plugin
} // namespace nvinfer1

View file

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_ATTENTIONLAYERKERNEL_H
#define TT2I_ATTENTIONLAYERKERNEL_H
#include "cudaMemory.h"
#include "cuda_runtime.h"
#include "cublas_v2.h"
#include <vector>
namespace nvinfer1
{
namespace plugin
{
class Taco2AttentionLayerKernel
{
public:
/**
* @brief Create a new Taco2AttentionLayerKernel.
*
* @param queryWeights The query weights.
* @param convWeights The convolution weights.
* @param locationWeights The location weights.
* @param energyWeights The energy weights.
* @param encLength The encoding length.
* @param queryDimension The number of query dimensions.
* @param numFilters The number of convolution filters.
* @param convKernelSize The convolution kernel size.
* @param attDimension The number of attention dimensions.
*/
Taco2AttentionLayerKernel(const std::vector<float>& queryWeights, const std::vector<float>& convWeights,
const std::vector<float>& locationWeights, const std::vector<float>& energyWeights, int encLength,
int queryDimension, int numFilters, int convKernelSize, int attDimension);
// delete copy constructor and operator
Taco2AttentionLayerKernel(const Taco2AttentionLayerKernel& other) = delete;
Taco2AttentionLayerKernel& operator=(const Taco2AttentionLayerKernel& other) = delete;
/**
* @brief Destructor.
*/
~Taco2AttentionLayerKernel();
/**
* @brief Execute this kernel.
*
* @param memoryDevice The "Memory" tensor on the device.
* @param processedMemoryDevice The "Processed Memory" tensor on the
* device.
* @param weightsDevice The "Weights" tensor for input on the device.
* @param attentionHiddenDevice The hidden states from the attention LSTM
* on the device.
* @param outputContextDevice The attention context on the device to write
* to.
* @param outputWeightsDevice The "Weights" tensor to use as output.
* @param inputLength The length of the input to process (number chars).
* @param workspace The workspace.
* @param stream The stream to operate on.
*/
void execute(const float* memoryDevice, const float* processedMemoryDevice, const float* weightsDevice,
const float* attentionHiddenDevice, float* const outputContextDevice, float* const outputWeightsDevice,
const int inputLength, float* const workspace, cudaStream_t stream);
private:
static const float ONE;
static const float ZERO;
int mNumEncodingDimension;
int mNumQueryDimension;
int mNumFilters;
int mConvKernelSize;
int mNumAttentionDimension;
tts::CudaMemory<float> mQueryWeightsDevice;
tts::CudaMemory<float> mConvWeightsDevice;
tts::CudaMemory<float> mLocationWeightsDevice;
tts::CudaMemory<float> mEnergyWeightsDevice;
cublasHandle_t mCublasHandle;
};
} // namespace plugin
} // namespace nvinfer1
#endif

View file

@@ -0,0 +1,483 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "taco2AttentionLayerPlugin.h"
#include "taco2AttentionLayerKernel.h"
#include "taco2Utils.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <cuda_runtime.h> // cudaError_t
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
using namespace nvinfer1;
namespace nvinfer1
{
namespace plugin
{
using value_type = Taco2AttentionLayerPlugin::value_type;
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const char* const PLUGIN_NAME = "Taco2Attention";
constexpr const char* const PLUGIN_VERSION = "0.1.0";
} // namespace
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
std::vector<value_type> toVector(const Weights& weights)
{
if (weights.type != DataType::kFLOAT)
{
throw std::runtime_error(
"Invalid data type for Attention weights: " + std::to_string(static_cast<int>(weights.type)));
}
const value_type* const valuesBegin = static_cast<const value_type*>(weights.values);
const value_type* const valuesEnd = valuesBegin + weights.count;
return std::vector<value_type>(valuesBegin, valuesEnd);
}
const void* offset(const void* ptr, const size_t offset)
{
return reinterpret_cast<const void*>(static_cast<const uint8_t*>(ptr) + offset);
}
} // namespace
/******************************************************************************
* STATIC METHODS *************************************************************
*****************************************************************************/
const char* Taco2AttentionLayerPlugin::getName()
{
return PLUGIN_NAME;
}
const char* Taco2AttentionLayerPlugin::getVersion()
{
return PLUGIN_VERSION;
}
Taco2AttentionLayerPlugin Taco2AttentionLayerPlugin::deserialize(const void* const data, const size_t length)
{
static constexpr const size_t numDims = 5;
if (length < numDims * sizeof(int32_t))
{
throw std::runtime_error("Invalid serialized size: " + std::to_string(length));
}
const int numEncodingDimension = static_cast<const int32_t*>(data)[0];
const int numQueryDimension = static_cast<const int32_t*>(data)[1];
const int numFilters = static_cast<const int32_t*>(data)[2];
const int convKernelSize = static_cast<const int32_t*>(data)[3];
const int numAttentionDimension = static_cast<const int32_t*>(data)[4];
const int numQueryWeights = numQueryDimension * numAttentionDimension;
const int numConvWeights = numFilters * 2 * convKernelSize;
const int numLocationWeights = numFilters * numAttentionDimension;
const int numEnergyWeights = numAttentionDimension;
const size_t reqSize = numDims * sizeof(int32_t)
+ sizeof(value_type) * (numQueryWeights + numConvWeights + numLocationWeights + numEnergyWeights);
if (reqSize != length)
{
throw std::runtime_error(
"Invalid serialized size: " + std::to_string(length) + " / " + std::to_string(reqSize));
}
const Weights queryWeights{DataType::kFLOAT, offset(data, numDims * sizeof(int32_t)), numQueryWeights};
const Weights convWeights{
DataType::kFLOAT, offset(queryWeights.values, sizeof(value_type) * numQueryWeights), numConvWeights};
const Weights locationWeights{
DataType::kFLOAT, offset(convWeights.values, sizeof(value_type) * numConvWeights), numLocationWeights};
const Weights energyWeights{
DataType::kFLOAT, offset(locationWeights.values, sizeof(value_type) * numLocationWeights), numEnergyWeights};
return Taco2AttentionLayerPlugin(numEncodingDimension, numQueryDimension, numFilters, convKernelSize,
numAttentionDimension, queryWeights, convWeights, locationWeights, energyWeights);
}
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
Taco2AttentionLayerPlugin::Taco2AttentionLayerPlugin(int encDimension, int queryDimension, int numFilters,
int convKernelSize, int attDimension, const nvinfer1::Weights& queryWeights, const nvinfer1::Weights& convWeights,
const nvinfer1::Weights& locationWeights, const nvinfer1::Weights& energyWeights)
: mNumEncodingDimension(encDimension)
, mNumQueryDimension(queryDimension)
, mNumFilters(numFilters)
, mConvKernelSize(convKernelSize)
, mNumAttentionDimension(attDimension)
, mQueryWeightsHost(toVector(queryWeights))
, mConvWeightsHost(toVector(convWeights))
, mLocationWeightsHost(toVector(locationWeights))
, mEnergyWeightsHost(toVector(energyWeights))
, mKernel(nullptr)
, mNamespace()
{
const size_t expectedQueryWeights = mNumQueryDimension * mNumAttentionDimension;
const size_t expectedConvWeights = mNumFilters * mConvKernelSize * 2;
const size_t expectedLocationWeights = mNumFilters * mNumAttentionDimension;
const size_t expectedEnergyWeights = mNumAttentionDimension;
if (mQueryWeightsHost.size() != expectedQueryWeights)
{
throw std::runtime_error("Attention expected " + std::to_string(expectedQueryWeights)
+ " query weights but given " + std::to_string(mQueryWeightsHost.size()));
}
if (mConvWeightsHost.size() != expectedConvWeights)
{
throw std::runtime_error("Attention expected " + std::to_string(expectedConvWeights)
+ " conv weights but given " + std::to_string(mConvWeightsHost.size()));
}
if (mLocationWeightsHost.size() != expectedLocationWeights)
{
throw std::runtime_error("Attention expected " + std::to_string(expectedLocationWeights)
+ " location weights but given " + std::to_string(mLocationWeightsHost.size()));
}
if (mEnergyWeightsHost.size() != expectedEnergyWeights)
{
throw std::runtime_error("Attention expected " + std::to_string(expectedEnergyWeights)
+ " energy weights but given " + std::to_string(mEnergyWeightsHost.size()));
}
}
Taco2AttentionLayerPlugin::Taco2AttentionLayerPlugin(Taco2AttentionLayerPlugin&& other)
: mNumEncodingDimension(other.mNumEncodingDimension)
, mNumQueryDimension(other.mNumQueryDimension)
, mNumFilters(other.mNumFilters)
, mConvKernelSize(other.mConvKernelSize)
, mNumAttentionDimension(other.mNumAttentionDimension)
, mQueryWeightsHost(std::move(other.mQueryWeightsHost))
, mConvWeightsHost(std::move(other.mConvWeightsHost))
, mLocationWeightsHost(std::move(other.mLocationWeightsHost))
, mEnergyWeightsHost(std::move(other.mEnergyWeightsHost))
, mKernel(std::move(other.mKernel))
, mNamespace(std::move(other.mNamespace))
{
other.mNumEncodingDimension = 0;
other.mNumQueryDimension = 0;
other.mNumFilters = 0;
other.mConvKernelSize = 0;
other.mNumAttentionDimension = 0;
}
Taco2AttentionLayerPlugin::~Taco2AttentionLayerPlugin()
{
destroy();
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
Taco2AttentionLayerPlugin& Taco2AttentionLayerPlugin::operator=(Taco2AttentionLayerPlugin&& other)
{
// defer to the move constructor
*this = Taco2AttentionLayerPlugin(std::move(other));
return *this;
}
DataType Taco2AttentionLayerPlugin::getOutputDataType(
const int /* index */, const DataType* const /* inputTypes */, const int /* nbInputs */) const
{
return DataType::kFLOAT;
}
const char* Taco2AttentionLayerPlugin::getPluginType() const
{
return getName();
}
const char* Taco2AttentionLayerPlugin::getPluginVersion() const
{
return getVersion();
}
int Taco2AttentionLayerPlugin::getNbOutputs() const
{
return 2;
}
DimsExprs Taco2AttentionLayerPlugin::getOutputDimensions(
const int outputIndex, const DimsExprs* inputs, const int nbInputs, IExprBuilder& exprBuilder)
{
if (outputIndex >= getNbOutputs())
{
throw std::runtime_error(
"Invalid output index: " + std::to_string(outputIndex) + " / " + std::to_string(getNbOutputs()) + ".");
}
if (nbInputs != NUM_INPUTS)
{
throw std::runtime_error(
"Can only handle " + std::to_string(NUM_INPUTS) + " input tensors: " + std::to_string(nbInputs));
}
if (outputIndex == CONTEXT_OUTPUT)
{
return DimsExprs{
3, {inputs[MEMORY_INDEX].d[0], exprBuilder.constant(1), exprBuilder.constant(mNumEncodingDimension)}};
}
else if (outputIndex == WEIGHT_OUTPUT)
{
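// two channels per input position: the attention weights produced this step
// and their running accumulation, both fed back on the next decoder step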
return DimsExprs{3, {inputs[MEMORY_INDEX].d[0], exprBuilder.constant(2), inputs[MEMORY_INDEX].d[1]}};
}
else
{
throw std::runtime_error("Unknown output index: " + std::to_string(outputIndex));
}
}
bool Taco2AttentionLayerPlugin::supportsFormatCombination(
const int pos, const PluginTensorDesc* const inOut, const int /* nbInputs */, const int /* nbOutputs */)
{
return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
}
void Taco2AttentionLayerPlugin::configurePlugin(const DynamicPluginTensorDesc* const in, const int nbInputs,
const DynamicPluginTensorDesc* const out, const int nbOutputs)
{
if (nbInputs != NUM_INPUTS)
{
throw std::runtime_error(
"Can only handle " + std::to_string(NUM_INPUTS) + " input tensors: " + std::to_string(nbInputs));
}
for (int i = 0; i < nbInputs; ++i)
{
if (in[i].desc.type != DataType::kFLOAT)
{
throw std::runtime_error("Only FLOAT supported as input " + std::to_string(i) + " : "
+ std::to_string(static_cast<int>(in[i].desc.type)));
}
}
// assert dimensions
if (in[MEMORY_INDEX].desc.dims.d[2] != mNumEncodingDimension)
{
throw std::runtime_error("Memory input must be L x " + std::to_string(mNumEncodingDimension) + " but got "
+ taco2::Taco2Utils::dimsToString(in[MEMORY_INDEX].desc.dims));
}
if (in[PROCESSED_MEMORY_INDEX].desc.dims.d[2] != mNumAttentionDimension)
{
throw std::runtime_error("Processed Memory input must be L x " + std::to_string(mNumAttentionDimension)
+ " but got " + taco2::Taco2Utils::dimsToString(in[PROCESSED_MEMORY_INDEX].desc.dims));
}
if (in[WEIGHT_INDEX].desc.dims.d[1] != 2)
{
throw std::runtime_error(
"Weights input must be 2 x L but got " + taco2::Taco2Utils::dimsToString(in[WEIGHT_INDEX].desc.dims));
}
if (taco2::Taco2Utils::getDimensionsSize(in[ATTENTION_HIDDEN_INDEX].desc.dims)
!= static_cast<size_t>(mNumQueryDimension))
{
throw std::runtime_error("Attention hidden input must be " + std::to_string(mNumQueryDimension) + " but got "
+ taco2::Taco2Utils::dimsToString(in[ATTENTION_HIDDEN_INDEX].desc.dims) + " ("
+ std::to_string(taco2::Taco2Utils::getDimensionsSize(in[ATTENTION_HIDDEN_INDEX].desc.dims)) + ").");
}
if (nbOutputs != NUM_OUTPUTS)
{
throw std::runtime_error("Only two outputs is implemented: " + std::to_string(nbOutputs));
}
for (int i = 0; i < nbOutputs; ++i)
{
if (out[i].desc.type != DataType::kFLOAT)
{
throw std::runtime_error("Only FLOAT supported as output: " + std::to_string(i) + " : "
+ std::to_string(static_cast<int>(out[i].desc.type)));
}
}
}
int Taco2AttentionLayerPlugin::initialize()
{
try
{
mKernel.reset(
new Taco2AttentionLayerKernel(mQueryWeightsHost, mConvWeightsHost, mLocationWeightsHost, mEnergyWeightsHost,
mNumEncodingDimension, mNumQueryDimension, mNumFilters, mConvKernelSize, mNumAttentionDimension));
}
catch (const std::exception& e)
{
std::cerr << "Taco2AttentionLayerPlugin initialization failed: " << e.what() << std::endl;
return 1;
}
return 0;
}
void Taco2AttentionLayerPlugin::terminate()
{
mKernel.reset();
}
size_t Taco2AttentionLayerPlugin::getWorkspaceSize(
const PluginTensorDesc* const in, const int nbInputs, const PluginTensorDesc* const /* out */, const int /* nbOutputs */) const
{
if (nbInputs != NUM_INPUTS) {
throw std::runtime_error("Invalid number of inputs: " +
std::to_string(nbInputs) + ", but expected " + std::to_string(NUM_INPUTS));
}
const int inputLength = in[MEMORY_INDEX].dims.d[1];
const int batchSize = in[MEMORY_INDEX].dims.d[0];
// space for queryOutput (num attention dimensions),
// convOutput (input length*num filters), elemSum (input length), and
// energyScratch (inputLength).
return sizeof(value_type) * batchSize * (mNumAttentionDimension + (inputLength * mNumFilters) + 2 * inputLength);
}
int Taco2AttentionLayerPlugin::enqueue(const PluginTensorDesc* const inputDesc,
const PluginTensorDesc* /* outputDesc */,
const void* const* const inputs, void* const* const outputs, void* const workspace, cudaStream_t stream)
{
const int inputLength = inputDesc[MEMORY_INDEX].dims.d[1];
const int batchSize = inputDesc[MEMORY_INDEX].dims.d[0];
if (batchSize != 1)
{
// we only support batch size of 1 right now
std::cerr << "Taco2AttentionLayerPlugin plugin does not support batch size other than "
"1: got "
<< batchSize << std::endl;
std::cerr << "Recompile without plugins to use a larger batch size." << std::endl;
return 1;
}
// name inputs and outputs
const value_type* const memoryDevice = static_cast<const value_type*>(inputs[MEMORY_INDEX]);
const value_type* const processedMemoryDevice = static_cast<const value_type*>(inputs[PROCESSED_MEMORY_INDEX]);
const value_type* const weightsDevice = static_cast<const value_type*>(inputs[WEIGHT_INDEX]);
const value_type* const attentionHiddenDevice = static_cast<const value_type*>(inputs[ATTENTION_HIDDEN_INDEX]);
value_type* const outputContextDevice = static_cast<value_type*>(outputs[CONTEXT_OUTPUT]);
value_type* const outputWeightsDevice = static_cast<value_type*>(outputs[WEIGHT_OUTPUT]);
try
{
mKernel->execute(memoryDevice, processedMemoryDevice, weightsDevice, attentionHiddenDevice, outputContextDevice,
outputWeightsDevice, inputLength, static_cast<value_type*>(workspace), stream);
}
catch (const std::exception& e)
{
std::cerr << "Taco2AttentionLayerPlugin failed: " << e.what() << std::endl;
return 1;
}
return 0;
}
size_t Taco2AttentionLayerPlugin::getSerializationSize() const
{
const int numQueryWeights = mNumQueryDimension * mNumAttentionDimension;
const int numConvWeights = mNumFilters * 2 * mConvKernelSize;
const int numLocationWeights = mNumFilters * mNumAttentionDimension;
const int numEnergyWeights = mNumAttentionDimension;
return 5 * sizeof(int32_t)
+ sizeof(value_type) * (numQueryWeights + numConvWeights + numLocationWeights + numEnergyWeights);
}
void Taco2AttentionLayerPlugin::serialize(void* const buffer) const
{
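// layout: five int32 dimensions followed by the query, convolution, location,
// and energy weights as float32 (the inverse of deserialize() above)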
static_cast<int32_t*>(buffer)[0] = mNumEncodingDimension;
static_cast<int32_t*>(buffer)[1] = mNumQueryDimension;
static_cast<int32_t*>(buffer)[2] = mNumFilters;
static_cast<int32_t*>(buffer)[3] = mConvKernelSize;
static_cast<int32_t*>(buffer)[4] = mNumAttentionDimension;
float* const queryWeights = reinterpret_cast<float*>(static_cast<int32_t*>(buffer) + 5);
float* const convWeights = queryWeights + mQueryWeightsHost.size();
float* const locationWeights = convWeights + mConvWeightsHost.size();
float* const energyWeights = locationWeights + mLocationWeightsHost.size();
memcpy(queryWeights, mQueryWeightsHost.data(), sizeof(value_type) * mQueryWeightsHost.size());
memcpy(convWeights, mConvWeightsHost.data(), sizeof(value_type) * mConvWeightsHost.size());
memcpy(locationWeights, mLocationWeightsHost.data(), sizeof(value_type) * mLocationWeightsHost.size());
memcpy(energyWeights, mEnergyWeightsHost.data(), sizeof(value_type) * mEnergyWeightsHost.size());
}
void Taco2AttentionLayerPlugin::destroy()
{
terminate();
}
IPluginV2DynamicExt* Taco2AttentionLayerPlugin::clone() const
{
// call the constructor, which copies the data
Taco2AttentionLayerPlugin clone(mNumEncodingDimension, mNumQueryDimension, mNumFilters, mConvKernelSize,
mNumAttentionDimension,
Weights{DataType::kFLOAT, mQueryWeightsHost.data(), static_cast<int64_t>(mQueryWeightsHost.size())},
Weights{DataType::kFLOAT, mConvWeightsHost.data(), static_cast<int64_t>(mConvWeightsHost.size())},
Weights{DataType::kFLOAT, mLocationWeightsHost.data(), static_cast<int64_t>(mLocationWeightsHost.size())},
Weights{DataType::kFLOAT, mEnergyWeightsHost.data(), static_cast<int64_t>(mEnergyWeightsHost.size())});
if (mKernel)
{
// initialize the clone too
clone.initialize();
}
// move it to the heap last to avoid exceptions causing memory leaks
return new Taco2AttentionLayerPlugin(std::move(clone));
}
void Taco2AttentionLayerPlugin::setPluginNamespace(const char* pluginNamespace)
{
mNamespace = pluginNamespace;
}
const char* Taco2AttentionLayerPlugin::getPluginNamespace() const
{
return mNamespace.c_str();
}
} // namespace plugin
} // namespace nvinfer1

View file

@@ -0,0 +1,300 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_ATTENTIONLAYER_H
#define TT2I_ATTENTIONLAYER_H
#include "NvInfer.h"
#include <memory>
#include <string>
#include <vector>
namespace nvinfer1
{
namespace plugin
{
class Taco2AttentionLayerKernel;
class Taco2AttentionLayerPlugin : public nvinfer1::IPluginV2DynamicExt
{
public:
using value_type = float;
enum Inputs
{
MEMORY_INDEX = 0,
PROCESSED_MEMORY_INDEX = 1,
WEIGHT_INDEX = 2,
ATTENTION_HIDDEN_INDEX = 3,
NUM_INPUTS = 4
};
enum Outputs
{
CONTEXT_OUTPUT = 0,
WEIGHT_OUTPUT = 1,
NUM_OUTPUTS = 2
};
/**
* @brief Get the name of this plugin.
*
* @return The name.
*/
static const char* getName();
/**
* @brief Get the version of this plugin.
*
* @return The version.
*/
static const char* getVersion();
/**
* @brief Create a new Taco2AttentionLayerPlugin from serialized data.
*
* @param data The data.
* @param length The length of the data in bytes.
*
* @return The instantiated plugin.
*/
static Taco2AttentionLayerPlugin deserialize(const void* data, size_t length);
/**
* @brief Create a new Taco2AttentionLayerPlugin.
*
* @param encDimension The number of encoding dimensions.
* @param queryDimension The number of query dimensions.
* @param numFilters The number of convolution filters.
* @param convKernelSize The convolution kernel size.
* @param attDimension The attention dimension.
* @param queryWeights The query weights.
* @param convWeights The convolution weights.
* @param locationWeights The location weights.
* @param energyWeights The energy weights.
*/
Taco2AttentionLayerPlugin(int encDimension, int queryDimension, int numFilters, int convKernelSize,
int attDimension, const nvinfer1::Weights& queryWeights, const nvinfer1::Weights& convWeights,
const nvinfer1::Weights& locationWeights, const nvinfer1::Weights& energyWeights);
/**
* @brief Move constructor.
*
* @param other The Taco2AttentionLayer to move.
*/
Taco2AttentionLayerPlugin(Taco2AttentionLayerPlugin&& other);
/**
* @brief Move assignment operator.
*
* @param other The Taco2AttentionLayerPlugin to move.
*
* @return This Taco2AttentionLayerPlugin.
*/
Taco2AttentionLayerPlugin& operator=(Taco2AttentionLayerPlugin&& other);
/**
* @brief Destructor.
*/
~Taco2AttentionLayerPlugin();
// disable copying
Taco2AttentionLayerPlugin(const Taco2AttentionLayerPlugin& other) = delete;
Taco2AttentionLayerPlugin& operator=(const Taco2AttentionLayerPlugin& other) = delete;
/**
* @brief Return the data type of the plugin output at the requested index.
*
* @param index The output index.
* @param inputTypes The input data types.
* @param nbInputs The number of inputs.
*
* @return The type of output.
*/
nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
/**
* @brief Get the plugin type.
*
* @return The plugin type.
*/
const char* getPluginType() const override;
/**
* @brief Get the plugin version.
*
* @return The plugin version.
*/
const char* getPluginVersion() const override;
/**
* @brief Get the number of outputs.
*
* @return The number of outputs.
*/
int getNbOutputs() const override;
/**
* @brief Get the dimensions of an output tensor.
*
* @param outputIndex The index of the output tensor.
* @param inputs Expressions for dimensions of the input tensors.
* @param nbInputs The number of input tensors.
* @param expBuilder Object for generating new expressions.
*
* @return The resulting dimensions.
*/
nvinfer1::DimsExprs getOutputDimensions(
int outputIndex, const DimsExprs* inputs, int nbInputs, IExprBuilder& expBuilder) override;
/**
* @brief Check if the given plugin format is supported.
*
* @param pos The format position/index in inOut.format[].
* @param inOut The input and output formats.
* @param nbInputs The number of inputs.
* @param nbOutputs The number of outputs.
*
* @return True if it is supported.
*/
bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) override;
/**
* @brief Configure this plugin with the given inputs, outputs, and data
* types.
*
* @param in The input tensor attributes that are used for configuration.
* @param nbInputs The number of inputs.
* @param out The output tensor attributes that are used for configuration.
* @param nbOutputs The number of outputs.
*/
void configurePlugin(
const DynamicPluginTensorDesc* in, int nbInputs, const DynamicPluginTensorDesc* out, int nbOutputs) override;
/**
* @brief Initialize the plugin.
*
* @return 0 if initialization was successful. Non-zero otherwise.
*/
int initialize() override;
/**
* @brief Terminate the plugin (deinitialize).
*/
void terminate() override;
/**
* @brief Get the workspace size required by this plugin for the given input
* and output tensor descriptors.
*
* @param in The input tensor descriptors.
* @param nbInputs The number of inputs.
* @param out The output tensor descriptors.
* @param nbOutputs The number of outputs.
*
* @return The workspace size in bytes.
*/
size_t getWorkspaceSize(
const PluginTensorDesc* in, int nbInputs, const PluginTensorDesc* out, int nbOutputs) const override;
/**
* @brief Enqueue this plugin for execution on the given stream.
*
* @param inputDesc The input tensor descriptors.
* @param outputDesc The output tensor descriptors.
* @param inputs The input tensors.
* @param outputs The output tensors.
* @param workspace The allocated workspace.
* @param stream The stream to operate on.
*
* @return 0 if successfully queued, non-zero otherwise.
*/
int enqueue(const PluginTensorDesc* inputDesc, const PluginTensorDesc* outputDesc, const void* const* inputs,
void* const* outputs, void* workspace, cudaStream_t stream);
/**
* @brief Get the number of bytes occupied by this plugin if serialized.
*
* @return The size in bytes.
*/
size_t getSerializationSize() const override;
/**
* @brief Serialize this plugin.
*
* @param buffer The buffer to write to.
*/
void serialize(void* buffer) const override;
/**
* @brief Destroy this plugin instance.
*/
void destroy() override;
/**
* @brief Clone this plugin instance.
*
* @return The cloned plugin.
*/
IPluginV2DynamicExt* clone() const override;
/**
* @brief Set the namespace of this plugin.
*
* @param pluginNamespace The namespace.
*/
void setPluginNamespace(const char* pluginNamespace) override;
/**
* @brief Get the namespace of this plugin.
*
* @return The namespace.
*/
const char* getPluginNamespace() const override;
private:
int mNumEncodingDimension;
int mNumQueryDimension;
int mNumFilters;
int mConvKernelSize;
int mNumAttentionDimension;
std::vector<value_type> mQueryWeightsHost;
std::vector<value_type> mConvWeightsHost;
std::vector<value_type> mLocationWeightsHost;
std::vector<value_type> mEnergyWeightsHost;
std::unique_ptr<Taco2AttentionLayerKernel> mKernel;
std::string mNamespace;
};
} // namespace plugin
} // namespace nvinfer1
#endif

View file

@@ -0,0 +1,201 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "taco2AttentionLayerPluginCreator.h"
#include "taco2AttentionLayerPlugin.h"
#include <stdexcept>
#include <vector>
using namespace nvinfer1;
namespace nvinfer1
{
namespace plugin
{
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const char* const INPUT_LENGTH_STR = "InputLength";
constexpr const char* const ENCODING_DIMENSION_STR = "EncodingDimension";
constexpr const char* const QUERY_DIMENSION_STR = "QueryDimension";
constexpr const char* const NUM_FILTERS_STR = "NumFilters";
constexpr const char* const CONV_KERNEL_SIZE_STR = "ConvKernelSize";
constexpr const char* const ATTENTION_DIMENSION_STR = "AttentionDimension";
constexpr const char* const QUERY_WEIGHTS_STR = "QueryWeight";
constexpr const char* const CONV_WEIGHTS_STR = "ConvWeight";
constexpr const char* const LOCATION_WEIGHTS_STR = "LocationWeight";
constexpr const char* const ENERGY_WEIGHTS_STR = "EnergyWeight";
} // namespace
/******************************************************************************
* PUBLIC STATIC METHODS ******************************************************
*****************************************************************************/
PluginFieldCollection* Taco2AttentionLayerPluginCreator::getFields()
{
static PluginFieldCollection* pluginPtr = nullptr;
static const std::vector<PluginField> fields{{INPUT_LENGTH_STR, nullptr, PluginFieldType::kINT32, 0},
{ENCODING_DIMENSION_STR, nullptr, PluginFieldType::kINT32, 0},
{QUERY_DIMENSION_STR, nullptr, PluginFieldType::kINT32, 0},
{NUM_FILTERS_STR, nullptr, PluginFieldType::kINT32, 0},
{CONV_KERNEL_SIZE_STR, nullptr, PluginFieldType::kINT32, 0},
{ATTENTION_DIMENSION_STR, nullptr, PluginFieldType::kINT32, 0},
{QUERY_WEIGHTS_STR, nullptr, PluginFieldType::kFLOAT32, 0},
{CONV_WEIGHTS_STR, nullptr, PluginFieldType::kFLOAT32, 0},
{LOCATION_WEIGHTS_STR, nullptr, PluginFieldType::kFLOAT32, 0},
{ENERGY_WEIGHTS_STR, nullptr, PluginFieldType::kFLOAT32, 0}};
if (!pluginPtr)
{
pluginPtr
= static_cast<PluginFieldCollection*>(malloc(sizeof(*pluginPtr) + fields.size() * sizeof(PluginField)));
pluginPtr->nbFields = static_cast<int>(fields.size());
pluginPtr->fields = fields.data();
}
return pluginPtr;
}
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
Taco2AttentionLayerPluginCreator::Taco2AttentionLayerPluginCreator()
: mNamespace()
{
// do nothing
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
const char* Taco2AttentionLayerPluginCreator::getPluginName() const
{
return Taco2AttentionLayerPlugin::getName();
}
const char* Taco2AttentionLayerPluginCreator::getPluginVersion() const
{
return Taco2AttentionLayerPlugin::getVersion();
}
const PluginFieldCollection* Taco2AttentionLayerPluginCreator::getFieldNames()
{
return getFields();
}
IPluginV2* Taco2AttentionLayerPluginCreator::createPlugin(const char* const /*name*/, const PluginFieldCollection* fc)
{
int encDimension = 0;
int queryDimension = 0;
int numFilters = 0;
int convKernelSize = 0;
int attDimension = 0;
Weights queryWeights{DataType::kFLOAT, nullptr, 0};
Weights locationWeights{DataType::kFLOAT, nullptr, 0};
Weights convWeights{DataType::kFLOAT, nullptr, 0};
Weights energyWeights{DataType::kFLOAT, nullptr, 0};
for (int i = 0; i < fc->nbFields; ++i)
{
const std::string name(fc->fields[i].name);
if (name == ENCODING_DIMENSION_STR)
{
encDimension = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == QUERY_DIMENSION_STR)
{
queryDimension = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == NUM_FILTERS_STR)
{
numFilters = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == CONV_KERNEL_SIZE_STR)
{
convKernelSize = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == ATTENTION_DIMENSION_STR)
{
attDimension = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == QUERY_WEIGHTS_STR)
{
queryWeights.values = fc->fields[i].data;
queryWeights.count = fc->fields[i].length;
}
else if (name == CONV_WEIGHTS_STR)
{
convWeights.values = fc->fields[i].data;
convWeights.count = fc->fields[i].length;
}
else if (name == LOCATION_WEIGHTS_STR)
{
locationWeights.values = fc->fields[i].data;
locationWeights.count = fc->fields[i].length;
}
else if (name == ENERGY_WEIGHTS_STR)
{
energyWeights.values = fc->fields[i].data;
energyWeights.count = fc->fields[i].length;
}
else
{
throw std::runtime_error("Unknown plugin field: '" + name + "'");
}
}
return new Taco2AttentionLayerPlugin(encDimension, queryDimension, numFilters, convKernelSize, attDimension,
queryWeights, convWeights, locationWeights, energyWeights);
}
IPluginV2* Taco2AttentionLayerPluginCreator::deserializePlugin(
const char* const /* layerName */, const void* const serialData, size_t const serialLength)
{
return new Taco2AttentionLayerPlugin(Taco2AttentionLayerPlugin::deserialize(serialData, serialLength));
}
void Taco2AttentionLayerPluginCreator::setPluginNamespace(const char* pluginNamespace)
{
mNamespace = pluginNamespace;
}
const char* Taco2AttentionLayerPluginCreator::getPluginNamespace() const
{
return mNamespace.c_str();
}
} // namespace plugin
} // namespace nvinfer1
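The creator above maps a PluginFieldCollection onto the attention plugin's constructor. Below is a minimal, hypothetical sketch (not part of this commit) of how a network-building client could drive createPlugin() with the field names defined in this file: all dimensions are placeholder values rather than the real model sizes, the weight vectors stand in for the trained Tacotron 2 attention weights, and it assumes the plugin copies the weights it is handed (the host-side weight vectors in the plugin class suggest it does).

// Sketch only -- not part of this commit. Field names match the constants
// defined above; the dimensions are placeholders, not the real model sizes.
#include "taco2AttentionLayerPluginCreator.h"
#include "NvInfer.h"

#include <vector>

nvinfer1::IPluginV2* buildAttentionPluginSketch(
    const std::vector<float>& queryW, const std::vector<float>& convW,
    const std::vector<float>& locationW, const std::vector<float>& energyW)
{
    using namespace nvinfer1;

    const int32_t encodingDim = 512;  // placeholder
    const int32_t queryDim = 1024;    // placeholder
    const int32_t numFilters = 32;    // placeholder
    const int32_t kernelSize = 31;    // placeholder
    const int32_t attentionDim = 128; // placeholder

    // "InputLength" is listed by getFields() but not read by createPlugin(),
    // so it is omitted here to avoid the unknown-field exception.
    const std::vector<PluginField> fields{
        {"EncodingDimension", &encodingDim, PluginFieldType::kINT32, 1},
        {"QueryDimension", &queryDim, PluginFieldType::kINT32, 1},
        {"NumFilters", &numFilters, PluginFieldType::kINT32, 1},
        {"ConvKernelSize", &kernelSize, PluginFieldType::kINT32, 1},
        {"AttentionDimension", &attentionDim, PluginFieldType::kINT32, 1},
        {"QueryWeight", queryW.data(), PluginFieldType::kFLOAT32, static_cast<int32_t>(queryW.size())},
        {"ConvWeight", convW.data(), PluginFieldType::kFLOAT32, static_cast<int32_t>(convW.size())},
        {"LocationWeight", locationW.data(), PluginFieldType::kFLOAT32, static_cast<int32_t>(locationW.size())},
        {"EnergyWeight", energyW.data(), PluginFieldType::kFLOAT32, static_cast<int32_t>(energyW.size())}};

    PluginFieldCollection fc;
    fc.nbFields = static_cast<int32_t>(fields.size());
    fc.fields = fields.data();

    plugin::Taco2AttentionLayerPluginCreator creator;
    // The returned plugin is owned by the caller and must eventually be destroyed.
    return creator.createPlugin("taco2_attention_sketch", &fc);
}

In the actual backend the same fields are populated from the exported Tacotron 2 weights when the engine is built; the sketch only illustrates the expected field layout.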

View file

@ -0,0 +1,128 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_ATTENTIONLAYERPLUGINCREATOR_H
#define TT2I_ATTENTIONLAYERPLUGINCREATOR_H
#include "NvInfer.h"
#include <string>
#ifdef DEVEL
// The destructor of nvinfer1::IPluginCreator is non-virtual and public, so
// we need to suppress the warning.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
#endif
namespace nvinfer1
{
namespace plugin
{
class Taco2AttentionLayerPluginCreator : public nvinfer1::IPluginCreator
{
public:
/**
* @brief Get the collection of fields for this plugin, with their names only.
*
* @return The collection of fields.
*/
static nvinfer1::PluginFieldCollection* getFields();
/**
* @brief Create a new Taco2AttentionLayerPluginCreator.
*/
Taco2AttentionLayerPluginCreator();
/**
* @brief Get the name of the plugin.
*
* @return The name of the plugin.
*/
const char* getPluginName() const override;
/**
* @brief Get the plugin version.
*
* @return The plugin version.
*/
const char* getPluginVersion() const override;
/**
* @brief Get the collection of fields for this plugin.
*
* @return The collection of fields.
*/
const nvinfer1::PluginFieldCollection* getFieldNames() override;
/**
* @brief Create a new Taco2AttentionLayerPlugin.
*
* @param name The name (unused currently).
* @param fc The collection of fields to initialize with.
*
* @return The created plugin.
*/
nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) override;
/**
* @brief Create a custom layer by name from a data stream.
*
* @param layerName The name of the layer.
* @param serialData The serialized data for the layer.
* @param serialLength The length of the serialized data.
*
* @return The plugin. Clients must destroy the plugin once all consumers of
* it have been destroyed.
*/
nvinfer1::IPluginV2* deserializePlugin(const char* layerName, const void* serialData, size_t serialLength) override;
/**
* @brief Set the namespace for created plugins.
*
* @param pluginNamespace The namespace.
*/
void setPluginNamespace(const char* pluginNamespace) override;
/**
* @brief Get the namespace for created plugins.
*
* @return The namespace.
*/
const char* getPluginNamespace() const override;
private:
std::string mNamespace;
};
} // namespace plugin
} // namespace nvinfer1
#ifdef DEVEL
#pragma GCC diagnostic pop
#endif
#endif
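For engines that serialize this layer, the creator also has to be discoverable at deserialization time. A hedged sketch of registering it with the global TensorRT plugin registry follows (not part of this commit); it assumes the default, empty plugin namespace, and the function name is purely illustrative.

// Sketch only -- not part of this commit. Registers the attention creator so
// that an engine containing the plugin can be deserialized later.
#include "taco2AttentionLayerPluginCreator.h"
#include "NvInfer.h"

void registerTaco2AttentionCreatorSketch()
{
    using namespace nvinfer1;

    // The registry keeps a reference, so the creator must outlive it; a
    // function-local static is the simplest way to guarantee that here.
    static plugin::Taco2AttentionLayerPluginCreator creator;
    creator.setPluginNamespace("");
    getPluginRegistry()->registerCreator(creator, "");

    // At deserialization time TensorRT looks the creator up by the same
    // name/version pair the plugin reports.
    IPluginCreator* found = getPluginRegistry()->getPluginCreator(
        creator.getPluginName(), creator.getPluginVersion(), "");
    (void) found; // found == &creator on success
}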

View file

@ -0,0 +1,18 @@
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
file(GLOB SRCS *.cpp *.cu)
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)

Some files were not shown because too many files have changed in this diff.