[Tacotron2/PyT] custom TensorRT backend on TensorRT Inference Server; Conversational AI demo; fixed checkpoint loading; fixed FP16 export to TensorRT

Przemek Strzelczyk 2020-04-02 17:18:26 +02:00
parent 157a3acaa9
commit 5e3b487b89
244 changed files with 62925 additions and 61 deletions

View file

@ -710,6 +710,12 @@ December 2019
January 2020
* Updated batch sizes and performance results for Tacotron 2.
March 2020
* Added Tacotron 2 and WaveGlow inference using TensorRT Inference Server with custom TensorRT backend in `trtis_cpp`
* Added Conversational AI demo script in `notebooks/conversationalai`
* Fixed loading CUDA RNG state in the `load_checkpoint()` function in `train.py` (see the sketch after this list)
* Fixed FP16 export to TensorRT in `trt/README.md`
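For reference, restoring RNG state on checkpoint load amounts to pushing the saved CPU and CUDA generator states back into PyTorch. A minimal, hypothetical sketch of the idea (the checkpoint key names are assumptions, not taken from this diff):

```python
import torch

def restore_rng_state(checkpoint):
    # CPU generator state saved at checkpoint time
    torch.random.set_rng_state(checkpoint['random_rng_state'].cpu())
    # CUDA generator states must be ByteTensors on the CPU, one per device
    torch.cuda.set_rng_state_all([s.cpu() for s in checkpoint['cuda_rng_states_all']])
```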
### Known issues
There are no known issues in this release.

View file

@ -45,6 +45,8 @@ def parse_args(parser):
help='full path to the Tacotron2 model checkpoint file')
parser.add_argument('-o', '--output', type=str, required=True,
help='Directory for the exported Tacotron 2 ONNX model')
parser.add_argument('--fp16', action='store_true',
help='Export with half precision to ONNX')
return parser
@ -105,7 +107,7 @@ def prenet_infer(self, x):
for linear in self.layers:
x1 = F.relu(linear(x1))
x0 = x1[0].unsqueeze(0)
mask = torch.le(torch.rand(256, device='cuda').to(torch.float32), 0.5).to(torch.float32)
mask = torch.le(torch.rand(256, device='cuda').to(x.dtype), 0.5).to(x.dtype)
mask = mask.expand(x1.size(0), x1.size(1))
x1 = x1*mask*2.0
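For context, the Tacotron 2 prenet keeps dropout active at inference, so the change above draws the Bernoulli mask in the input's dtype instead of hard-coded float32; otherwise the FP16 ONNX export ends up with a stray float32 branch. A minimal sketch of one prenet layer step under that convention (function and argument names are illustrative, not the repo's):

```python
import torch
import torch.nn.functional as F

def prenet_layer_step(linear, x, p=0.5):
    # dropout stays on at inference; drawing the mask in x.dtype keeps the
    # exported graph in half precision when x is fp16
    x = F.relu(linear(x))
    mask = torch.le(torch.rand(x.size(-1), device=x.device).to(x.dtype), p).to(x.dtype)
    return x * mask * (1.0 / (1.0 - p))  # rescale to preserve the expected activation
```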
@ -216,7 +218,6 @@ class DecoderIter(torch.nn.Module):
return outputs
def test_inference(encoder, decoder_iter, postnet):
encoder.eval()
@ -236,6 +237,7 @@ def test_inference(encoder, decoder_iter, postnet):
print("Running Tacotron2 Decoder")
device = memory.device
dtype = memory.dtype
mel_lengths = torch.zeros([memory.size(0)], dtype=torch.int32, device = device)
not_finished = torch.ones([memory.size(0)], dtype=torch.int32, device = device)
mel_outputs, gate_outputs, alignments = (torch.zeros(1), torch.zeros(1), torch.zeros(1))
@ -295,7 +297,8 @@ def main():
parser = parse_args(parser)
args, _ = parser.parse_known_args()
tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2, False)
tacotron2 = load_and_setup_model('Tacotron2', parser, args.tacotron2,
amp_run=args.fp16)
opset_version = 10
@ -323,6 +326,8 @@ def main():
decoder_iter = DecoderIter(tacotron2)
memory = torch.randn((1,sequence_lengths[0],512)).cuda() #encoder_outputs
if args.fp16:
memory = memory.half()
memory_lengths = sequence_lengths
# initialize decoder states for dummy_input
decoder_input = tacotron2.decoder.get_go_frame(memory)
@ -399,6 +404,8 @@ def main():
postnet = Postnet(tacotron2)
dummy_input = torch.randn((1,80,620)).cuda()
if args.fp16:
dummy_input = dummy_input.half()
torch.onnx.export(postnet, dummy_input, args.output+"/"+"postnet.onnx",
opset_version=opset_version,
do_constant_folding=True,

View file

@ -94,6 +94,11 @@ output [
data_type: TYPE_INT32
dims: [1]
reshape: {{ shape: [ ] }}
}},
{{
name: "alignments__2"
data_type: {fp_type}
dims: [-1,-1]
}}
]
"""

View file

@ -42,7 +42,7 @@ def parse_args(parser):
help='full path to the WaveGlow model checkpoint file')
parser.add_argument('-o', '--output', type=str, required=True,
help='Directory for the exported WaveGlow ONNX model')
parser.add_argument('--amp-run', action='store_true',
parser.add_argument('--fp16', action='store_true',
help='inference with AMP')
parser.add_argument('-s', '--sigma-infer', default=0.6, type=float)
@ -165,20 +165,16 @@ def infer_onnx(self, spect, z, sigma=0.9):
def export_onnx(parser, args):
waveglow = load_and_setup_model('WaveGlow', parser, args.waveglow,
args.amp_run, forward_is_infer=False)
args.fp16, forward_is_infer=False)
# 80 mel channels, 620 mel spectrograms ~ 7 seconds of speech
mel = torch.randn(1, 80, 620).cuda()
stride = 256 # value from waveglow upsample
kernel_size = 1024 # value from waveglow upsample
n_group = 8
z_size2 = (mel.size(2)-1)*stride+(kernel_size-1)+1
# corresponds to cutoff in infer_onnx
z_size2 = z_size2 - (kernel_size-stride)
z_size2 = z_size2//n_group
z_size2 = (mel.size(2)*stride)//n_group
z = torch.randn(1, n_group, z_size2, 1).cuda()
if args.amp_run:
if args.fp16:
mel = mel.half()
z = z.half()
with torch.no_grad():
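The shortened `z_size2` expression is algebraically the same as the old cutoff-based one: (L-1)*stride + (kernel-1) + 1 - (kernel-stride) reduces to L*stride, leaving only the division by `n_group`. A quick numeric check with the values used in this script:

```python
# values from the export script: 620 mel frames, stride 256, kernel 1024, 8 groups
mel_frames, stride, kernel_size, n_group = 620, 256, 1024, 8

old = ((mel_frames - 1) * stride + (kernel_size - 1) + 1 - (kernel_size - stride)) // n_group
new = (mel_frames * stride) // n_group

assert old == new == 19840
```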
@ -187,12 +183,12 @@ def export_onnx(parser, args):
# export to ONNX
convert_1d_to_2d_(waveglow)
if args.fp16:
waveglow = waveglow.half()
fType = types.MethodType
waveglow.forward = fType(infer_onnx, waveglow)
if args.amp_run:
waveglow.half()
mel = mel.unsqueeze(3)
opset_version = 10
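The `types.MethodType` line above binds the standalone `infer_onnx` function as WaveGlow's `forward`, so `torch.onnx.export` traces the export-friendly inference path; the diff also moves the `.half()` conversion ahead of that patch so the weights are already FP16 when tracing starts. A tiny illustration of the binding trick on a toy class (not the repo's model):

```python
import types

class Toy:
    scale = 3.0
    def forward(self, x):
        return x

def scaled_forward(self, x):
    # free function; 'self' is bound to the instance below
    return x * self.scale

t = Toy()
t.forward = types.MethodType(scaled_forward, t)  # same pattern as waveglow.forward = fType(infer_onnx, waveglow)
print(t.forward(2.0))  # 6.0
```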

View file

@ -31,6 +31,8 @@ import torch
import argparse
import numpy as np
from scipy.io.wavfile import write
import matplotlib
import matplotlib.pyplot as plt
import sys
@ -208,14 +210,14 @@ def main():
except:
print("Could not read file")
sys.exit(1)
if args.include_warmup:
sequence = torch.randint(low=0, high=148, size=(1,50),
dtype=torch.long).cuda()
input_lengths = torch.IntTensor([sequence.size(1)]).cuda().long()
for i in range(3):
with torch.no_grad():
mel, mel_lengths = jitted_tacotron2(sequence, input_lengths)
mel, mel_lengths, _ = jitted_tacotron2(sequence, input_lengths)
_ = waveglow(mel)
measurements = {}
@ -223,7 +225,7 @@ def main():
sequences_padded, input_lengths = prepare_input_sequence(texts)
with torch.no_grad(), MeasureTime(measurements, "tacotron2_time"):
mel, mel_lengths = jitted_tacotron2(sequences_padded, input_lengths)
mel, mel_lengths, alignments = jitted_tacotron2(sequences_padded, input_lengths)
with torch.no_grad(), MeasureTime(measurements, "waveglow_time"):
audios = waveglow(mel, sigma=args.sigma_infer)
@ -240,7 +242,14 @@ def main():
DLLogger.log(step=0, data={"waveglow_latency": measurements['waveglow_time']})
DLLogger.log(step=0, data={"latency": (measurements['tacotron2_time']+measurements['waveglow_time'])})
alignments = alignments.unfold(1, audios.size(0), audios.size(0)).transpose(0,2)
for i, audio in enumerate(audios):
plt.imshow(alignments[i].float().data.cpu().numpy().T, aspect="auto", origin="lower")
figure_path = args.output+"alignment_"+str(i)+"_"+args.suffix+".png"
plt.savefig(figure_path)
audio = audio[:mel_lengths[i]*args.stft_hop_length]
audio = audio/torch.max(torch.abs(audio))
audio_path = args.output+"audio_"+str(i)+"_"+args.suffix+".wav"
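Each mel frame decoded by Tacotron 2 corresponds to `stft_hop_length` audio samples out of WaveGlow, so trimming to `mel_lengths[i]*args.stft_hop_length` drops the padded tail before the waveform is normalized and written out. A small back-of-the-envelope check with this repo's usual settings (22050 Hz sampling rate, hop 256; the frame count is hypothetical):

```python
sampling_rate = 22050     # Hz, as used elsewhere in this repo
stft_hop_length = 256     # audio samples per mel frame
mel_frames = 500          # hypothetical length of one generated utterance

num_samples = mel_frames * stft_hop_length   # 128000 samples kept after trimming
duration_s = num_samples / sampling_rate     # about 5.8 seconds of audio
print(num_samples, round(duration_s, 2))
```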

View file

@ -95,7 +95,7 @@ def main():
dtype=torch.long).cuda()
input_lengths = torch.IntTensor([text_padded.size(1)]*args.batch_size).cuda().long()
with torch.no_grad(), MeasureTime(measurements, "inference_time"):
mels, _ = model(text_padded, input_lengths)
mels, _, _ = model(text_padded, input_lengths)
num_items = mels.size(0)*mels.size(2)
if args.model_name == 'WaveGlow':

View file

@ -0,0 +1,233 @@
## Model Preparation
### Clone the repository
```bash
git clone https://github.com/NVIDIA/DeepLearningExamples.git
cd DeepLearningExamples
```
You will build the Conversational AI demo in the Tacotron 2 folder:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai
```
### Download checkpoints
Download the PyTorch checkpoints from [NGC](https://ngc.nvidia.com/models):
* [Jasper](https://ngc.nvidia.com/catalog/models/nvidia:jasperpyt_fp16/files)
```bash
wget https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_fp16/versions/1/files/jasper_fp16.pt
```
* [BERT](https://ngc.nvidia.com/catalog/models/nvidia:bert_large_pyt_amp_ckpt_squad_qa1_1/files?version=1)
```bash
wget https://api.ngc.nvidia.com/v2/models/nvidia/bert_large_pyt_amp_ckpt_squad_qa1_1/versions/1/files/bert_large_qa.pt
```
* [Tacotron 2](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16/files?version=2)
```bash
wget https://api.ngc.nvidia.com/v2/models/nvidia/tacotron2pyt_fp16/versions/2/files/nvidia_tacotron2pyt_fp16_20190427
```
* [WaveGlow](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16/files)
```bash
wget https://api.ngc.nvidia.com/v2/models/nvidia/waveglow256pyt_fp16/versions/1/files/nvidia_waveglow256pyt_fp16
```
Move the downloaded checkpoints to the `models` directory:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai
mv bert_large_qa.pt nvidia_tacotron2pyt_fp16_20190427 nvidia_waveglow256pyt_fp16 models/
```
### Prepare Jasper
First, let's generate a TensorRT engine for Jasper using TensorRT version 7.
Download the Jasper checkpoint from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:jasperpyt_fp16/files)
and move it to the `Jasper/checkpoints/` directory:
```bash
mkdir -p DeepLearningExamples/PyTorch/SpeechRecognition/Jasper/checkpoints
mv jasper_fp16.pt DeepLearningExamples/PyTorch/SpeechRecognition/Jasper/checkpoints
```
Apply a patch to enable support for TensorRT 7:
```bash
cd DeepLearningExamples/
git apply --ignore-space-change --reject --whitespace=fix ../patch_jasper_trt7
```
Now, build a container for Jasper:
```bash
cd DeepLearningExamples/PyTorch/SpeechRecognition/Jasper/
bash trt/scripts/docker/build.sh
```
To run the container, type:
```bash
cd DeepLearningExamples/PyTorch/SpeechRecognition/Jasper
export JASPER_DIR=${PWD}
export DATA_DIR=$JASPER_DIR/data/
export CHECKPOINT_DIR=$JASPER_DIR/checkpoints/
export RESULT_DIR=$JASPER_DIR/results/
cd $JASPER_DIR
mkdir -p $DATA_DIR $CHECKPOINT_DIR $RESULT_DIR
bash trt/scripts/docker/launch.sh $DATA_DIR $CHECKPOINT_DIR $RESULT_DIR
```
Inside the container, export the Jasper TensorRT engine by executing:
```bash
mkdir -p /results/onnxs/ /results/engines/
cd /jasper
python trt/perf.py --batch_size 1 --engine_batch_size 1 --model_toml configs/jasper10x5dr_nomask.toml --ckpt_path /checkpoints/jasper_fp16.pt --trt_fp16 --pyt_fp16 --engine_path /results/engines/fp16_DYNAMIC.engine --onnx_path /results/onnxs/fp32_DYNAMIC.onnx --seq_len 3600 --make_onnx
```
After a successful export, copy the engine to the `model_repo` directory:
```bash
cd DeepLearningExamples/PyTorch
mkdir -p SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/jasper-trt/1
cp SpeechRecognition/Jasper/results/engines/fp16_DYNAMIC.engine SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/jasper-trt/1/jasper_fp16.engine
```
You will also need the Jasper feature extractor and decoder. Download them from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:jasperpyt_jit_fp16/files) and move them to the model_repo:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/
mkdir -p jasper-decoder/1 jasper-feature-extractor/1
wget -P jasper-decoder/ https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_jit_fp16/versions/1/files/jasper-decoder/config.pbtxt
wget -P jasper-decoder/1/ https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_jit_fp16/versions/1/files/jasper-decoder/1/jasper-decoder.pt
wget -P jasper-feature-extractor/ https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_jit_fp16/versions/1/files/jasper-feature-extractor/config.pbtxt
wget -P jasper-feature-extractor/1/ https://api.ngc.nvidia.com/v2/models/nvidia/jasperpyt_jit_fp16/versions/1/files/jasper-feature-extractor/1/jasper-feature-extractor.pt
```
### Prepare BERT
With the generated Jasper model, we can proceed to BERT.
Download the BERT checkpoint from [NGC](https://ngc.nvidia.com/catalog/models/nvidia:bert_large_pyt_amp_ckpt_squad_qa1_1/files)
and move it to the `BERT/checkpoints/` directory:
```bash
mkdir -p DeepLearningExamples/PyTorch/LanguageModeling/BERT/checkpoints/
mv bert_large_qa.pt DeepLearningExamples/PyTorch/LanguageModeling/BERT/checkpoints/
```
Now, build a container for BERT:
```bash
cd DeepLearningExamples/PyTorch/LanguageModeling/BERT/
bash scripts/docker/build.sh
```
Use the Triton export script to convert the model `checkpoints/bert_large_qa.pt` to ONNX:
```bash
bash triton/export_model.sh
```
The model will be saved in `results/triton_models/bertQA-onnx`, together with the Triton configuration file. Copy the model and configuration file to the model_repo:
```bash
cd DeepLearningExamples
cp -r PyTorch/LanguageModeling/BERT/results/triton_models/bertQA-onnx PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/
```
### Prepare Tacotron 2 and WaveGlow
Now to the final part, the TTS system.
Download the [Tacotron 2](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16/files?version=2) and [WaveGlow](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16/files) checkpoints from [NGC](https://ngc.nvidia.com/catalog/models/)
and move them to the `Tacotron2/checkpoints/` directory:
```bash
mkdir -p DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/checkpoints/
mv nvidia_tacotron2pyt_fp16_20190427 nvidia_waveglow256pyt_fp16 DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/checkpoints/
```
Build the Tacotron 2 container:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/
bash scripts/docker/build.sh
```
Run the container in interactive mode by typing:
```bash
bash scripts/docker/interactive.sh
```
Export Tacotron 2 to TorchScript:
```bash
cd /workspace/tacotron2/
mkdir -p output
python exports/export_tacotron2_ts.py --tacotron2 checkpoints/nvidia_tacotron2pyt_fp16_20190427 -o output/model.pt --amp-run
```
To export WaveGlow to TensorRT 7, install ONNX-TensorRT:
```bash
cd /workspace && git clone https://github.com/onnx/onnx-tensorrt.git
cd /workspace/onnx-tensorrt/ && git submodule update --init --recursive
cd /workspace/onnx-tensorrt && mkdir -p build
cd /workspace/onnx-tensorrt/build && cmake .. -DCMAKE_CXX_FLAGS=-isystem\ /usr/local/cuda/include && make -j12 && make install
cd /workspace/tacotron2
```
Export WaveGlow to ONNX intermediate representation:
```bash
python exports/export_waveglow_onnx.py --waveglow checkpoints/nvidia_waveglow256pyt_fp16 --wn-channels 256 --fp16 -o output/
```
Use the exported ONNX IR to generate a TensorRT engine:
```bash
python trt/export_onnx2trt.py --waveglow output/waveglow.onnx -o output/ --fp16
```
After successful export, exit the container and copy the Tacotron 2 model and the WaveGlow engine to `model_repo`:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/
mkdir -p notebooks/conversationalai/model_repo/tacotron2/1/ notebooks/conversationalai/model_repo/waveglow-trt/1/
cp output/model.pt notebooks/conversationalai/model_repo/tacotron2/1/
cp output/waveglow_fp16.engine notebooks/conversationalai/model_repo/waveglow-trt/1/
```
## Deployment
With all models ready for deployment, go to the `conversationalai/client` folder and build the Triton client:
```bash
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/client
docker build -f Dockerfile --network=host -t speech_ai_client:demo .
```
From a terminal, start the Triton server:
```bash
NV_GPU=1 nvidia-docker run --ipc=host --network=host --rm -p8000:8000 -p8001:8001 \
    -v ${PWD}/DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/notebooks/conversationalai/model_repo/:/models \
    nvcr.io/nvidia/tensorrtserver:20.01-py3 trtserver --model-store=/models --log-verbose 1
```
In another terminal, run the client:
```bash
docker run -it --rm --network=host --device /dev/snd:/dev/snd --device /dev/usb:/dev/usb speech_ai_client:demo bash /workspace/speech_ai_demo/start_jupyter.sh
```

View file

@ -0,0 +1,41 @@
# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM nvcr.io/nvidia/tensorrtserver:20.01-py3-clientsdk AS trtserver
FROM continuumio/miniconda3
RUN apt-get update && apt-get install -y pbzip2 pv bzip2 cabextract mc iputils-ping wget
WORKDIR /workspace/speech_ai_demo/
# Copy the perf_client over
COPY --from=trtserver /workspace/install/ /workspace/install/
ENV LD_LIBRARY_PATH /workspace/install/lib:${LD_LIBRARY_PATH}
# set up env variables
ENV PATH="$PATH:/opt/conda/bin"
RUN cd /workspace/speech_ai_demo/
# jupyter lab extensions
RUN conda install -c conda-forge jupyterlab=1.0 ipywidgets=7.5 nodejs python-sounddevice librosa unidecode inflect
RUN jupyter labextension install @jupyter-widgets/jupyterlab-manager
RUN pip install /workspace/install/python/tensorrtserver*.whl
# Copy the python wheel and install with pip
COPY --from=trtserver /workspace/install/python/tensorrtserver*.whl /tmp/
RUN pip install /tmp/tensorrtserver*.whl && rm /tmp/tensorrtserver*.whl
COPY start_jupyter.sh /workspace/speech_ai_demo/
COPY speech_ai_demo/utils /workspace/speech_ai_demo/utils
COPY speech_ai_demo/speech_ai_demo.ipynb /workspace/speech_ai_demo/
RUN chmod a+x /workspace/speech_ai_demo/start_jupyter.sh

View file

@ -0,0 +1,486 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import sounddevice as sd\n",
"print(sd.query_devices())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sd.default.device = 11"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import time\n",
"import numpy as np\n",
"import collections\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"from matplotlib import cm as cm\n",
"from IPython.display import Audio, display, clear_output, Markdown, Image\n",
"import librosa\n",
"import librosa.display\n",
"import ipywidgets as widgets\n",
"# \n",
"# import tacotron2 preprocessing utilities\n",
"from utils.tacotron2.symbols import symbols\n",
"from utils.tacotron2 import text_to_sequence as text_to_sequence_internal\n",
"# import bert pre- and postprocessing utilities\n",
"from utils.bert.preprocessing import convert_example_to_feature, read_squad_example, get_predictions\n",
"from utils.bert.tokenization import BertTokenizer\n",
"# import jasper pre- and postprocessing utilities\n",
"from utils.jasper.speech_utils import AudioSegment, SpeechClient\n",
"# import trtis api\n",
"from tensorrtserver.api import *\n",
"\n",
"\n",
"defaults = {\n",
" # settings\n",
" 'sigma_infer': 0.6, # don't touch this\n",
" 'sampling_rate': 22050, # don't touch this\n",
" 'stft_hop_length': 256, # don't touch this\n",
" 'url': 'localhost:8000', # don't touch this\n",
" 'protocol': 0, # 0: http, 1: grpc \n",
" 'autoplay': True, # autoplay\n",
" 'character_limit_min': 4, # don't touch this\n",
" 'character_limit_max': 124, # don't touch this\n",
" 'vocab_file': \"./utils/bert/vocab.txt\", # don't touch this\n",
" 'do_lower_case': True, # don't touch this\n",
" 'version_2_with_negative': False, # if true, the model may give 'i don't know' as an answer. the model has to be trained for it. \n",
" 'max_seq_length': 384, # the maximum total input sequence length after WordPiece tokenization. Sequences longer than this will be truncated, and sequences shorter than this will be padded. \n",
" 'doc_stride': 128, # when splitting up a long document into chunks, how much stride to take between chunks\n",
" 'max_query_length': 64, # the maximum number of tokens in the question. Questions longer than this will be truncated to this length\n",
" 'n_best_size': 10, # don't touch this\n",
" 'max_answer_length': 30, # don't touch this\n",
" 'do_lower_case': True, # don't touch this\n",
" 'null_score_diff_threshold': 0.0, # don't touch this\n",
" 'jasper_batch_size': 1, # don't touch this\n",
" 'jasper_sampling_rate': 44100, # don't touch this\n",
" 'record_maximum_seconds': 4.0 # maximum number of seconds to record\n",
"}\n",
"\n",
"\n",
"# create args object\n",
"class Struct:\n",
" def __init__(self, **entries):\n",
" self.__dict__.update(entries)\n",
"\n",
"\n",
"args = Struct(**defaults)\n",
"\n",
"\n",
"# create the inference context for the models\n",
"infer_ctx_bert = InferContext(args.url, args.protocol, 'bertQA-onnx', -1)\n",
"infer_ctx_tacotron2 = InferContext(args.url, args.protocol, 'tacotron2', -1)\n",
"infer_ctx_waveglow = InferContext(args.url, args.protocol, 'waveglow-trt', -1)\n",
"infer_jasper = SpeechClient(args.url, args.protocol, 'jasper-trt-ensemble', -1, \n",
" args.jasper_batch_size, 'pyt', verbose=False, \n",
" mode='asynchronous', from_features=False)\n",
"\n",
"\n",
"def display_sequences(sequences, labels, colors):\n",
" ''' displays sequences on a dotted plot '''\n",
" plt.figure(figsize=(10, 2.5))\n",
" plt.tick_params(\n",
" axis='both',\n",
" which='both',\n",
" bottom=False,\n",
" top=False,\n",
" left=False,\n",
" right=False,\n",
" labelbottom=False,\n",
" labelleft=False)\n",
" for sequence,color,label in zip(sequences,colors,labels):\n",
" plt.plot(sequence, color, label=label)\n",
" plt.legend(loc='upper right')\n",
" plt.show()\n",
"\n",
"\n",
"def display_heatmap(sequence, title='preprocessed text'):\n",
" ''' displays sequence as a heatmap '''\n",
" clear_output(wait=True)\n",
" sequence = sequence[None, :]\n",
" plt.figure(figsize=(10, 2.5))\n",
" plt.title(title)\n",
" plt.tick_params(\n",
" axis='both',\n",
" which='both',\n",
" bottom=False,\n",
" top=False,\n",
" left=False,\n",
" right=False,\n",
" labelbottom=False,\n",
" labelleft=False)\n",
" plt.imshow(sequence, cmap='BrBG_r', interpolation='nearest')\n",
" plt.show()\n",
"\n",
"\n",
"def display_sound(signal, title, color):\n",
" ''' displays signal '''\n",
" clear_output(wait=True)\n",
" plt.figure(figsize=(10, 2.5))\n",
" plt.title(title)\n",
" plt.tick_params(\n",
" axis='both',\n",
" which='both',\n",
" bottom=True,\n",
" top=False,\n",
" left=False,\n",
" right=False,\n",
" labelbottom=True,\n",
" labelleft=False)\n",
" librosa.display.waveplot(signal, color=color)\n",
" plt.show()\n",
"\n",
"\n",
"def display_spectrogram(mel, title):\n",
" ''' displays mel spectrogram '''\n",
" clear_output(wait=True)\n",
" fig = plt.figure(figsize=(10, 2.5))\n",
" ax = fig.add_subplot(111)\n",
"# plt.title(title)\n",
" plt.tick_params(\n",
" axis='both',\n",
" which='both',\n",
" bottom=True,\n",
" top=False,\n",
" left=False,\n",
" right=False,\n",
" labelbottom=True,\n",
" labelleft=False)\n",
" plt.xlabel('Time')\n",
" cmap = cm.get_cmap('jet', 30)\n",
" cax = ax.imshow(mel.astype(np.float32), interpolation=\"nearest\", cmap=cmap)\n",
" ax.grid(True)\n",
" plt.show()\n",
"\n",
"\n",
"def text_to_sequence(text):\n",
" ''' preprocessor of tacotron2\n",
" ::text:: the input str\n",
" ::returns:: sequence, the preprocessed text\n",
" '''\n",
" sequence = text_to_sequence_internal(text, ['english_cleaners'])\n",
" sequence = np.array(sequence, dtype=np.int64)\n",
" return sequence\n",
"\n",
"\n",
"def sequence_to_mel(sequence):\n",
" ''' calls tacotron2\n",
" ::sequence:: int64 numpy array, contains the preprocessed text\n",
" ::returns:: (mel, mel_lengths) pair\n",
" mel is the mel-spectrogram, np.array\n",
" mel_lengths contains the length of the unpadded mel, np.array\n",
" '''\n",
" input_lengths = [len(sequence)]\n",
" input_lengths = np.array(input_lengths, dtype=np.int64)\n",
" # prepare input/output\n",
" input_dict = {}\n",
" input_dict['sequence__0'] = (sequence,)\n",
" input_dict['input_lengths__1'] = (input_lengths,)\n",
" output_dict = {}\n",
" output_dict['mel_outputs_postnet__0'] = InferContext.ResultFormat.RAW\n",
" output_dict['mel_lengths__1'] = InferContext.ResultFormat.RAW\n",
" batch_size = 1\n",
" # call tacotron2\n",
" result = infer_ctx_tacotron2.run(input_dict, output_dict, batch_size)\n",
" # get results\n",
" mel = result['mel_outputs_postnet__0'][0] # take only the first instance in the output batch\n",
" mel_lengths = result['mel_lengths__1'][0] # take only the first instance in the output batch\n",
" return mel, mel_lengths\n",
"\n",
"\n",
"def mel_to_signal(mel, mel_lengths):\n",
" ''' calls waveglow\n",
" ::mel:: mel spectrogram\n",
" ::mel_lengths:: original length of mel spectrogram\n",
" ::returns:: waveform\n",
" '''\n",
" # padding/trimming mel to dimension 620\n",
" mel = mel[:,:,None]\n",
" # prepare input/output\n",
" input_dict = {}\n",
" input_dict['mel'] = (mel,)\n",
" stride = 256\n",
" kernel_size = 1024\n",
" n_group = 8\n",
" z_size = (mel.shape[1]-1)*stride + (kernel_size-1) + 1 - (kernel_size-stride)\n",
" z_size = z_size//n_group\n",
" shape = (n_group,z_size,1)\n",
" input_dict['z'] = np.random.normal(0.0, 1.0, shape).astype(mel.dtype)\n",
" input_dict['z'] = (input_dict['z'],)\n",
" output_dict = {}\n",
" output_dict['audio'] = InferContext.ResultFormat.RAW\n",
" batch_size = 1\n",
" # call waveglow\n",
" result = infer_ctx_waveglow.run(input_dict, output_dict, batch_size)\n",
" # get the results\n",
" signal = result['audio'][0] # take only the first instance in the output batch\n",
" # postprocessing of waveglow: trimming signal to its actual size\n",
" trimmed_length = mel_lengths[0] * args.stft_hop_length\n",
" signal = signal[:trimmed_length] # trim\n",
" signal = signal.astype(np.float32)\n",
" return signal\n",
"\n",
"\n",
"def question_and_context_to_feature(question_text, context):\n",
" tokenizer = BertTokenizer(args.vocab_file, do_lower_case=args.do_lower_case, max_len=512) # for bert large\n",
" example = read_squad_example(question_text, \n",
" context, \n",
" version_2_with_negative=args.version_2_with_negative)\n",
" feature = convert_example_to_feature(\n",
" example=example, \n",
" tokenizer=tokenizer, \n",
" max_seq_length=args.max_seq_length, \n",
" doc_stride=args.doc_stride, \n",
" max_query_length=args.max_query_length)\n",
" return example, feature\n",
"\n",
"\n",
"def button_rec_clicked(change):\n",
" if record_seconds.value > 0.0:\n",
" with plot_jasper_audio:\n",
" clear_output(wait=True)\n",
" recording = sd.rec(int(record_seconds.value*args.jasper_sampling_rate), samplerate=args.jasper_sampling_rate, channels=1)\n",
" while record_seconds.value > 0:\n",
" time.sleep(0.01)\n",
" record_seconds.value -= 0.01\n",
" sd.wait()\n",
" recording = recording.squeeze()\n",
" display_sound(recording,'recorded audio','orange')\n",
" audio = AudioSegment(recording, args.jasper_sampling_rate).samples\n",
" hypotheses = infer_jasper.recognize([audio], ['audio recording'])\n",
" question_text.value = str(hypotheses[0]) + '? '\n",
"\n",
"\n",
"button_rec = widgets.Button(description=\"RECORD\")\n",
"button_rec.on_click(button_rec_clicked)\n",
"record_seconds = widgets.FloatSlider(min=0.0, max=args.record_maximum_seconds, value=args.record_maximum_seconds, \n",
" step=0.1, continuous_update=True, description = \"seconds\")\n",
"buttons = widgets.HBox([button_rec, record_seconds])\n",
"\n",
"\n",
"question_text = widgets.Textarea(\n",
" value='jasper output / bert input question',\n",
" placeholder='',\n",
" description='',\n",
" disabled=False,\n",
" continuous_update=True,\n",
" layout=widgets.Layout(width='550px', height='40px')\n",
")\n",
"\n",
"\n",
"context = widgets.Textarea(\n",
" value='bert input context',\n",
" placeholder='',\n",
" description='',\n",
" disabled=False,\n",
" continuous_update=True,\n",
" layout=widgets.Layout(width='550px', height='80px')\n",
")\n",
"\n",
"question_context = widgets.HBox([question_text, context])\n",
"\n",
"response_text = widgets.Textarea(\n",
" value='',\n",
" placeholder='',\n",
" description='',\n",
" disabled=False,\n",
" continuous_update=True,\n",
" layout=widgets.Layout(width='550px', height='40px')\n",
")\n",
"\n",
"\n",
"def text_to_logits(input_ids_data, segment_ids_data, input_mask_data):\n",
" # call bert\n",
" input_dict = {}\n",
" input_dict['input__0'] = (input_ids_data.astype(np.int64),)\n",
" input_dict['input__1'] = (segment_ids_data.astype(np.int64),)\n",
" input_dict['input__2'] = (input_mask_data.astype(np.int64),)\n",
" batch_size = 1\n",
" output_dict = {}\n",
" output_dict['output__0'] = InferContext.ResultFormat.RAW\n",
" output_dict['output__1'] = InferContext.ResultFormat.RAW\n",
" # \n",
" result = infer_ctx_bert.run(input_dict, output_dict, batch_size)\n",
" # \n",
" print(\"BANGLA\")\n",
" start_logits = [float(x) for x in result[\"output__0\"][0].flat]\n",
" end_logits = [float(x) for x in result[\"output__1\"][0].flat]\n",
" return start_logits, end_logits\n",
"\n",
"\n",
"def question_text_change(change):\n",
" text = change['new']\n",
" text = text.strip(' ')\n",
" length = len(text)\n",
" if length < args.character_limit_min: # too short text\n",
" return\n",
" if text[-1] != '?':\n",
" return\n",
" # preprocess bert\n",
" example, feature = question_and_context_to_feature(text, context.value)\n",
" input_ids_data = np.array(feature.input_ids, dtype=np.int64)\n",
" input_mask_data = np.array(feature.input_mask, dtype=np.int64)\n",
" segment_ids_data = np.array(feature.segment_ids, dtype=np.int64)\n",
" L = segment_ids_data.shape[0] - 1\n",
" while L > 20 and segment_ids_data[L-20] == 0:\n",
" L -= 20\n",
" with plot_tensor:\n",
" clear_output(wait=True)\n",
" C = input_ids_data.max()\n",
" sequences = (input_ids_data[:L],C//2*input_mask_data[:L],C*segment_ids_data[:L])\n",
" display_sequences(sequences, ('input','mask','segment'), ('r.','b.','g.'))\n",
" \n",
" # call bert\n",
" start_logits, end_logits = text_to_logits(input_ids_data, segment_ids_data, input_mask_data)\n",
" with plot_logits:\n",
" clear_output(wait=True)\n",
" start = np.array(start_logits, dtype=np.float32)\n",
" end = np.array(end_logits, dtype=np.float32)\n",
" sequences = (start[:L], end[:L])\n",
" display_sequences(sequences, ('start_logits', 'end_logits'), ('black', 'violet'))\n",
" # postprocess bert\n",
" prediction = get_predictions(example, feature, start_logits, end_logits, \n",
" args.n_best_size, args.max_answer_length, args.do_lower_case, \n",
" args.version_2_with_negative, args.null_score_diff_threshold)\n",
" response_text.value = prediction[0][\"text\"] + '. \\n'\n",
"\n",
"\n",
"def context_change(change):\n",
" text = change['new']\n",
" length = len(text)\n",
" if length < args.character_limit_min: # too short text\n",
" return\n",
" # inference\n",
" question_text.value += ' '\n",
"\n",
"def response_text_change(change):\n",
" ''' this gets called each time text_area.value changes '''\n",
" text = change['new']\n",
" text = text.strip(' ')\n",
" length = len(text)\n",
" if length < args.character_limit_min: # too short text\n",
" return\n",
" if length > args.character_limit_max: # too long text\n",
" text_area.value = text[:args.character_limit_max]\n",
" return\n",
" # preprocess tacotron2\n",
" sequence = text_to_sequence(text)\n",
" with plot_response_text_preprocessed:\n",
" display_heatmap(sequence)\n",
" # run tacotron2\n",
" mel, mel_lengths = sequence_to_mel(sequence)\n",
" with plot_spectrogram:\n",
" display_spectrogram(mel, change['new'])\n",
" # run waveglow\n",
" signal = mel_to_signal(mel, mel_lengths)\n",
" with plot_signal:\n",
" display_sound(signal, change['new'], 'green')\n",
" with plot_play:\n",
" clear_output(wait=True)\n",
" display(Audio(signal, rate=args.sampling_rate, autoplay=args.autoplay))\n",
"\n",
"def get_output_widget(width, height, object_fit='fill'):\n",
" ''' creates an output widget with default values and returns it '''\n",
" layout = widgets.Layout(width=width,\n",
" height=height,\n",
" object_fit=object_fit,\n",
" object_position = '{center} {center}')\n",
" ret = widgets.Output(layout=layout)\n",
" return ret\n",
"\n",
"\n",
"plot_tensor = get_output_widget(width='5in',height='1.75in')\n",
"plot_logits = get_output_widget(width='5in',height='1.75in')\n",
"plot_response_text_preprocessed = get_output_widget(width='10in',height='1in')\n",
"plot_spectrogram = get_output_widget(width='10in',height='2.0in', object_fit='scale-down')\n",
"plot_jasper_audio = get_output_widget(width='10in',height='2.0in')\n",
"plot_signal = get_output_widget(width='10in',height='2.0in')\n",
"plot_play = get_output_widget(width='4in',height='1in')\n",
"\n",
"empty = widgets.VBox([], layout=widgets.Layout(height='1in'))\n",
"markdown_z0 = Markdown('**Jasper input**')\n",
"markdown_m0 = Markdown('**Jasper output / BERT input**')\n",
"markdown_bert = Markdown('**BERT**')\n",
"markdown_tacotron2 = Markdown('**Tacotron 2**')\n",
"markdown_3 = Markdown('**WaveGlow**')\n",
"\n",
"bert_widgets = widgets.HBox([plot_tensor, plot_logits])\n",
"tacotron2_widgets = widgets.HBox([response_text, plot_spectrogram])\n",
"\n",
"display(\n",
" empty, \n",
" markdown_z0, \n",
" buttons, \n",
" markdown_m0, question_context,\n",
" markdown_bert,\n",
" bert_widgets,\n",
" markdown_tacotron2,\n",
" tacotron2_widgets,\n",
" markdown_3, \n",
" plot_play, \n",
" empty\n",
")\n",
"\n",
"\n",
"def fill_initial_values():\n",
" with plot_jasper_audio:\n",
" display_sound(np.zeros(100),\"input audio\",'orange')\n",
" # \n",
" context.value = \"The man holding the telescope went into a shop to purchase some flowers on the occasion of all saints day. \"\n",
" # context.value = \"William Shakespeare was an English poet, playwright and actor, widely regarded as the greatest writer in the English language and the world's greatest dramatist. He is often called England's national poet and the \\\"Bard of Avon\\\".\"\n",
" question_text.value = \"\"\n",
" \n",
"fill_initial_values()\n",
"\n",
"response_text.observe(response_text_change, names='value')\n",
"question_text.observe(question_text_change, names='value')\n",
"context.observe(context_change, names='value')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

View file

@ -0,0 +1,19 @@
Copyright (c) 2017 Keith Ito
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View file

@ -0,0 +1,554 @@
# coding=utf-8
# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import json
import numpy as np
import collections
from utils.bert.tokenization import (BasicTokenizer, BertTokenizer, whitespace_tokenize)
class SquadExample(object):
"""
A single training/test example for the Squad dataset.
For examples without an answer, the start and end position are -1.
"""
def __init__(self,
qas_id,
question_text,
doc_tokens,
orig_answer_text=None,
start_position=None,
end_position=None,
is_impossible=None):
self.qas_id = qas_id
self.question_text = question_text
self.doc_tokens = doc_tokens
self.orig_answer_text = orig_answer_text
self.start_position = start_position
self.end_position = end_position
self.is_impossible = is_impossible
def __str__(self):
return self.__repr__()
def __repr__(self):
s = ""
s += "qas_id: %s" % (self.qas_id)
s += ", question_text: %s" % (
self.question_text)
s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
if self.start_position:
s += ", start_position: %d" % (self.start_position)
if self.end_position:
s += ", end_position: %d" % (self.end_position)
if self.is_impossible:
s += ", is_impossible: %r" % (self.is_impossible)
return s
class InputFeatures(object):
"""A single set of features of data."""
def __init__(self,
unique_id,
example_index,
doc_span_index,
tokens,
token_to_orig_map,
token_is_max_context,
input_ids,
input_mask,
segment_ids,
start_position=None,
end_position=None,
is_impossible=None):
self.unique_id = unique_id
self.example_index = example_index
self.doc_span_index = doc_span_index
self.tokens = tokens
self.token_to_orig_map = token_to_orig_map
self.token_is_max_context = token_is_max_context
self.input_ids = input_ids
self.input_mask = input_mask
self.segment_ids = segment_ids
self.start_position = start_position
self.end_position = end_position
self.is_impossible = is_impossible
def read_squad_example(question_text, context, version_2_with_negative):
""" reads a question and a context, and turns it into a SquadExample """
#
def is_whitespace(c):
if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
return True
return False
#
doc_tokens = []
prev_is_whitespace = True
for c in context:
if is_whitespace(c):
prev_is_whitespace = True
else:
if prev_is_whitespace:
doc_tokens.append(c)
else:
doc_tokens[-1] += c
prev_is_whitespace = False
#
example = SquadExample(
qas_id=0,
question_text=question_text,
doc_tokens=doc_tokens,
orig_answer_text=None,
start_position=None,
end_position=None,
is_impossible=False
)
return example
def convert_example_to_feature(example, tokenizer, max_seq_length,
doc_stride, max_query_length):
""" converts an example into a feature """
unique_id = 1000000000
examples = [example]
features = []
for (example_index, example) in enumerate(examples):
query_tokens = tokenizer.tokenize(example.question_text)
if len(query_tokens) > max_query_length:
query_tokens = query_tokens[0:max_query_length]
tok_to_orig_index = []
orig_to_tok_index = []
all_doc_tokens = []
for (i, token) in enumerate(example.doc_tokens):
orig_to_tok_index.append(len(all_doc_tokens))
sub_tokens = tokenizer.tokenize(token)
for sub_token in sub_tokens:
tok_to_orig_index.append(i)
all_doc_tokens.append(sub_token)
tok_start_position = None
tok_end_position = None
# The -3 accounts for [CLS], [SEP] and [SEP]
max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
# We can have documents that are longer than the maximum sequence length.
# To deal with this we do a sliding window approach, where we take chunks
# of the up to our max length with a stride of `doc_stride`.
_DocSpan = collections.namedtuple( # pylint: disable=invalid-name
"DocSpan", ["start", "length"])
doc_spans = []
start_offset = 0
while start_offset < len(all_doc_tokens):
length = len(all_doc_tokens) - start_offset
if length > max_tokens_for_doc:
length = max_tokens_for_doc
doc_spans.append(_DocSpan(start=start_offset, length=length))
if start_offset + length == len(all_doc_tokens):
break
start_offset += min(length, doc_stride)
for (doc_span_index, doc_span) in enumerate(doc_spans):
tokens = []
token_to_orig_map = {}
token_is_max_context = {}
segment_ids = []
tokens.append("[CLS]")
segment_ids.append(0)
for token in query_tokens:
tokens.append(token)
segment_ids.append(0)
tokens.append("[SEP]")
segment_ids.append(0)
for i in range(doc_span.length):
split_token_index = doc_span.start + i
token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]
is_max_context = _check_is_max_context(doc_spans, doc_span_index,
split_token_index)
token_is_max_context[len(tokens)] = is_max_context
tokens.append(all_doc_tokens[split_token_index])
segment_ids.append(1)
tokens.append("[SEP]")
segment_ids.append(1)
input_ids = tokenizer.convert_tokens_to_ids(tokens)
# The mask has 1 for real tokens and 0 for padding tokens. Only real
# tokens are attended to.
input_mask = [1] * len(input_ids)
# Zero-pad up to the sequence length.
while len(input_ids) < max_seq_length:
input_ids.append(0)
input_mask.append(0)
segment_ids.append(0)
assert len(input_ids) == max_seq_length
assert len(input_mask) == max_seq_length
assert len(segment_ids) == max_seq_length
start_position = None
end_position = None
features.append(
InputFeatures(
unique_id=unique_id,
example_index=example_index,
doc_span_index=doc_span_index,
tokens=tokens,
token_to_orig_map=token_to_orig_map,
token_is_max_context=token_is_max_context,
input_ids=input_ids,
input_mask=input_mask,
segment_ids=segment_ids,
start_position=start_position,
end_position=end_position,
is_impossible=example.is_impossible))
unique_id += 1
assert len(features) == 1, "too large input"
return features[0]
def _check_is_max_context(doc_spans, cur_span_index, position):
"""Check if this is the 'max context' doc span for the token."""
best_score = None
best_span_index = None
for (span_index, doc_span) in enumerate(doc_spans):
end = doc_span.start + doc_span.length - 1
if position < doc_span.start:
continue
if position > end:
continue
num_left_context = position - doc_span.start
num_right_context = end - position
score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
if best_score is None or score > best_score:
best_score = score
best_span_index = span_index
return cur_span_index == best_span_index
RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])
def get_predictions(example, feature, start_logits, end_logits, n_best_size,
max_answer_length, do_lower_case,
version_2_with_negative, null_score_diff_threshold):
"""Write final predictions to the json file and log-odds of null if needed."""
all_examples = [example]
all_features = [feature]
all_results = [RawResult(unique_id=1000000000,start_logits=start_logits,end_logits=end_logits)]
example_index_to_features = collections.defaultdict(list)
for feature in all_features:
example_index_to_features[feature.example_index].append(feature)
unique_id_to_result = {}
for result in all_results:
unique_id_to_result[result.unique_id] = result
_PrelimPrediction = collections.namedtuple( # pylint: disable=invalid-name
"PrelimPrediction",
["feature_index", "start_index", "end_index", "start_logit", "end_logit"])
all_predictions = collections.OrderedDict()
all_nbest_json = collections.OrderedDict()
scores_diff_json = collections.OrderedDict()
for (example_index, example) in enumerate(all_examples):
features = example_index_to_features[example_index]
prelim_predictions = []
# keep track of the minimum score of null start+end of position 0
score_null = 1000000 # large and positive
min_null_feature_index = 0 # the paragraph slice with min null score
null_start_logit = 0 # the start logit at the slice with min null score
null_end_logit = 0 # the end logit at the slice with min null score
for (feature_index, feature) in enumerate(features):
result = unique_id_to_result[feature.unique_id]
start_indexes = _get_indices_of_largest_logits(result.start_logits)
end_indexes = _get_indices_of_largest_logits(result.end_logits)
# if we could have irrelevant answers, get the min score of irrelevant
if version_2_with_negative:
feature_null_score = result.start_logits[0] + result.end_logits[0]
if feature_null_score < score_null:
score_null = feature_null_score
min_null_feature_index = feature_index
null_start_logit = result.start_logits[0]
null_end_logit = result.end_logits[0]
for start_index in start_indexes:
for end_index in end_indexes:
# We could hypothetically create invalid predictions, e.g., predict
# that the start of the span is in the question. We throw out all
# invalid predictions.
if start_index >= len(feature.tokens):
continue
if end_index >= len(feature.tokens):
continue
if start_index not in feature.token_to_orig_map:
continue
if end_index not in feature.token_to_orig_map:
continue
if not feature.token_is_max_context.get(start_index, False):
continue
if end_index < start_index:
continue
length = end_index - start_index + 1
if length > max_answer_length:
continue
prelim_predictions.append(
_PrelimPrediction(
feature_index=feature_index,
start_index=start_index,
end_index=end_index,
start_logit=result.start_logits[start_index],
end_logit=result.end_logits[end_index]))
if version_2_with_negative:
prelim_predictions.append(
_PrelimPrediction(
feature_index=min_null_feature_index,
start_index=0,
end_index=0,
start_logit=null_start_logit,
end_logit=null_end_logit))
prelim_predictions = sorted(
prelim_predictions,
key=lambda x: (x.start_logit + x.end_logit),
reverse=True)
_NbestPrediction = collections.namedtuple( # pylint: disable=invalid-name
"NbestPrediction", ["text", "start_logit", "end_logit"])
seen_predictions = {}
nbest = []
for pred in prelim_predictions:
if len(nbest) >= n_best_size:
break
feature = features[pred.feature_index]
if pred.start_index > 0: # this is a non-null prediction
tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
orig_doc_start = feature.token_to_orig_map[pred.start_index]
orig_doc_end = feature.token_to_orig_map[pred.end_index]
orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
tok_text = " ".join(tok_tokens)
# De-tokenize WordPieces that have been split off.
tok_text = tok_text.replace(" ##", "")
tok_text = tok_text.replace("##", "")
# Clean whitespace
tok_text = tok_text.strip()
tok_text = " ".join(tok_text.split())
orig_text = " ".join(orig_tokens)
final_text = get_final_text(tok_text, orig_text, do_lower_case)
if final_text in seen_predictions:
continue
seen_predictions[final_text] = True
else:
final_text = ""
seen_predictions[final_text] = True
nbest.append(
_NbestPrediction(
text=final_text,
start_logit=pred.start_logit,
end_logit=pred.end_logit))
# if we didn't include the empty option in the n-best, include it
if version_2_with_negative:
if "" not in seen_predictions:
nbest.append(
_NbestPrediction(
text="",
start_logit=null_start_logit,
end_logit=null_end_logit))
# In very rare edge cases we could only have single null prediction.
# So we just create a nonce prediction in this case to avoid failure.
if len(nbest) == 1:
nbest.insert(0,
_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
# In very rare edge cases we could have no valid predictions. So we
# just create a nonce prediction in this case to avoid failure.
if not nbest:
nbest.append(
_NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))
assert len(nbest) >= 1
total_scores = []
best_non_null_entry = None
for entry in nbest:
total_scores.append(entry.start_logit + entry.end_logit)
if not best_non_null_entry:
if entry.text:
best_non_null_entry = entry
probs = _compute_softmax(total_scores)
nbest_json = []
for (i, entry) in enumerate(nbest):
output = collections.OrderedDict()
output["text"] = entry.text
output["probability"] = probs[i]
output["start_logit"] = entry.start_logit
output["end_logit"] = entry.end_logit
nbest_json.append(output)
assert len(nbest_json) >= 1
if not version_2_with_negative:
all_predictions[example.qas_id] = nbest_json[0]["text"]
else:
# predict "" iff the null score - the score of best non-null > threshold
score_diff = score_null - best_non_null_entry.start_logit - (
best_non_null_entry.end_logit)
scores_diff_json[example.qas_id] = score_diff
if score_diff > null_score_diff_threshold:
all_predictions[example.qas_id] = ""
else:
all_predictions[example.qas_id] = best_non_null_entry.text
all_nbest_json[example.qas_id] = nbest_json
return nbest_json
def get_final_text(pred_text, orig_text, do_lower_case):
"""Project the tokenized prediction back to the original text."""
# When we created the data, we kept track of the alignment between original
# (whitespace tokenized) tokens and our WordPiece tokenized tokens. So
# now `orig_text` contains the span of our original text corresponding to the
# span that we predicted.
#
# However, `orig_text` may contain extra characters that we don't want in
# our prediction.
#
# For example, let's say:
# pred_text = steve smith
# orig_text = Steve Smith's
#
# We don't want to return `orig_text` because it contains the extra "'s".
#
# We don't want to return `pred_text` because it's already been normalized
# (the SQuAD eval script also does punctuation stripping/lower casing but
# our tokenizer does additional normalization like stripping accent
# characters).
#
# What we really want to return is "Steve Smith".
#
# Therefore, we have to apply a semi-complicated alignment heuristic between
# `pred_text` and `orig_text` to get a character-to-character alignment. This
# can fail in certain cases in which case we just return `orig_text`.
def _strip_spaces(text):
ns_chars = []
ns_to_s_map = collections.OrderedDict()
for (i, c) in enumerate(text):
if c == " ":
continue
ns_to_s_map[len(ns_chars)] = i
ns_chars.append(c)
ns_text = "".join(ns_chars)
return (ns_text, ns_to_s_map)
# We first tokenize `orig_text`, strip whitespace from the result
# and `pred_text`, and check if they are the same length. If they are
# NOT the same length, the heuristic has failed. If they are the same
# length, we assume the characters are one-to-one aligned.
tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
tok_text = " ".join(tokenizer.tokenize(orig_text))
start_position = tok_text.find(pred_text)
if start_position == -1:
return orig_text
end_position = start_position + len(pred_text) - 1
(orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text)
(tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text)
if len(orig_ns_text) != len(tok_ns_text):
return orig_text
# We then project the characters in `pred_text` back to `orig_text` using
# the character-to-character alignment.
tok_s_to_ns_map = {}
for (i, tok_index) in tok_ns_to_s_map.items():
tok_s_to_ns_map[tok_index] = i
orig_start_position = None
if start_position in tok_s_to_ns_map:
ns_start_position = tok_s_to_ns_map[start_position]
if ns_start_position in orig_ns_to_s_map:
orig_start_position = orig_ns_to_s_map[ns_start_position]
if orig_start_position is None:
return orig_text
orig_end_position = None
if end_position in tok_s_to_ns_map:
ns_end_position = tok_s_to_ns_map[end_position]
if ns_end_position in orig_ns_to_s_map:
orig_end_position = orig_ns_to_s_map[ns_end_position]
if orig_end_position is None:
return orig_text
output_text = orig_text[orig_start_position:(orig_end_position + 1)]
return output_text
def _compute_softmax(scores):
"""Compute softmax probability over raw logits."""
if not scores:
return []
max_score = None
for score in scores:
if max_score is None or score > max_score:
max_score = score
exp_scores = []
total_sum = 0.0
for score in scores:
x = math.exp(score - max_score)
exp_scores.append(x)
total_sum += x
probs = []
for score in exp_scores:
probs.append(score / total_sum)
return probs
def _get_indices_of_largest_logits(logits):
""" sort logits and return the indices of the sorted array """
indices_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)
indices = map(lambda x: x[0], indices_and_score)
indices = list(indices)
return indices

View file

@ -0,0 +1,391 @@
# coding=utf-8
# Copyright (c) 2019 NVIDIA CORPORATION. All rights reserved.
# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import, division, print_function, unicode_literals
import collections
import logging
import os
import unicodedata
import six
from io import open
logger = logging.getLogger(__name__)
PRETRAINED_VOCAB_ARCHIVE_MAP = {
'bert-base-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt",
'bert-large-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-uncased-vocab.txt",
'bert-base-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-vocab.txt",
'bert-large-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-large-cased-vocab.txt",
'bert-base-multilingual-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-uncased-vocab.txt",
'bert-base-multilingual-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-multilingual-cased-vocab.txt",
'bert-base-chinese': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-chinese-vocab.txt",
}
PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP = {
'bert-base-uncased': 512,
'bert-large-uncased': 512,
'bert-base-cased': 512,
'bert-large-cased': 512,
'bert-base-multilingual-uncased': 512,
'bert-base-multilingual-cased': 512,
'bert-base-chinese': 512,
}
VOCAB_NAME = 'vocab.txt'
def convert_to_unicode(text):
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
if six.PY3:
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
elif six.PY2:
if isinstance(text, str):
return text.decode("utf-8", "ignore")
elif isinstance(text, unicode):
return text
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
else:
raise ValueError("Not running on Python2 or Python 3?")
def load_vocab(vocab_file):
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
index = 0
with open(vocab_file, "r", encoding="utf-8") as reader:
while True:
token = reader.readline()
if not token:
break
token = token.strip()
vocab[token] = index
index += 1
return vocab
def whitespace_tokenize(text):
"""Runs basic whitespace cleaning and splitting on a piece of text."""
text = text.strip()
if not text:
return []
tokens = text.split()
return tokens
class BertTokenizer(object):
"""Runs end-to-end tokenization: punctuation splitting + wordpiece"""
def __init__(self, vocab_file, do_lower_case=True, max_len=None,
never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")):
if not os.path.isfile(vocab_file):
raise ValueError(
"Can't find a vocabulary file at path '{}'. To load the vocabulary from a Google pretrained "
"model use `tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)`".format(vocab_file))
self.vocab = load_vocab(vocab_file)
self.ids_to_tokens = collections.OrderedDict(
[(ids, tok) for tok, ids in self.vocab.items()])
self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case,
never_split=never_split)
self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
self.max_len = max_len if max_len is not None else int(1e12)
def tokenize(self, text):
split_tokens = []
for token in self.basic_tokenizer.tokenize(text):
for sub_token in self.wordpiece_tokenizer.tokenize(token):
split_tokens.append(sub_token)
return split_tokens
def convert_tokens_to_ids(self, tokens):
"""Converts a sequence of tokens into ids using the vocab."""
ids = []
for token in tokens:
ids.append(self.vocab[token])
if len(ids) > self.max_len:
raise ValueError(
"Token indices sequence length is longer than the specified maximum "
" sequence length for this BERT model ({} > {}). Running this"
" sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
)
return ids
def convert_ids_to_tokens(self, ids):
"""Converts a sequence of ids in wordpiece tokens using the vocab."""
tokens = []
for i in ids:
tokens.append(self.ids_to_tokens[i])
return tokens
@classmethod
def from_pretrained(cls, pretrained_model_name_or_path, cache_dir=None, *inputs, **kwargs):
"""
Instantiate a PreTrainedBertModel from a pre-trained model file.
Download and cache the pre-trained model file if needed.
"""
if pretrained_model_name_or_path in PRETRAINED_VOCAB_ARCHIVE_MAP:
vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name_or_path]
else:
vocab_file = pretrained_model_name_or_path
if os.path.isdir(vocab_file):
vocab_file = os.path.join(vocab_file, VOCAB_NAME)
# redirect to the cache, if necessary
try:
resolved_vocab_file = vocab_file
except EnvironmentError:
logger.error(
"Model name '{}' was not found in model name list ({}). "
"We assumed '{}' was a path or url but couldn't find any file "
"associated to this path or url.".format(
pretrained_model_name_or_path,
', '.join(PRETRAINED_VOCAB_ARCHIVE_MAP.keys()),
vocab_file))
return None
if resolved_vocab_file == vocab_file:
logger.info("loading vocabulary file {}".format(vocab_file))
else:
logger.info("loading vocabulary file {} from cache at {}".format(
vocab_file, resolved_vocab_file))
if pretrained_model_name_or_path in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP:
# if we're using a pretrained model, ensure the tokenizer won't index sequences longer
# than the number of positional embeddings
max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name_or_path]
kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
# Instantiate tokenizer.
tokenizer = cls(resolved_vocab_file, *inputs, **kwargs)
return tokenizer
class BasicTokenizer(object):
"""Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
def __init__(self,
do_lower_case=True,
never_split=("[UNK]", "[SEP]", "[PAD]", "[CLS]", "[MASK]")):
"""Constructs a BasicTokenizer.
Args:
do_lower_case: Whether to lower case the input.
"""
self.do_lower_case = do_lower_case
self.never_split = never_split
def tokenize(self, text):
"""Tokenizes a piece of text."""
text = self._clean_text(text)
# This was added on November 1st, 2018 for the multilingual and Chinese
# models. This is also applied to the English models now, but it doesn't
# matter since the English models were not trained on any Chinese data
# and generally don't have any Chinese data in them (there are Chinese
# characters in the vocabulary because Wikipedia does have some Chinese
# words in the English Wikipedia).
text = self._tokenize_chinese_chars(text)
orig_tokens = whitespace_tokenize(text)
split_tokens = []
for token in orig_tokens:
if self.do_lower_case and token not in self.never_split:
token = token.lower()
token = self._run_strip_accents(token)
split_tokens.extend(self._run_split_on_punc(token))
output_tokens = whitespace_tokenize(" ".join(split_tokens))
return output_tokens
def _run_strip_accents(self, text):
"""Strips accents from a piece of text."""
text = unicodedata.normalize("NFD", text)
output = []
for char in text:
cat = unicodedata.category(char)
if cat == "Mn":
continue
output.append(char)
return "".join(output)
def _run_split_on_punc(self, text):
"""Splits punctuation on a piece of text."""
if text in self.never_split:
return [text]
chars = list(text)
i = 0
start_new_word = True
output = []
while i < len(chars):
char = chars[i]
if _is_punctuation(char):
output.append([char])
start_new_word = True
else:
if start_new_word:
output.append([])
start_new_word = False
output[-1].append(char)
i += 1
return ["".join(x) for x in output]
def _tokenize_chinese_chars(self, text):
"""Adds whitespace around any CJK character."""
output = []
for char in text:
cp = ord(char)
if self._is_chinese_char(cp):
output.append(" ")
output.append(char)
output.append(" ")
else:
output.append(char)
return "".join(output)
def _is_chinese_char(self, cp):
"""Checks whether CP is the codepoint of a CJK character."""
# This defines a "chinese character" as anything in the CJK Unicode block:
# https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
#
# Note that the CJK Unicode block is NOT all Japanese and Korean characters,
# despite its name. The modern Korean Hangul alphabet is a different block,
# as is Japanese Hiragana and Katakana. Those alphabets are used to write
# space-separated words, so they are not treated specially and handled
# like all of the other languages.
if ((cp >= 0x4E00 and cp <= 0x9FFF) or #
(cp >= 0x3400 and cp <= 0x4DBF) or #
(cp >= 0x20000 and cp <= 0x2A6DF) or #
(cp >= 0x2A700 and cp <= 0x2B73F) or #
(cp >= 0x2B740 and cp <= 0x2B81F) or #
(cp >= 0x2B820 and cp <= 0x2CEAF) or
(cp >= 0xF900 and cp <= 0xFAFF) or #
(cp >= 0x2F800 and cp <= 0x2FA1F)): #
return True
return False
def _clean_text(self, text):
"""Performs invalid character removal and whitespace cleanup on text."""
output = []
for char in text:
cp = ord(char)
if cp == 0 or cp == 0xfffd or _is_control(char):
continue
if _is_whitespace(char):
output.append(" ")
else:
output.append(char)
return "".join(output)
class WordpieceTokenizer(object):
"""Runs WordPiece tokenization."""
def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100):
self.vocab = vocab
self.unk_token = unk_token
self.max_input_chars_per_word = max_input_chars_per_word
def tokenize(self, text):
"""Tokenizes a piece of text into its word pieces.
This uses a greedy longest-match-first algorithm to perform tokenization
using the given vocabulary.
For example:
input = "unaffable"
output = ["un", "##aff", "##able"]
Args:
text: A single token or whitespace separated tokens. This should have
already been passed through `BasicTokenizer`.
Returns:
A list of wordpiece tokens.
"""
output_tokens = []
for token in whitespace_tokenize(text):
chars = list(token)
if len(chars) > self.max_input_chars_per_word:
output_tokens.append(self.unk_token)
continue
is_bad = False
start = 0
sub_tokens = []
while start < len(chars):
end = len(chars)
cur_substr = None
while start < end:
substr = "".join(chars[start:end])
if start > 0:
substr = "##" + substr
if substr in self.vocab:
cur_substr = substr
break
end -= 1
if cur_substr is None:
is_bad = True
break
sub_tokens.append(cur_substr)
start = end
if is_bad:
output_tokens.append(self.unk_token)
else:
output_tokens.extend(sub_tokens)
return output_tokens
def _is_whitespace(char):
"""Checks whether `chars` is a whitespace character."""
# \t, \n, and \r are technically contorl characters but we treat them
# as whitespace since they are generally considered as such.
if char == " " or char == "\t" or char == "\n" or char == "\r":
return True
cat = unicodedata.category(char)
if cat == "Zs":
return True
return False
def _is_control(char):
"""Checks whether `chars` is a control character."""
# These are technically control characters but we count them as whitespace
# characters.
if char == "\t" or char == "\n" or char == "\r":
return False
cat = unicodedata.category(char)
if cat.startswith("C"):
return True
return False
def _is_punctuation(char):
"""Checks whether `chars` is a punctuation character."""
cp = ord(char)
# We treat all non-letter/number ASCII as punctuation.
# Characters such as "^", "$", and "`" are not in the Unicode
# Punctuation class but we treat them as punctuation anyways, for
# consistency.
if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
(cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
return True
cat = unicodedata.category(char)
if cat.startswith("P"):
return True
return False
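# Illustrative usage sketch (annotation, not part of the original file). The vocabulary
# path below is an assumption; any BERT-style vocab.txt readable by load_vocab() works.
if __name__ == "__main__":
    tokenizer = BertTokenizer("vocab.txt", do_lower_case=True)
    tokens = tokenizer.tokenize("TensorRT makes inference fast.")
    print(tokens)                                   # wordpiece tokens
    print(tokenizer.convert_tokens_to_ids(tokens))  # vocabulary ids for the tokens above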

View file

@ -0,0 +1,19 @@
Copyright (c) 2017 Keith Ito
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View file

@ -0,0 +1,446 @@
#!/usr/bin/python
# Copyright (c) 2018-2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import librosa
import soundfile as sf
import math
from os import system
import numpy as np
from tensorrtserver.api import *
import tensorrtserver.api.model_config_pb2 as model_config
import grpc
from tensorrtserver.api import api_pb2
from tensorrtserver.api import grpc_service_pb2
from tensorrtserver.api import grpc_service_pb2_grpc
WINDOWS_FNS = {"hanning": np.hanning, "hamming": np.hamming, "none": None}
def model_dtype_to_np(model_dtype):
if model_dtype == model_config.TYPE_BOOL:
return np.bool
elif model_dtype == model_config.TYPE_INT8:
return np.int8
elif model_dtype == model_config.TYPE_INT16:
return np.int16
elif model_dtype == model_config.TYPE_INT32:
return np.int32
elif model_dtype == model_config.TYPE_INT64:
return np.int64
elif model_dtype == model_config.TYPE_UINT8:
return np.uint8
elif model_dtype == model_config.TYPE_UINT16:
return np.uint16
elif model_dtype == model_config.TYPE_UINT32:
return np.uint32
elif model_dtype == model_config.TYPE_FP16:
return np.float16
elif model_dtype == model_config.TYPE_FP32:
return np.float32
elif model_dtype == model_config.TYPE_FP64:
return np.float64
elif model_dtype == model_config.TYPE_STRING:
return np.dtype(object)
return None
def ctc_decoder_predictions_tensor(prediction_cpu_tensor, batch_size, labels):
"""
Takes the output of the greedy CTC decoder and performs the CTC decoding
algorithm to remove repeated tokens and the blank symbol. Returns the predictions.
Args:
prediction_cpu_tensor: model output tensor
batch_size: number of utterances in the batch
labels: a list of labels, with the blank symbol last
Returns:
list of predicted transcripts, one per batch element
"""
blank_id = len(labels) - 1
hypotheses = []
labels_map = dict([(i, labels[i]) for i in range(len(labels))])
# iterate over batch
prediction_cpu_tensor = prediction_cpu_tensor.reshape((batch_size, int(prediction_cpu_tensor.size/batch_size)))
for ind in range(batch_size):
prediction = prediction_cpu_tensor[ind].tolist()
# CTC decoding procedure
decoded_prediction = []
previous = len(labels) - 1 # id of a blank symbol
for p in prediction:
if (p != previous or previous == blank_id) and p != blank_id:
decoded_prediction.append(p)
previous = p
hypothesis = ''.join([labels_map[c] for c in decoded_prediction])
hypotheses.append(hypothesis)
return hypotheses
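# Worked illustration (annotation, not part of the original file): with blank_id = 28,
# a greedy output such as [7, 7, 28, 7, 0, 0, 11] collapses to [7, 7, 0, 11]: repeated
# ids are kept only when separated by a blank, and blank ids themselves are dropped.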
class SpeechClient(object):
def __init__(self, url, protocol, model_name, model_version, batch_size,
model_platform=None, verbose=False,
mode="batch",
from_features=True):
self.model_name = model_name
self.model_version = model_version
self.verbose = verbose
self.batch_size = batch_size
self.transpose_audio_features = False
self.grpc_stub = None
self.ctx = None
self.correlation_id = 0
self.first_run = True
if mode == "streaming" or mode == "asynchronous":
self.correlation_id = 1
self.buffer = []
self.ctx = InferContext(url, protocol, model_name, model_version,
verbose, self.correlation_id, False)
server_ctx = ServerStatusContext(url, protocol, model_name,
verbose)
server_status = server_ctx.get_server_status()
self.audio_signals_name, self.num_samples_name, self.transcripts_name, \
self.audio_signals_type, self.num_samples_type, self.transcripts_type = self.parse_model(server_status, model_name,
batch_size, model_platform, verbose)
self.labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'", "<BLANK>"]
def postprocess(self, results, labels):
if len(results) != 1:
raise Exception("expected 1 result, got {}".format(len(results)))
transcript_values = results['TRANSCRIPT']
for transcript, filename in zip(transcript_values,
labels):
hypotheses = ctc_decoder_predictions_tensor(transcript, self.batch_size, self.labels)
print('---')
print('File: ', filename)
print("Final transcript: ", hypotheses)
print('---')
return hypotheses
def check_num_samples(self, num_samples):
if num_samples.data_type != model_config.TYPE_UINT32 and num_samples.data_type != model_config.TYPE_INT32:
raise Exception(
"expecting num_samples datatype to be TYPE_UINT32/TYPE_INT32, "
"model '" + model_name + "' output type is " +
model_config.DataType.Name(num_samples.data_type))
if len(num_samples.dims) != 1:
raise Exception("Expecting num_samples to have 1 dimension, "
"model '{}' num_samples has {}".format(
model_name,len(num_samples.dims)))
def parse_model(self, server_status,
model_name, batch_size,
model_platform=None, verbose=False):
"""
Check the configuration of the ensemble model
"""
if model_name not in server_status.model_status:
raise Exception("unable to get status for '" + model_name + "'")
status = server_status.model_status[model_name]
config = status.config
self.model_platform = model_platform
# Inputs are:
# 1) audio_signal: raw audio samples [num_samples]
# 2) sample_rate: sample rate of audio
# 3) num_samples: length of audio
if len(config.input) < 2:
raise Exception(
"expecting 2-3 inputs, got {}".format(len(config.input)))
# Outputs are:
# 1) transcripts: candidate transcripts
if len(config.output) != 1:
raise Exception(
"expecting 1 output, got {}".format(len(config.output)))
audio_signal = config.input[0]
if len(config.input) > 1:
num_samples = config.input[1]
self.check_num_samples(num_samples);
transcripts = config.output[0]
expected_audio_signal_dim = 1
expected_audio_signal_type = model_config.TYPE_FP32
if audio_signal.data_type != expected_audio_signal_type:
raise Exception("expecting audio_signal datatype to be " +
model_config.DataType.Name(
expected_audio_signal_type) +
"model '" + model_name + "' output type is " +
model_config.DataType.Name(audio_signal.data_type))
# Model specifying maximum batch size of 0 indicates that batching
# is not supported and so the input tensors do not expect an "N"
# dimension (and 'batch_size' should be 1 so that only a single
# audio instance is inferred at a time).
max_batch_size = config.max_batch_size
if max_batch_size == 0:
if batch_size != 1:
raise Exception(
"batching not supported for model '" + model_name + "'")
else: # max_batch_size > 0
if batch_size > max_batch_size:
raise Exception(
"expecting batch size <= {} for model {}".format(
max_batch_size, model_name))
if len(audio_signal.dims) != expected_audio_signal_dim:
raise Exception("Expecting audio signal to have {} dimensions, "
"model '{}' audio_signal has {}".format(
expected_audio_signal_dim,
model_name,
len(audio_signal.dims)))
return (audio_signal.name, num_samples.name, transcripts.name,
model_dtype_to_np(audio_signal.data_type),
model_dtype_to_np(num_samples.data_type),
model_dtype_to_np(transcripts.data_type),
)
def update_audio_request(self, request, audio_generator):
for audio_signal, sample_rate, start, end in audio_generator:
# Delete the current inputs
input_batch = [audio_signal.astype(self.audio_signals_type)]
num_samples_batch = audio_signal.shape[0]
num_samples_batch = [np.asarray([num_samples_batch],
dtype=self.num_samples_type)]
flags = InferRequestHeader.FLAG_NONE
input_batch[0] = np.expand_dims(input_batch[0], axis=0)
audio_bytes = input_batch[0].tobytes()
num_samples_bytes = num_samples_batch[0].tobytes()
request.meta_data.input[0].dims[0] = audio_signal.shape[0]
request.meta_data.input[0].batch_byte_size = len(audio_bytes)
request.meta_data.input[1].dims[0] = 1
request.meta_data.input[1].batch_byte_size = len(num_samples_bytes)
if start:
request.meta_data.flags = flags | \
InferRequestHeader.FLAG_SEQUENCE_START
else:
request.meta_data.flags = flags;
# Send request with audio signal
del request.raw_input[:]
request.raw_input.extend([audio_bytes])
request.raw_input.extend([num_samples_bytes])
yield request
# If end, send empty request to flush out remaining audio
if end:
request.meta_data.flags = flags | \
InferRequestHeader.FLAG_SEQUENCE_END
zero_bytes = np.zeros(shape=input_batch[0].shape,
dtype=input_batch[0].dtype).tobytes()
del request.raw_input[:]
request.raw_input.extend([zero_bytes])
request.raw_input.extend([num_samples_bytes])
yield request
def recognize(self, audio_signal, filenames):
# Send a request of batch_size audio signals. The caller is expected to
# provide at least batch_size audio signals and matching filenames.
flags = InferRequestHeader.FLAG_NONE
flags = flags | InferRequestHeader.FLAG_SEQUENCE_START
input_batch = []
input_filenames = []
max_num_samples_batch = 0
for idx in range(self.batch_size):
input_batch.append(audio_signal[idx].astype(
self.audio_signals_type))
input_filenames.append(filenames[idx])
num_samples = audio_signal[idx].shape[0]
if (num_samples > max_num_samples_batch):
max_num_samples_batch = num_samples
for idx in range(self.batch_size):
num_samples = input_batch[idx].shape[0]
print("num_samples : ", num_samples)
# input_batch[idx] = np.pad(input_batch[idx],
# ((0,
# max_num_samples_batch -
# num_samples)),
# mode='constant')
mean = np.mean(input_batch[idx])
std_var = np.std(input_batch[idx])
gauss_noise = np.random.normal(
mean,std_var,
max_num_samples_batch-num_samples)
input_batch[idx]= np.concatenate(
(input_batch[idx], gauss_noise.astype(
self.audio_signals_type)))
max_num_samples_batch = np.asarray([max_num_samples_batch],
dtype=self.num_samples_type)
num_samples_batch = [max_num_samples_batch] * self.batch_size
#print(num_samples_batch)
#print(input_batch)
#print(input_sample_rates)
# Send request
print("Sending request to transcribe file(s):", ",".join(
input_filenames))
if (self.model_platform == "obsolete_pyt"):
result = self.ctx.run(
{self.audio_signals_name: input_batch,
self.num_samples_name: num_samples_batch},
{self.transcripts_name: InferContext.ResultFormat.RAW},
self.batch_size, flags)
else:
result = self.ctx.run(
{self.audio_signals_name: input_batch,
self.num_samples_name: num_samples_batch},
{self.transcripts_name: InferContext.ResultFormat.RAW},
self.batch_size, flags)
hypotheses = self.postprocess(result, input_filenames)
return hypotheses
def preemphasis(signal, coeff=0.97):
return np.append(signal[0], signal[1:] - coeff * signal[:-1])
def normalize_signal(signal, gain=None):
"""
Normalize float32 signal to [-1, 1] range
"""
if gain is None:
gain = 1.0 / (np.max(np.abs(signal)) + 1e-5)
return signal * gain
class AudioSegment(object):
"""Monaural audio segment abstraction.
:param samples: Audio samples [num_samples x num_channels].
:type samples: ndarray.float32
:param sample_rate: Audio sample rate.
:type sample_rate: int
:raises TypeError: If the sample data type is not float or int.
"""
def __init__(self, samples, sample_rate, target_sr=16000, trim=False,
trim_db=60):
"""Create audio segment from samples.
Samples are converted to float32 internally, with ints scaled to [-1, 1].
"""
samples = self._convert_samples_to_float32(samples)
if target_sr is not None and target_sr != sample_rate:
samples = librosa.core.resample(samples, sample_rate, target_sr)
sample_rate = target_sr
if trim:
samples, _ = librosa.effects.trim(samples, trim_db)
self._samples = samples
self._sample_rate = sample_rate
if self._samples.ndim >= 2:
self._samples = np.mean(self._samples, 1)
@staticmethod
def _convert_samples_to_float32(samples):
"""Convert sample type to float32.
Audio sample type is usually integer or floating-point.
Integers will be scaled to [-1, 1] in float32.
"""
float32_samples = samples.astype('float32')
if samples.dtype in np.sctypes['int']:
bits = np.iinfo(samples.dtype).bits
float32_samples *= (1. / 2 ** (bits - 1))
elif samples.dtype in np.sctypes['float']:
pass
else:
raise TypeError("Unsupported sample type: %s." % samples.dtype)
return float32_samples
@classmethod
def from_file(cls, filename, target_sr=16000, int_values=False, offset=0,
duration=0, trim=False):
"""
Load a file supported by librosa and return as an AudioSegment.
:param filename: path of file to load
:param target_sr: the desired sample rate
:param int_values: if true, load samples as 32-bit integers
:param offset: offset in seconds when loading audio
:param duration: duration in seconds when loading audio
:return: AudioSegment instance containing the loaded samples
"""
with sf.SoundFile(filename, 'r') as f:
dtype = 'int32' if int_values else 'float32'
sample_rate = f.samplerate
if offset > 0:
f.seek(int(offset * sample_rate))
if duration > 0:
samples = f.read(int(duration * sample_rate), dtype=dtype)
else:
samples = f.read(dtype=dtype)
samples = samples.transpose()
return cls(samples, sample_rate, target_sr=target_sr, trim=trim)
@property
def samples(self):
return self._samples.copy()
@property
def sample_rate(self):
return self._sample_rate
# define our clear function
def clear_screen():
_ = system('clear')
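# Usage sketch (annotation, not part of the original file; the server URL, protocol and
# audio path are assumptions, and a running TensorRT Inference Server is required):
#   audio = AudioSegment.from_file("example.wav", target_sr=16000)
#   client = SpeechClient("localhost:8001", ProtocolType.GRPC, "jasper-trt-ensemble",
#                         model_version=-1, batch_size=1, model_platform="trt")
#   client.recognize([audio.samples], ["example.wav"])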

View file

@ -0,0 +1,19 @@
Copyright (c) 2017 Keith Ito
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View file

@ -0,0 +1,74 @@
""" from https://github.com/keithito/tacotron """
import re
from utils.tacotron2 import cleaners
from utils.tacotron2.symbols import symbols
# Mappings from symbol to numeric ID and vice versa:
_symbol_to_id = {s: i for i, s in enumerate(symbols)}
_id_to_symbol = {i: s for i, s in enumerate(symbols)}
# Regular expression matching text enclosed in curly braces:
_curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')
def text_to_sequence(text, cleaner_names):
'''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
The text can optionally have ARPAbet sequences enclosed in curly braces embedded
in it. For example, "Turn left on {HH AW1 S S T AH0 N} Street."
Args:
text: string to convert to a sequence
cleaner_names: names of the cleaner functions to run the text through
Returns:
List of integers corresponding to the symbols in the text
'''
sequence = []
# Check for curly braces and treat their contents as ARPAbet:
while len(text):
m = _curly_re.match(text)
if not m:
sequence += _symbols_to_sequence(_clean_text(text, cleaner_names))
break
sequence += _symbols_to_sequence(_clean_text(m.group(1), cleaner_names))
sequence += _arpabet_to_sequence(m.group(2))
text = m.group(3)
return sequence
def sequence_to_text(sequence):
'''Converts a sequence of IDs back to a string'''
result = ''
for symbol_id in sequence:
if symbol_id in _id_to_symbol:
s = _id_to_symbol[symbol_id]
# Enclose ARPAbet back in curly braces:
if len(s) > 1 and s[0] == '@':
s = '{%s}' % s[1:]
result += s
return result.replace('}{', ' ')
def _clean_text(text, cleaner_names):
for name in cleaner_names:
cleaner = getattr(cleaners, name)
if not cleaner:
raise Exception('Unknown cleaner: %s' % name)
text = cleaner(text)
return text
def _symbols_to_sequence(symbols):
return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]
def _arpabet_to_sequence(text):
return _symbols_to_sequence(['@' + s for s in text.split()])
def _should_keep_symbol(s):
return s in _symbol_to_id and s != '_' and s != '~'
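# Usage sketch (illustrative only); mirrors the ARPAbet example in the
# text_to_sequence() docstring above.
if __name__ == "__main__":
    seq = text_to_sequence("Turn left on {HH AW1 S S T AH0 N} Street.", ["english_cleaners"])
    print(seq)                    # list of symbol ids
    print(sequence_to_text(seq))  # round-trip back to text, ARPAbet re-wrapped in {}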

View file

@ -0,0 +1,90 @@
""" from https://github.com/keithito/tacotron """
'''
Cleaners are transformations that run over the input text at both training and eval time.
Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
hyperparameter. Some cleaners are English-specific. You'll typically want to use:
1. "english_cleaners" for English text
2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
the Unidecode library (https://pypi.python.org/pypi/Unidecode)
3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
the symbols in symbols.py to match your data).
'''
import re
from unidecode import unidecode
from .numbers import normalize_numbers
# Regular expression matching whitespace:
_whitespace_re = re.compile(r'\s+')
# List of (regular expression, replacement) pairs for abbreviations:
_abbreviations = [(re.compile('\\b%s\\.' % x[0], re.IGNORECASE), x[1]) for x in [
('mrs', 'misess'),
('mr', 'mister'),
('dr', 'doctor'),
('st', 'saint'),
('co', 'company'),
('jr', 'junior'),
('maj', 'major'),
('gen', 'general'),
('drs', 'doctors'),
('rev', 'reverend'),
('lt', 'lieutenant'),
('hon', 'honorable'),
('sgt', 'sergeant'),
('capt', 'captain'),
('esq', 'esquire'),
('ltd', 'limited'),
('col', 'colonel'),
('ft', 'fort'),
]]
def expand_abbreviations(text):
for regex, replacement in _abbreviations:
text = re.sub(regex, replacement, text)
return text
def expand_numbers(text):
return normalize_numbers(text)
def lowercase(text):
return text.lower()
def collapse_whitespace(text):
return re.sub(_whitespace_re, ' ', text)
def convert_to_ascii(text):
return unidecode(text)
def basic_cleaners(text):
'''Basic pipeline that lowercases and collapses whitespace without transliteration.'''
text = lowercase(text)
text = collapse_whitespace(text)
return text
def transliteration_cleaners(text):
'''Pipeline for non-English text that transliterates to ASCII.'''
text = convert_to_ascii(text)
text = lowercase(text)
text = collapse_whitespace(text)
return text
def english_cleaners(text):
'''Pipeline for English text, including number and abbreviation expansion.'''
text = convert_to_ascii(text)
text = lowercase(text)
text = expand_numbers(text)
text = expand_abbreviations(text)
text = collapse_whitespace(text)
return text
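# Illustrative sketch (annotation, not part of the original file): roughly what the
# English pipeline produces; the exact wording of number expansion comes from inflect.
if __name__ == "__main__":
    print(english_cleaners("Mr. Smith paid $2.50 in 2019."))
    # expected along the lines of:
    # "mister smith paid two dollars, fifty cents in twenty nineteen."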

View file

@ -0,0 +1,65 @@
""" from https://github.com/keithito/tacotron """
import re
valid_symbols = [
'AA', 'AA0', 'AA1', 'AA2', 'AE', 'AE0', 'AE1', 'AE2', 'AH', 'AH0', 'AH1', 'AH2',
'AO', 'AO0', 'AO1', 'AO2', 'AW', 'AW0', 'AW1', 'AW2', 'AY', 'AY0', 'AY1', 'AY2',
'B', 'CH', 'D', 'DH', 'EH', 'EH0', 'EH1', 'EH2', 'ER', 'ER0', 'ER1', 'ER2', 'EY',
'EY0', 'EY1', 'EY2', 'F', 'G', 'HH', 'IH', 'IH0', 'IH1', 'IH2', 'IY', 'IY0', 'IY1',
'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OW0', 'OW1', 'OW2', 'OY', 'OY0',
'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH', 'UH', 'UH0', 'UH1', 'UH2', 'UW',
'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH'
]
_valid_symbol_set = set(valid_symbols)
class CMUDict:
'''Thin wrapper around CMUDict data. http://www.speech.cs.cmu.edu/cgi-bin/cmudict'''
def __init__(self, file_or_path, keep_ambiguous=True):
if isinstance(file_or_path, str):
with open(file_or_path, encoding='latin-1') as f:
entries = _parse_cmudict(f)
else:
entries = _parse_cmudict(file_or_path)
if not keep_ambiguous:
entries = {word: pron for word, pron in entries.items() if len(pron) == 1}
self._entries = entries
def __len__(self):
return len(self._entries)
def lookup(self, word):
'''Returns list of ARPAbet pronunciations of the given word.'''
return self._entries.get(word.upper())
_alt_re = re.compile(r'\([0-9]+\)')
def _parse_cmudict(file):
cmudict = {}
for line in file:
if len(line) and (line[0] >= 'A' and line[0] <= 'Z' or line[0] == "'"):
parts = line.split(' ')
word = re.sub(_alt_re, '', parts[0])
pronunciation = _get_pronunciation(parts[1])
if pronunciation:
if word in cmudict:
cmudict[word].append(pronunciation)
else:
cmudict[word] = [pronunciation]
return cmudict
def _get_pronunciation(s):
parts = s.strip().split(' ')
for part in parts:
if part not in _valid_symbol_set:
return None
return ' '.join(parts)

View file

@ -0,0 +1,71 @@
""" from https://github.com/keithito/tacotron """
import inflect
import re
_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
_pounds_re = re.compile(r'£([0-9\,]*[0-9]+)')
_dollars_re = re.compile(r'\$([0-9\.\,]*[0-9]+)')
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
_number_re = re.compile(r'[0-9]+')
def _remove_commas(m):
return m.group(1).replace(',', '')
def _expand_decimal_point(m):
return m.group(1).replace('.', ' point ')
def _expand_dollars(m):
match = m.group(1)
parts = match.split('.')
if len(parts) > 2:
return match + ' dollars' # Unexpected format
dollars = int(parts[0]) if parts[0] else 0
cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
if dollars and cents:
dollar_unit = 'dollar' if dollars == 1 else 'dollars'
cent_unit = 'cent' if cents == 1 else 'cents'
return '%s %s, %s %s' % (dollars, dollar_unit, cents, cent_unit)
elif dollars:
dollar_unit = 'dollar' if dollars == 1 else 'dollars'
return '%s %s' % (dollars, dollar_unit)
elif cents:
cent_unit = 'cent' if cents == 1 else 'cents'
return '%s %s' % (cents, cent_unit)
else:
return 'zero dollars'
def _expand_ordinal(m):
return _inflect.number_to_words(m.group(0))
def _expand_number(m):
num = int(m.group(0))
if num > 1000 and num < 3000:
if num == 2000:
return 'two thousand'
elif num > 2000 and num < 2010:
return 'two thousand ' + _inflect.number_to_words(num % 100)
elif num % 100 == 0:
return _inflect.number_to_words(num // 100) + ' hundred'
else:
return _inflect.number_to_words(num, andword='', zero='oh', group=2).replace(', ', ' ')
else:
return _inflect.number_to_words(num, andword='')
def normalize_numbers(text):
text = re.sub(_comma_number_re, _remove_commas, text)
text = re.sub(_pounds_re, r'\1 pounds', text)
text = re.sub(_dollars_re, _expand_dollars, text)
text = re.sub(_decimal_number_re, _expand_decimal_point, text)
text = re.sub(_ordinal_re, _expand_ordinal, text)
text = re.sub(_number_re, _expand_number, text)
return text
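# Quick sketch (illustrative only) of the branches above: ordinal expansion, the
# year-style reading for numbers between 1000 and 3000, and dollar expansion.
if __name__ == "__main__":
    print(normalize_numbers("On May 3rd, 2005, the fare was $15."))
    # expected along the lines of:
    # "On May third, two thousand five, the fare was fifteen dollars."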

View file

@ -0,0 +1,18 @@
""" from https://github.com/keithito/tacotron """
'''
Defines the set of symbols used in text input to the model.
The default is a set of ASCII characters that works well for English or text that has been run through Unidecode. For other data, you can modify _characters. See TRAINING_DATA.md for details. '''
from utils.tacotron2 import cmudict
_pad = '_'
_punctuation = '!\'(),.:;? '
_special = '-'
_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
# Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters):
_arpabet = ['@' + s for s in cmudict.valid_symbols]
# Export all symbols:
symbols = [_pad] + list(_special) + list(_punctuation) + list(_letters) + _arpabet

View file

@ -0,0 +1 @@
jupyter lab --allow-root --ip=0.0.0.0 --no-browser speech_ai_demo.ipynb

View file

@ -0,0 +1,68 @@
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile b/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
index e598a67..562be83 100644
--- a/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
+++ b/PyTorch/SpeechRecognition/Jasper/trt/Dockerfile
@@ -1,4 +1,4 @@
-ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:19.10-py3
+ARG FROM_IMAGE_NAME=nvcr.io/nvidia/pytorch:20.01-py3
FROM ${FROM_IMAGE_NAME}
RUN apt-get update && apt-get install -y python3
@@ -6,7 +6,7 @@ RUN apt-get update && apt-get install -y python3
WORKDIR /tmp/onnx-trt
COPY trt/onnx-trt.patch .
RUN git clone https://github.com/onnx/onnx-tensorrt.git && cd onnx-tensorrt && git submodule update --init --recursive && \
- patch -f < ../onnx-trt.patch && mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
+ mkdir build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr -DGPU_ARCHS="60 70 75" && make -j16 && make install && mv -f /usr/lib/libnvonnx* /usr/lib/x86_64-linux-gnu/ && ldconfig
# Here's a good place to install pip reqs from JoC repo.
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/perf.py b/PyTorch/SpeechRecognition/Jasper/trt/perf.py
index 426ee66..5917a1f 100755
--- a/PyTorch/SpeechRecognition/Jasper/trt/perf.py
+++ b/PyTorch/SpeechRecognition/Jasper/trt/perf.py
@@ -64,6 +64,9 @@ def main(args):
print("TRANSCRIPT: ", hypotheses)
return
+ if pyt_components['data_layer'] is None:
+ return
+
wer, preds, times = perfprocedures.compare_times_trt_pyt_exhaustive(engine,
pyt_components,
args)
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh
index 0e44c7f..62e7446 100755
--- a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh
+++ b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/build.sh
@@ -1,5 +1,5 @@
#!/bin/bash
# Constructs a docker image containing dependencies for execution of JASPER through TRT
-echo "docker build . -f ./trt/Dockerfile -t jasper:trt6"
-docker build . -f ./trt/Dockerfile -t jasper:trt6
+echo "docker build . -f ./trt/Dockerfile -t jasper:trt7"
+docker build . -f ./trt/Dockerfile -t jasper:trt7
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh
index 9959062..ed5e711 100755
--- a/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh
+++ b/PyTorch/SpeechRecognition/Jasper/trt/scripts/docker/launch.sh
@@ -40,4 +40,4 @@ nvidia-docker run -it --rm \
-v $RESULT_DIR:/results/ \
-v ${JASPER_REPO}:/jasper \
${EXTRA_JASPER_ENV} \
- jasper:trt6 bash $PROGRAM_PATH
+ jasper:trt7 bash $PROGRAM_PATH
diff --git a/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py b/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
index 92460b2..01c8b6a 100644
--- a/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
+++ b/PyTorch/SpeechRecognition/Jasper/trt/trtutils.py
@@ -40,7 +40,7 @@ def build_engine_from_parser(args):
'''
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if args.verbose else trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(TRT_LOGGER)
- builder.max_batch_size = 64
+ builder.max_batch_size = 16
if args.trt_fp16:
builder.fp16_mode = True

View file

@ -0,0 +1,44 @@
name: "bert-onnx"
platform: "onnxruntime_onnx"
max_batch_size: 8
input [
{
name: "input__0"
data_type: TYPE_INT64
dims: [384]
},
{
name: "input__1"
data_type: TYPE_INT64
dims: [384]
},
{
name: "input__2"
data_type: TYPE_INT64
dims: [384]
}
]
output [
{
name: "output__0"
data_type: TYPE_FP16
dims: [384]
},
{
name: "output__1"
data_type: TYPE_FP16
dims: [384]
}
]
optimization {
cuda {
graphs: 0
}
}
instance_group [
{
count: 1
kind: KIND_GPU
gpus: [ 0 ]
}
]

View file

@ -0,0 +1,45 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
default_model_filename: "jasper-decoder.pt"
name: "jasper-decoder"
platform: "pytorch_libtorch"
max_batch_size: 16
input [
{
name: "CLASS_LOGITS__0"
data_type: TYPE_FP32
dims: [ -1, 29 ]
}
]
output [
{
name: "CANDIDATE_TRANSCRIPT__0"
data_type: TYPE_INT32
dims: [ -1]
}
]

View file

@ -0,0 +1,32 @@
name: "jasper-feature-extractor"
platform: "pytorch_libtorch"
default_model_filename: "jasper-feature-extractor.pt"
max_batch_size: 16
input [ {
name: "AUDIO_SIGNAL__0"
data_type: TYPE_FP32
dims: [ -1 ]
},
{
name: "NUM_SAMPLES__1"
data_type: TYPE_INT32
dims: [ 1 ]
reshape { shape: [] }
}
]
output [
{
name: "AUDIO_FEATURES__0"
data_type: TYPE_FP32
dims: [64, -1]
}
,
{
name: "NUM_TIME_STEPS__1"
data_type: TYPE_INT32
dims: [ 1 ]
reshape: { shape: [] }
}
]

View file

@ -0,0 +1,60 @@
name: "jasper-trt-ensemble"
platform: "ensemble"
max_batch_size: 1
input {
name: "AUDIO_SIGNAL"
data_type: TYPE_FP32
dims: -1
}
input {
name: "NUM_SAMPLES"
data_type: TYPE_INT32
dims: [ 1 ]
}
output {
name: "TRANSCRIPT"
data_type: TYPE_INT32
dims: [-1]
}
ensemble_scheduling {
step {
model_name: "jasper-feature-extractor"
model_version: -1
input_map {
key: "AUDIO_SIGNAL__0"
value: "AUDIO_SIGNAL"
}
input_map {
key: "NUM_SAMPLES__1"
value: "NUM_SAMPLES"
}
output_map {
key: "AUDIO_FEATURES__0"
value: "AUDIO_FEATURES"
}
}
step {
model_name: "jasper-trt"
model_version: -1
input_map {
key: "FEATURES"
value: "AUDIO_FEATURES"
}
output_map {
key: "LOGITS"
value: "CHARACTER_PROBABILITIES"
}
}
step {
model_name: "jasper-decoder"
model_version: -1
input_map {
key: "CLASS_LOGITS__0"
value: "CHARACTER_PROBABILITIES"
}
output_map {
key: "CANDIDATE_TRANSCRIPT__0"
value: "TRANSCRIPT"
}
}
}
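# Dataflow summary (annotation, not part of the original config):
#   AUDIO_SIGNAL, NUM_SAMPLES -> jasper-feature-extractor -> AUDIO_FEATURES
#   AUDIO_FEATURES            -> jasper-trt               -> CHARACTER_PROBABILITIES
#   CHARACTER_PROBABILITIES   -> jasper-decoder           -> TRANSCRIPT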

View file

@ -0,0 +1,52 @@
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
name: "jasper-trt"
platform: "tensorrt_plan"
default_model_filename: "jasper_fp16.engine"
max_batch_size: 16
input [
{
name: "FEATURES"
data_type: TYPE_FP32
dims: [64, -1]
}
]
output [
{
name: "LOGITS"
data_type: TYPE_FP32
dims: [-1, 29 ]
}
]
cc_model_filenames: [
{ key: "7.0"
value: "jasper_fp16.engine"}
]

View file

@ -0,0 +1,31 @@
name: "tacotron2"
platform: "pytorch_libtorch"
default_model_filename: "tacotron2_fp16.pt"
max_batch_size: 8
input [
{
name: "sequence__0"
data_type: TYPE_INT64
dims: [-1]
},
{
name: "input_lengths__1"
data_type: TYPE_INT64
dims: [1]
reshape: { shape: [ ] }
}
]
output [
{
name: "mel_outputs_postnet__0"
data_type: TYPE_FP16
dims: [80,-1]
},
{
name: "mel_lengths__1"
data_type: TYPE_INT32
dims: [1]
reshape: { shape: [ ] }
}
]

View file

@ -0,0 +1,21 @@
name: "waveglow-trt"
platform: "tensorrt_plan"
default_model_filename: "waveglow_fp16.engine"
max_batch_size: 1
input {
name: "mel"
data_type: TYPE_FP16
dims: [80, -1, 1]
}
input {
name: "z"
data_type: TYPE_FP16
dims: [8, -1, 1]
}
output {
name: "audio"
data_type: TYPE_FP16
dims: [-1]
}

View file

@ -123,9 +123,10 @@
"def sequence_to_mel(sequence):\n",
" ''' calls tacotron2\n",
" ::sequence:: int64 numpy array, contains the preprocessed text\n",
" ::returns:: (mel, mel_lengths) pair\n",
" ::returns:: (mel, mel_lengths, alignments) tuple\n",
" mel is the mel-spectrogram, np.array\n",
" mel_lengths contains the length of the unpadded mel, np.array\n",
" alignments contains attention weigths, np.array\n",
" '''\n",
" input_lengths = [len(sequence)]\n",
" input_lengths = np.array(input_lengths, dtype=np.int64)\n",
@ -136,13 +137,15 @@
" output_dict = {}\n",
" output_dict['mel_outputs_postnet__0'] = InferContext.ResultFormat.RAW\n",
" output_dict['mel_lengths__1'] = InferContext.ResultFormat.RAW\n",
" output_dict['alignments__2'] = InferContext.ResultFormat.RAW\n",
" batch_size = 1\n",
" # call tacotron2\n",
" result = infer_ctx_tacotron2.run(input_dict, output_dict, batch_size)\n",
" # get results\n",
" mel = result['mel_outputs_postnet__0'][0] # take only the first instance in the output batch\n",
" mel_lengths = result['mel_lengths__1'][0] # take only the first instance in the output batch\n",
" return mel, mel_lengths\n",
" alignments = result['alignments__2'][0] # take only the first instance in the output batch\n",
" return mel, mel_lengths, alignments\n",
"\n",
"\n",
"def force_to_shape(mel, length):\n",
@ -239,7 +242,7 @@
" with plot_text_area_preprocessed:\n",
" display_heatmap(sequence)\n",
" # run tacotron2\n",
" mel, mel_lengths = sequence_to_mel(sequence)\n",
" mel, mel_lengths, alignments = sequence_to_mel(sequence)\n",
" with plot_spectrogram:\n",
" display_spectrogram(mel, change['new'])\n",
" # run waveglow\n",

View file

@ -685,4 +685,4 @@ class Tacotron2(nn.Module):
mel_outputs_postnet = self.postnet(mel_outputs)
mel_outputs_postnet = mel_outputs + mel_outputs_postnet
return mel_outputs_postnet, mel_lengths
return mel_outputs_postnet, mel_lengths, alignments

View file

@ -187,7 +187,7 @@ def main():
with torch.no_grad():
with MeasureTime(measurements, "latency"):
with MeasureTime(measurements, "tacotron2_latency"):
mel, mel_lengths = tacotron2.infer(sequences_padded, input_lengths)
mel, mel_lengths, _ = tacotron2.infer(sequences_padded, input_lengths)
with MeasureTime(measurements, "waveglow_latency"):
audios = waveglow.infer(mel, sigma=args.sigma_infer)

View file

@ -81,8 +81,6 @@ def parse_args(parser):
help='Number of epochs per checkpoint')
training.add_argument('--checkpoint-path', type=str, default='',
help='Checkpoint path to resume training')
training.add_argument('--seed', type=int, default=1234,
help='Seed for PyTorch random number generators')
training.add_argument('--dynamic-loss-scaling', type=bool, default=True,
help='Enable dynamic loss scaling')
training.add_argument('--amp-run', action='store_true',
@ -196,12 +194,13 @@ def save_checkpoint(model, optimizer, epoch, config, amp_run, filepath):
torch.save(checkpoint, filepath)
def load_checkpoint(model, optimizer, epoch, config, amp_run, filepath):
def load_checkpoint(model, optimizer, epoch, config, amp_run, filepath, rank):
checkpoint = torch.load(filepath, map_location='cpu')
epoch[0] = checkpoint['epoch']+1
torch.cuda.set_rng_state_all(checkpoint['cuda_rng_state_all'])
device_id = rank % torch.cuda.device_count()
torch.cuda.set_rng_state(checkpoint['cuda_rng_state_all'][device_id])
torch.random.set_rng_state(checkpoint['random_rng_state'])
config = checkpoint['config']
model.load_state_dict(checkpoint['state_dict'])
@ -355,7 +354,7 @@ def main():
if args.checkpoint_path is not "":
load_checkpoint(model, optimizer, start_epoch, model_config,
args.amp_run, args.checkpoint_path)
args.amp_run, args.checkpoint_path, local_rank)
start_epoch = start_epoch[0]
@ -475,7 +474,7 @@ def main():
DLLogger.log(step=(epoch,), data={'train_loss': (train_epoch_avg_loss/num_iters if num_iters > 0 else 0.0)})
DLLogger.log(step=(epoch,), data={'train_epoch_time': epoch_time})
val_loss = validate(model, criterion, valset, epoch, i,
val_loss = validate(model, criterion, valset, epoch, iteration,
args.batch_size, world_size, collate_fn,
distributed_run, local_rank, batch_to_gpu)

View file

@ -52,13 +52,13 @@ NVIDIA TensorRT is a platform for high-performance deep learning inference. It i
```bash
mkdir -p output
python exports/export_tacotron2_onnx.py --tacotron2 ./checkpoints/nvidia_tacotron2pyt_fp16_20190427 -o output/
python exports/export_tacotron2_onnx.py --tacotron2 ./checkpoints/nvidia_tacotron2pyt_fp16_20190427 -o output/ --fp16
```
Export WaveGlow to ONNX IR:
```bash
python exports/export_waveglow_onnx.py --waveglow ./checkpoints/nvidia_waveglow256pyt_fp16 --wn-channels 256 -o output/
python exports/export_waveglow_onnx.py --waveglow ./checkpoints/nvidia_waveglow256pyt_fp16 --wn-channels 256 -o output/ --fp16
```
After running the above commands, there should be four new ONNX files in `./output/` directory:
@ -76,7 +76,7 @@ NVIDIA TensorRT is a platform for high-performance deep learning inference. It i
8. Run TTS inference pipeline with fp16:
```bash
python trt/inference_trt.py -i phrases/phrase.txt --encoder output/encoder_fp16.engine --decoder output/decoder_iter_fp16.engine --postnet output/postnet_fp16.engine --waveglow output/waveglow_fp16.engine -o output/
python trt/inference_trt.py -i phrases/phrase.txt --encoder output/encoder_fp16.engine --decoder output/decoder_iter_fp16.engine --postnet output/postnet_fp16.engine --waveglow output/waveglow_fp16.engine -o output/ --fp16
```
## Inference performance: NVIDIA T4
@ -85,5 +85,5 @@ Our results were obtained by running the `./trt/run_latency_tests_trt.sh` script
|Framework|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)|Latency confidence interval 90% (s)|Latency confidence interval 95% (s)|Latency confidence interval 99% (s)|Throughput (samples/sec)|Speed-up PyT+TRT/TRT|Avg mels generated (81 mels=1 sec of speech)|Avg audio length (s)|Avg RTF|
|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|---:|
|PyT+TRT|1| 128| FP16| 1.14| 0.02| 1.16| 1.16| 1.21| 137,050| 1.45| 611| 7.09| 6.20|
|PyT+TRT|1| 128| FP16| 1.13| 0.01| 1.15| 1.15| 1.16| 137,738| 1.45| 608| 7.06| 6.24|
|PyT |1| 128| FP16| 1.63| 0.07| 1.71| 1.73| 1.81| 94,758| 1.00| 601| 6.98| 4.30|

View file

@ -71,12 +71,16 @@ def parse_args(parser):
help='Sampling rate')
parser.add_argument('--stft-hop-length', type=int, default=256,
help='STFT hop length for estimating audio length from mel size')
parser.add_argument('--fp16', action='store_true',
help='inference with FP16')
return parser
def init_decoder_inputs(memory, processed_memory, memory_lengths):
device = memory.device
dtype = memory.dtype
bs = memory.size(0)
seq_len = memory.size(1)
attention_rnn_dim = 1024
@ -84,15 +88,15 @@ def init_decoder_inputs(memory, processed_memory, memory_lengths):
encoder_embedding_dim = 512
n_mel_channels = 80
attention_hidden = torch.zeros(bs, attention_rnn_dim).cuda().float()
attention_cell = torch.zeros(bs, attention_rnn_dim).cuda().float()
decoder_hidden = torch.zeros(bs, decoder_rnn_dim).cuda().float()
decoder_cell = torch.zeros(bs, decoder_rnn_dim).cuda().float()
attention_weights = torch.zeros(bs, seq_len).cuda().float()
attention_weights_cum = torch.zeros(bs, seq_len).cuda().float()
attention_context = torch.zeros(bs, encoder_embedding_dim).cuda().float()
mask = get_mask_from_lengths(memory_lengths).cuda()
decoder_input = torch.zeros(bs, n_mel_channels).cuda().float()
attention_hidden = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype)
attention_cell = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype)
decoder_hidden = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype)
decoder_cell = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype)
attention_weights = torch.zeros(bs, seq_len, device=device, dtype=dtype)
attention_weights_cum = torch.zeros(bs, seq_len, device=device, dtype=dtype)
attention_context = torch.zeros(bs, encoder_embedding_dim, device=device, dtype=dtype)
mask = get_mask_from_lengths(memory_lengths).to(device)
decoder_input = torch.zeros(bs, n_mel_channels, device=device, dtype=dtype)
return (decoder_input, attention_hidden, attention_cell, decoder_hidden,
decoder_cell, attention_weights, attention_weights_cum,
@ -100,6 +104,8 @@ def init_decoder_inputs(memory, processed_memory, memory_lengths):
def init_decoder_outputs(memory, memory_lengths):
device = memory.device
dtype = memory.dtype
bs = memory.size(0)
seq_len = memory.size(1)
attention_rnn_dim = 1024
@ -107,15 +113,15 @@ def init_decoder_outputs(memory, memory_lengths):
encoder_embedding_dim = 512
n_mel_channels = 80
attention_hidden = torch.zeros(bs, attention_rnn_dim).cuda().float()
attention_cell = torch.zeros(bs, attention_rnn_dim).cuda().float()
decoder_hidden = torch.zeros(bs, decoder_rnn_dim).cuda().float()
decoder_cell = torch.zeros(bs, decoder_rnn_dim).cuda().float()
attention_weights = torch.zeros(bs, seq_len).cuda().float()
attention_weights_cum = torch.zeros(bs, seq_len).cuda().float()
attention_context = torch.zeros(bs, encoder_embedding_dim).cuda().float()
decoder_output = torch.zeros(bs, n_mel_channels).cuda().float()
gate_prediction = torch.zeros(bs, 1).cuda().float()
attention_hidden = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype)
attention_cell = torch.zeros(bs, attention_rnn_dim, device=device, dtype=dtype)
decoder_hidden = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype)
decoder_cell = torch.zeros(bs, decoder_rnn_dim, device=device, dtype=dtype)
attention_weights = torch.zeros(bs, seq_len, device=device, dtype=dtype)
attention_weights_cum = torch.zeros(bs, seq_len, device=device, dtype=dtype)
attention_context = torch.zeros(bs, encoder_embedding_dim, device=device, dtype=dtype)
decoder_output = torch.zeros(bs, n_mel_channels, device=device, dtype=dtype)
gate_prediction = torch.zeros(bs, 1, device=device, dtype=dtype)
return (attention_hidden, attention_cell, decoder_hidden,
decoder_cell, attention_weights, attention_weights_cum,
@ -178,10 +184,15 @@ def swap_inputs_outputs(decoder_inputs, decoder_outputs):
def infer_tacotron2_trt(encoder, decoder_iter, postnet,
encoder_context, decoder_context, postnet_context,
sequences, sequence_lengths, measurements):
sequences, sequence_lengths, measurements, fp16):
memory = torch.zeros((len(sequence_lengths),sequence_lengths[0],512)).cuda().float()
processed_memory = torch.zeros((len(sequence_lengths),sequence_lengths[0],128)).cuda().float()
memory = torch.zeros((len(sequence_lengths), sequence_lengths[0], 512)).cuda()
if fp16:
memory = memory.half()
device = memory.device
dtype = memory.dtype
processed_memory = torch.zeros((len(sequence_lengths),sequence_lengths[0],128), device=device, dtype=dtype)
lens = torch.zeros_like(sequence_lengths)
encoder_tensors = {
@ -237,7 +248,7 @@ def infer_tacotron2_trt(encoder, decoder_iter, postnet,
decoder_inputs, decoder_outputs = swap_inputs_outputs(decoder_inputs, decoder_outputs)
mel_outputs_postnet = torch.zeros_like(mel_outputs).cuda().float()
mel_outputs_postnet = torch.zeros_like(mel_outputs, device=device, dtype=dtype)
postnet_tensors = {
# inputs
@ -254,7 +265,7 @@ def infer_tacotron2_trt(encoder, decoder_iter, postnet,
return mel_outputs_postnet, mel_lengths
def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements):
def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, fp16):
mel = mel.unsqueeze(3)
mel_size = mel.size(2)
@ -268,7 +279,7 @@ def infer_waveglow_trt(waveglow, waveglow_context, mel, measurements):
z = torch.randn(batch_size, n_group, z_size, 1).cuda()
audios = torch.zeros(batch_size, mel_size*stride).cuda()
if "HALF" in str(waveglow.get_binding_dtype(waveglow.get_binding_index("mel"))):
if fp16:
z = z.half()
mel = mel.half()
audios = audios.half()
@ -337,8 +348,8 @@ def main():
with MeasureTime(measurements, "latency"):
mel, mel_lengths = infer_tacotron2_trt(encoder, decoder_iter, postnet,
encoder_context, decoder_context, postnet_context,
sequences, sequence_lengths, measurements)
audios = infer_waveglow_trt(waveglow, waveglow_context, mel, measurements)
sequences, sequence_lengths, measurements, args.fp16)
audios = infer_waveglow_trt(waveglow, waveglow_context, mel, measurements, args.fp16)
with encoder_context, decoder_context, postnet_context, waveglow_context:
pass
@ -365,7 +376,7 @@ def main():
DLLogger.log(step=0, data={"denoiser": measurements['denoiser']})
DLLogger.flush()
prec = "fp16" if "fp16" in args.encoder else "fp32"
prec = "fp16" if args.fp16 else "fp32"
latency = measurements['latency']
throughput = audios.size(1)/latency
log_data = "1,"+str(sequence_lengths[0].item())+","+prec+","+str(latency)+","+str(throughput)+","+str(mel_lengths[0].item())+"\n"

View file

@ -68,8 +68,9 @@ def engine_info(engine_filepath):
dims: {dims}
}}"""
type_mapping = {"DataType.HALF": "TYPE_FP16",
"DataType.FLOAT": "TYPE_FP32",
"DataType.INT32": "TYPE_INT32"}
"DataType.FLOAT": "TYPE_FP32",
"DataType.INT32": "TYPE_INT32",
"DataType.BOOL" : "TYPE_BOOL"}
print("engine name", engine.name)
print("has_implicit_batch_dimension", engine.has_implicit_batch_dimension)

View file

@ -0,0 +1,36 @@
BasedOnStyle: LLVM
IndentWidth: 2
ColumnLimit: 80
ContinuationIndentWidth: 4
BinPackParameters: false
BinPackArguments: false
AllowAllParametersOfDeclarationOnNextLine: true
BreakBeforeBraces: Custom
BraceWrapping:
AfterEnum: true
AfterStruct: true
AfterClass: true
AfterControlStatement: false
AfterFunction: true
AfterNamespace: true
AfterUnion: true
AfterExternBlock: false
BeforeElse: false
IndentBraces: false
SplitEmptyRecord: true
# BreakInheritanceList: AfterColon
DerivePointerAlignment: false
PointerAlignment: Left
AlignAfterOpenBracket: AlwaysBreak
NamespaceIndentation: None
SpaceBeforeParens: true
# SpaceBeforeRangeBasedForLoopColon: true
# SpaceBeforeInheritanceColon: true
AlwaysBreakTemplateDeclarations: true
BreakBeforeBinaryOperators: true
AllowShortBlocksOnASingleLine: false
AllowShortFunctionsOnASingleLine: false
AllowShortLoopsOnASingleLine: false
AllowShortIfStatementsOnASingleLine: false
BreakConstructorInitializers: AfterColon
ConstructorInitializerAllOnOneLineOrOnePerLine: true

View file

@ -0,0 +1,18 @@
*.swp
*.swo
*.swn
*.swm
*.pyc
*.csv
*.wav
test.json
__pycache__
build
models
engines
logs
audio
mels
Makefile
trtis_sdk
sampleTacotron2WaveGlow

View file

@ -0,0 +1,60 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
cmake_minimum_required(VERSION 3.8 FATAL_ERROR)
project(tacotron2_inference LANGUAGES CXX CUDA)
if (DEFINED DEVEL AND NOT DEVEL EQUAL 0)
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
# g++ warnings
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Wall")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Werror")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Wpedantic")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Weffc++")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -Wextra")
set(CPP_DEVEL_FLAGS "${CPP_DEVEL_FLAGS} -DDEVEL=1")
# nvcc warnings
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-Wall")
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-Werror")
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-Weffc++")
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-Wextra")
set(CUDA_DEVEL_FLAGS "${CUDA_DEVEL_FLAGS} -Xcompiler=-DDEVEL=1")
endif()
endif()
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CUDA_STANDARD 11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CPP_DEVEL_FLAGS} -fPIC")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_DEVEL_FLAGS} -rdc=true -Xcompiler=-fPIC")
enable_testing()
add_subdirectory("src")

View file

@ -0,0 +1,15 @@
FROM nvcr.io/nvidia/pytorch:20.01-py3
# Make sure we have all needed modules
RUN python3 -c "import torch; import onnx; import scipy; import numpy; import librosa"
WORKDIR "/workspace"
ADD ./tacotron2 ./tacotron2
ADD ./waveglow ./waveglow
ADD ./common ./common
ADD ./trtis_cpp/scripts ./trtis_cpp/scripts
WORKDIR "/workspace/trtis_cpp"
ENTRYPOINT ["/bin/bash", "-c"]

View file

@ -0,0 +1,37 @@
ARG TRTIS_IMAGE=nvcr.io/nvidia/tensorrtserver:20.02-py3
FROM ${TRTIS_IMAGE}
RUN mkdir -p /workspace/trt-tacotron2-waveglow
WORKDIR /workspace/trt-tacotron2-waveglow
# Download custom backend SDK
RUN wget https://github.com/NVIDIA/tensorrt-inference-server/releases/download/v1.11.0/v1.11.0_ubuntu1804.custombackend.tar.gz
RUN tar xf v1.11.0_ubuntu1804.custombackend.tar.gz && mv custom-backend-sdk ./trtis_sdk
# install cmake
RUN apt-get update && apt-get install -qy cmake && apt-get clean
# build the source code
ADD src/ "./src"
ADD CMakeLists.txt "./"
ADD configure "./"
RUN ./configure --trtis
RUN make
ARG TACOTRON2_MODEL="tacotron.json"
ARG WAVEGLOW_MODEL="waveglow.onnx"
ARG DENOISER_MODEL="denoiser.json"
RUN mkdir -p "/models" "/engines"
ADD "${TACOTRON2_MODEL}" /models/
ADD "${WAVEGLOW_MODEL}" /models/
ADD "${DENOISER_MODEL}" /models/
ADD model-config/tacotron2waveglow /models/tacotron2waveglow
RUN mkdir -p /models/tacotron2waveglow/1
RUN cp -v "./build/lib/libtt2i_trtis.so" /models/tacotron2waveglow/1/
ADD scripts "./scripts"

View file

@ -0,0 +1,247 @@
# Tacotron2+WaveGlow Inference Using TensorRT Inference Server with TensorRT
This is a subfolder of the Tacotron2 for PyTorch repository that provides
scripts to deploy high-performance inference using NVIDIA TensorRT Inference
Server with a custom TensorRT
[backend](https://docs.nvidia.com/deeplearning/sdk/tensorrt-inference-server-guide/docs/build.html#building-a-custom-backend).
## Table of contents
* [Model overview](#model-overview)
- [Tacotron2 plugins](#tacotron2-plugins)
* [Setup](#setup)
- [Requirements](#requirements)
* [Quick Start Guide](#quick-start-guide)
- [Export the models](#export-the-models)
- [Setup the TRTIS server](#setup-the-trtis-server)
- [Setup the TRTIS client](#setup-the-trtis-client)
- [Run the TRTIS server](#run-the-trtis-server)
- [Run the TRTIS client](#run-the-trtis-client)
* [Advanced](#advanced)
- [Code structure](#code-structure)
- [Precision](#precision)
* [Performance](#performance)
- [Performance on NVIDIA T4](#performance-on-nvidia-t4)
- [Running the benchmark](#running-the-benchmark)
## Model overview
The Tacotron2 and WaveGlow models form a text-to-speech system that enables
users to synthesize natural sounding speech from raw transcripts without any
additional information such as patterns and/or rhythms of speech.
In this implementation, the Tacotron2 network is split into three sub-networks:
the encoder, the decoder, and the postnet.
This is followed by WaveGlow as a vocoder, and a Denoiser network that uses an
[STFT](https://en.wikipedia.org/wiki/Short-time_Fourier_transform)
to remove noise from the audio output.
More information on the Tacotron2 and WaveGlow architectures, as well as on
training, can be found in the [Tacotron2 PyTorch README](../README.md).
### Tacotron2 plugins
Because the layers in Tacotron2's decoder are quite small, many
deep learning frameworks fail to achieve high throughput for a batch size of
one, as the overhead associated with executing each of these small layers can
dominate the runtime.
TensorRT supports custom layers through its
[plugin](https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#pluginv2-layer)
interface, which not only allows custom operations, but also allows
developers to manually tune and/or fuse specific layers in their
networks while still using TensorRT to perform automated optimizations on the
other layers, and to manage and execute the entire network.
This implementation uses several plugins for Tacotron2's decoder, including
fused Prenet and Attention layers, as well as LSTM Cell kernels optimized
specifically for the dimensions used in Tacotron2.
## Setup
### Requirements
Building and running the container requires `docker`, `nvidia-docker` and `bash`.
In addition to this, the host machine must have a Volta or Turing based GPU.
## Quick Start Guide
### Clone the repository
```bash
git clone https://github.com/NVIDIA/DeepLearningExamples
cd DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/trtis_cpp
```
### Export the models
You can either train models yourself, or download pretrained checkpoints from [NGC](https://ngc.nvidia.com/catalog/models) and copy them to the `./checkpoints` directory:
- [Tacotron2 checkpoint](https://ngc.nvidia.com/models/nvidia:tacotron2pyt_fp16)
- [WaveGlow checkpoint](https://ngc.nvidia.com/models/nvidia:waveglow256pyt_fp16)
```bash
mkdir checkpoints
cp <Tacotron2_checkpoint> ./checkpoints/
cp <WaveGlow_checkpoint> ./checkpoints/
```
Next you will need to export the PyTorch checkpoints so that they can be used to build TensorRT engines. This can be done via the `export_weights.sh` script:
```bash
mkdir models
./export_weights.sh checkpoints/nvidia_tacotron2pyt_fp16_20190427 checkpoints/nvidia_waveglow256pyt_fp16 models/
```
### Setup the TRTIS server
```bash
./build_trtis.sh models/tacotron2.json models/waveglow.onnx models/denoiser.json
```
This will take some time as TensorRT tries out different tactics for best
performance while building the engines.
### Setup the TRTIS client
Next you need to build the client docker container. To do this, enter the
`trtis_client` directory and run the script `build_trtis_client.sh`.
```bash
cd trtis_client
./build_trtis_client.sh
cd ..
```
### Run the TRTIS server
To run the server locally, use the script `run_trtis_server.sh`:
```bash
./run_trtis_server.sh
```
You can use the environment variable `NVIDIA_VISIBLE_DEVICES` to set which GPUs
the TRTIS server sees.
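For example (the `run_trtis_server.sh` script in this commit derives the value it passes to the container from `CUDA_VISIBLE_DEVICES` on the host):
```bash
# expose only the first GPU to the TRTIS server
CUDA_VISIBLE_DEVICES=0 ./run_trtis_server.sh
```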
### Run the TRTIS client
Leave the server running. In another terminal, type:
```bash
cd trtis_client/
./run_trtis_client.sh phrases.txt
```
This will generate one WAV file per line of `phrases.txt` in the `audio/`
directory, named after the line number (e.g., `1.wav` through `8.wav` for an
8-line file). It is important that each line in the file ends with a period,
or Tacotron2 may fail to detect the end of the phrase.
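For example, a minimal `phrases.txt` might look like this (note that every line ends with a period):
```
Hello world, this is a test of the text to speech system.
The quick brown fox jumps over the lazy dog.
```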
## Advanced
### Code structure
The `src/` directory contains the following sub-directories:
* `trtis`: The directory containing code for the custom TRTIS backend.
* `trt/tacotron2`: The directory containing the Tacotron2 implementation in TensorRT.
* `trt/waveglow`: The directory containing the WaveGlow implementation in TensorRT.
* `trt/denoiser`: The directory containing the Denoiser (STFT) implementation in TensorRT.
* `trt/plugins`: The directory containing plugins used by the TensorRT engines.
* `trt/helpers`: The directory containing scripts for exporting models from
PyTorch.
The `trtis_client/` directory contains the code for running the client.
### Precision
By default, the `./build_trtis.sh` script builds the TensorRT engines with FP16 mode enabled, which allows some operations to be performed in lower precision in order to increase throughput. To build engines that use only FP32 precision, add `0` to the arguments of `./build_trtis.sh`:
```bash
./build_trtis.sh models/tacotron2.json models/waveglow.onnx models/denoiser.json 0
```
## Performance
The following tables show inference statistics for the Tacotron2 and WaveGlow
text-to-speech system.
The tables include average latency, latency standard deviation,
and latency confidence intervals. Throughput is measured as the number of
generated audio samples per second. RTF is the real-time factor which
tells how many seconds of speech are generated in 1 second of processing time.
For all tests in these tables, we used WaveGlow with 256 residual channels.
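As a worked example of the RTF column: in the first FP16 row of the table below, an average of 6.59 seconds of audio is generated with an average latency of 0.49 seconds, giving an RTF of roughly 6.59 / 0.49 ≈ 13.45, which matches the reported 13.48 up to rounding of the latency.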
### Performance on NVIDIA T4
#### TensorRT w/ Plugins in TRTIS
Latency in this table is measured from when the client sends the request to
when it receives the generated audio back.
|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)| Latency interval 90% (s)|Latency interval 95% (s)|Latency interval 99% (s)|Avg mels generated |Avg audio length (s)|Avg RTF|
|---:|----:|-----:|------:|------:|------:|------:|------:|----:|------:|-------:|
| 1 | 128 | FP16 | 0.49 | 0.00 | 0.49 | 0.49 | 0.50 | 564 | 6.59 | 13.48 |
| 4 | 128 | FP16 | 1.37 | 0.01 | 1.38 | 1.38 | 1.38 | 563 | 6.54 | 4.77 |
| 1 | 128 | FP32 | 1.30 | 0.01 | 1.30 | 1.30 | 1.31 | 567 | 6.58 | 5.08 |
| 4 | 128 | FP32 | 3.63 | 0.01 | 3.64 | 3.64 | 3.64 | 568 | 6.59 | 1.82 |
To reproduce this table, see [Running the benchmark](#running-the-benchmark)
below.
#### TensorRT w/ Plugins vs. PyTorch
Latency in this table is measured from just before the input sequence starts
being copied from host memory to the GPU, to just after the generated audio
finishes being copied back to host memory. That is, it covers only the work
taking place in the custom backend inside TRTIS.
|Framework|Batch size|Input length|Precision|Avg latency (s)|Latency std (s)| Latency interval 90% (s)|Latency interval 95% (s)|Latency interval 99% (s)| Throughput (samples/sec) | Speed-up vs. PyT FP32 | Speed-up vs. PyT FP16 | Avg mels generated |Avg audio length (s)|Avg RTF|
|------:|----:|-----:|-----------:|--------:|------:|------:|------:|------:|------:|------:|----:|------:|-------:|---:|
| TRT w/ plugins | 1 | 128 | FP16 | 0.45 | 0.00 | 0.45 | 0.45 | 0.46 | 320,950 | __3.72x__ | __3.39x__ | 564 | 6.55 | 14.59 |
| TRT w/ plugins | 1 | 128 | FP32 | 1.26 | 0.01 | 1.27 | 1.27 | 1.27 | 115,150 | __1.33x__ | __1.21x__ | 567 | 6.58 | 5.22 |
| PyTorch | 1 | 128 | FP16 | 1.63 | 0.07 | 1.71 | 1.73 | 1.81 | 94,758 | __1.10x__ | __1.00x__ | 601 | 6.98 | 4.30 |
| PyTorch | 1 | 128 | FP32 | 1.77 | 0.08 | 1.88 | 1.92 | 2.00 | 86,705 | __1.00x__ | __0.91x__ | 600 | 6.96 | 3.92 |
That is a __3.72x__ speedup when using TensorRT FP16 with plugins when compared to
PyTorch FP32, and still a __3.39x__ speedup when compared to PyTorch FP16.
The TensorRT entries in this table can be reproduced by using the output of
the TRTIS server, when performing the steps for [Running the
benchmark](#running-the-benchmark) below.
The PyTorch entries can be reproduced by following the instructions
[here](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/Tacotron2).
### Running the benchmark
Once you have performed the steps in [Setup the TRTIS server](#setup-the-trtis-server) and
[Setup the TRTIS client](#setup-the-trtis-client), you can run the benchmark by starting the TRTIS server via:
```bash
./run_trtis_server.sh
```
Leave the server running, and in another terminal run the script `trtis_client/run_trtis_benchmark_client.sh`:
```bash
cd trtis_client/
./run_trtis_benchmark_client.sh <batch size>
```
Replace `<batch size>` with the desired batch size between 1 and 32. The engines are built with a maximum batch size of 32 in the `./build_trtis.sh` script.
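For example, running `./run_trtis_benchmark_client.sh 1` benchmarks batch size 1 and should produce output similar to the sample below.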
After some time this should produce output like:
```
Performed 1000 runs.
batch size = 1
input size = 128
avg latency (s) = 0.485718
latency std (s) = 0.00448834
latency interval 50% (s) = 0.485836
latency interval 90% (s) = 0.489517
latency interval 95% (s) = 0.490613
latency interval 99% (s) = 0.494721
average mels generated = 564
average audio generated (s) = 6.54803
average real-time factor = 13.4811
```

View file

@ -0,0 +1,79 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-all}"
IMAGE_NAME="trt-tacotron2-waveglow.trtis"
CONTAINER_NAME="trt-tacotron2-waveglow.trtis.container"
die() {
echo "ERROR: ${@}" 1>&2
exit 1
}
if [[ $# != 4 && $# != 3 ]]; then
echo "Unexpected number of arguments: $#"
echo "USAGE:"
echo "\t${0} <tacotron2 model> <waveglow model> <denoiser model> [use amp 0/1]"
exit 1
fi
# remove container if it exists
if [[ "$(docker ps -f "name=${CONTAINER_NAME}" -qa | wc -l)" != "0" ]]; then
docker rm "${CONTAINER_NAME}"
fi
TACOTRON2_MODEL="${1}"
WAVEGLOW_MODEL="${2}"
DENOISER_MODEL="${3}"
AMP="${4:-1}"
# copy models to build context
mkdir -p tmp/
cp -v "${TACOTRON2_MODEL}" tmp/tacotron2.json && TACOTRON2_MODEL="tmp/tacotron2.json" || die "Failed to copy ${TACOTRON2_MODEL}"
cp -v "${WAVEGLOW_MODEL}" tmp/waveglow.onnx && WAVEGLOW_MODEL="tmp/waveglow.onnx" || die "Failed to copy ${WAVEGLOW_MODEL}"
cp -v "${DENOISER_MODEL}" tmp/denoiser.json && DENOISER_MODEL="tmp/denoiser.json" || die "Failed to copy ${DENOISER_MODEL}"
docker build \
--build-arg TACOTRON2_MODEL="${TACOTRON2_MODEL}" \
--build-arg WAVEGLOW_MODEL="${WAVEGLOW_MODEL}" \
--build-arg DENOISER_MODEL="${DENOISER_MODEL}" \
-f Dockerfile.trtis . -t "${IMAGE_NAME}" || die "Failed to build docker container."
nvidia-docker run \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
--name "${CONTAINER_NAME}" \
"${IMAGE_NAME}" "./scripts/build_engines.sh" "${AMP}" || die "Failed to build engines."
docker commit "${CONTAINER_NAME}" "${IMAGE_NAME}" || die "Failed to commit changes."
docker rm "${CONTAINER_NAME}"

View file

@ -0,0 +1,60 @@
#!/bin/bash
BUILD_DIR="build/"
MAKEFILE="./Makefile"
OPTIONS="-DCMAKE_VERBOSE_MAKEFILE=1"
DEBUG=0
while [[ $# -gt 0 ]]; do
flag="${1}"
case "${flag}" in
--cc=*)
OPTIONS="${OPTIONS} -DCMAKE_C_COMPILER=${flag#*=}"
;;
--cxx=*)
OPTIONS="${OPTIONS} -DCMAKE_CXX_COMPILER=${flag#*=}"
;;
--devel)
OPTIONS="${OPTIONS} -DDEVEL=1"
;;
--debug)
DEBUG=1
;;
--trtis)
OPTIONS="${OPTIONS} -DBUILD_TRTIS=1"
;;
*)
echo "Unknown argument '${flag}'."
exit 1
;;
esac
shift
done
if [[ "${DEBUG}" == "1" ]]; then
OPTIONS="${OPTIONS} -DCMAKE_BUILD_TYPE=Debug"
else
OPTIONS="${OPTIONS} -DCMAKE_BUILD_TYPE=Release"
fi
if [[ -d "${BUILD_DIR}" ]]; then
rm -rf "${BUILD_DIR}"
fi
mkdir -p "${BUILD_DIR}"
pushd "${BUILD_DIR}"
cmake --version
echo "cmake .. ${OPTIONS}"
cmake .. ${OPTIONS}
popd
echo "# Auto generated makefile" > "${MAKEFILE}"
echo "" >> "${MAKEFILE}"
echo "all test:" >> "${MAKEFILE}"
echo " make -C \"${BUILD_DIR}\" \$@" >> "${MAKEFILE}"
echo "" >> "${MAKEFILE}"
echo ".PHONY: all" >> "${MAKEFILE}"
echo "" >> "${MAKEFILE}"

View file

@ -0,0 +1,104 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
NVIDIA_VISIBLE_DEVICES="${NVIDIA_VISIBLE_DEVICES:-0}"
DOCKER_FILE="$(realpath Dockerfile.export_weights)"
IMAGE_NAME="trt-tacotron2-waveglow.weight_export"
CONTAINER_NAME="trt-tacotron2-waveglow.weight_export.container"
die() {
echo "ERROR: ${@}" 1>&2
exit 1
}
die_and_remove_image() {
#docker rmi "${IMAGE_NAME}"
die "${@}"
}
if [[ "${#}" != 3 ]]; then
echo "Invalid arguments: ${@}"
echo "USAGE:"
echo " ${0} <tacotron2 checkpoint> <waveglow checkpoint> <output directory>"
exit 1
fi
TACOTRON2_PT="${1}"
WAVEGLOW_PT="${2}"
MODEL_DIR="$(realpath ${3})"
TACOTRON2_DIR="$(dirname $(realpath ${TACOTRON2_PT}))"
TACOTRON2_NAME="$(basename ${TACOTRON2_PT})"
WAVEGLOW_DIR="$(dirname $(realpath ${WAVEGLOW_PT}))"
WAVEGLOW_NAME="$(basename ${WAVEGLOW_PT})"
DLE_DIR="../"
# remove docker container if it exists
docker rm "${CONTAINER_NAME}" &> /dev/null
pushd "${DLE_DIR}"
docker build . -f "${DOCKER_FILE}" -t "${IMAGE_NAME}" || die "Failed to build container"
# export tacotron2
nvidia-docker run \
--rm \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
--name "${CONTAINER_NAME}" \
-v "${TACOTRON2_DIR}:/checkpoints" \
-v "${MODEL_DIR}:/models" \
"${IMAGE_NAME}" "./scripts/tacotron2_to_json.py \"/checkpoints/${TACOTRON2_NAME}\" /models/tacotron2.json" || \
die_and_remove_image "Failed to export tacotron2."
# export waveglow
nvidia-docker run \
--rm \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
--name "${CONTAINER_NAME}" \
-v "${WAVEGLOW_DIR}:/checkpoints" \
-v "${MODEL_DIR}:/models" \
"${IMAGE_NAME}" \
"./scripts/waveglow_to_onnx.py -W \"${DLE_DIR}\" -w \"/checkpoints/${WAVEGLOW_NAME}\" -o /models/waveglow.onnx" || \
die_and_remove_image "Failed to export waveglow."
# export denoiser
nvidia-docker run \
--rm \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
--name "${CONTAINER_NAME}" \
-v "${WAVEGLOW_DIR}:/checkpoints" \
-v "${MODEL_DIR}:/models" \
"${IMAGE_NAME}" \
"./scripts/denoiser_to_json.py \"${DLE_DIR}\" \"/checkpoints/${WAVEGLOW_NAME}\" /models/denoiser.json" || \
die_and_remove_image "Failed to export the denoiser."
docker rmi "${IMAGE_NAME}"

View file

@ -0,0 +1,37 @@
name: "tacotron2waveglow"
platform: "custom"
default_model_filename: "libtt2i_trtis.so"
max_batch_size: 32
input [
{
name: "INPUT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output [
{
name: "OUTPUT"
data_type: TYPE_FP32
dims: [ -1 ]
},
{
name: "OUTPUT_LENGTH"
data_type: TYPE_INT32
dims: [ -1 ]
}
]
parameters [
{
key: "engine_path"
value: { string_value: "/engines" }
},
{
key: "mapping_path"
value: { string_value: "/models/tacotron2waveglow/mapping.txt" }
},
{
key: "use_denoiser"
value: { string_value: "yes" }
}
]

View file

@ -0,0 +1,149 @@
# sequence-number symbol
0 _
1 -
2 !
3 '
4 (
5 )
6 ,
7 .
8 :
9 ;
10 ?
11
38 A
39 B
40 C
41 D
42 E
43 F
44 G
45 H
46 I
47 J
48 K
49 L
50 M
51 N
52 O
53 P
54 Q
55 R
56 S
57 T
58 U
59 V
60 W
61 X
62 Y
63 Z
38 a
39 b
40 c
41 d
42 e
43 f
44 g
45 h
46 i
47 j
48 k
49 l
50 m
51 n
52 o
53 p
54 q
55 r
56 s
57 t
58 u
59 v
60 w
61 x
62 y
63 z
64 @AA
65 @AA0
66 @AA1
67 @AA2
68 @AE
69 @AE0
70 @AE1
71 @AE2
72 @AH
73 @AH0
74 @AH1
75 @AH2
76 @AO
77 @AO0
78 @AO1
79 @AO2
80 @AW
81 @AW0
82 @AW1
83 @AW2
84 @AY
85 @AY0
86 @AY1
87 @AY2
88 @B
89 @CH
90 @D
91 @DH
92 @EH
93 @EH0
94 @EH1
95 @EH2
96 @ER
97 @ER0
98 @ER1
99 @ER2
100 @EY
101 @EY0
102 @EY1
103 @EY2
104 @F
105 @G
106 @HH
107 @IH
108 @IH0
109 @IH1
110 @IH2
111 @IY
112 @IY0
113 @IY1
114 @IY2
115 @JH
116 @K
117 @L
118 @M
119 @N
120 @NG
121 @OW
122 @OW0
123 @OW1
124 @OW2
125 @OY
126 @OY0
127 @OY1
128 @OY2
129 @P
130 @R
131 @S
132 @SH
133 @T
134 @TH
135 @UH
136 @UH0
137 @UH1
138 @UH2
139 @UW
140 @UW0
141 @UW1
142 @UW2
143 @V
144 @W
145 @Y
146 @Z
147 @ZH

View file

@ -0,0 +1,45 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
IMAGE_NAME="trt-tacotron2-waveglow.trtis"
NVIDIA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-all}"
nvidia-docker run \
--rm \
--shm-size=1g \
--ulimit memlock=-1 \
--ulimit stack=67108864 \
-p8000:8000 \
-p8001:8001 \
-p8002:8002 \
-e "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}" \
-e "LD_LIBRARY_PATH=/opt/tensorrtserver/lib" \
"${IMAGE_NAME}" trtserver --model-store=/models

View file

@ -0,0 +1,123 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
MODEL_DIR="/models/"
ENGINE_DIR="/engines/"
TACOTRON2_ID="1c5ZTuT7J08wLUoVZ2KkUs_VdZuJ86ZqA"
WAVEGLOW_ID="1WsibBTsuRg_SF2Z6L6NFRTT-NjEy1oTx"
TACOTRON2_PT="${MODEL_DIR}/tacotron2.pt"
WAVEGLOW_PT="${MODEL_DIR}/waveglow.pt"
TACOTRON2_JSON="${MODEL_DIR}/tacotron2.json"
WAVEGLOW_ONNX="${MODEL_DIR}/waveglow.onnx"
DENOISER_JSON="${MODEL_DIR}/denoiser.json"
HELPER_DIR="src/trt/helpers"
BIN_DIR="./build/bin"
BENCHMARK_BIN="${BIN_DIR}/benchmark"
MAX_BATCH_SIZE=32
SCRIPT_DIR="$(dirname "${0}")"
ENGINE_BUILD_SCRIPT="${SCRIPT_DIR}/build_engines.sh"
die() {
echo "ERROR: ${@}" 1>&2
exit 1
}
download_gfile() {
which curl &> /dev/null || die "Failed to find 'curl'."
# download file from google drive
local GOID="${1}"
local filename="${2}"
local GURL='https://drive.google.com/uc?export=download'
local cookie="$(mktemp)"
curl -sc "${cookie}" "${GURL}&id=${GOID}"
local getcode="$(awk '/_warning_/ {print $NF}' "${cookie}")"
curl -Lb "${cookie}" "${GURL}&confirm=${getcode}&id=${GOID}" -o "${filename}"
rm "${cookie}"
}
mkdir -p "${ENGINE_DIR}" "${MODEL_DIR}"
apt-get update -qy
apt-get install -y libsndfile1 || die "Failed to install libsndfile"
apt-get clean
git clone --depth=1 https://github.com/NVIDIA/DeepLearningExamples
TACO2_DIR="./DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2/"
# install required packages
pip3 install "torch==1.3" onnx scipy librosa || die "Failed while installing python packages."
# test packages
python3 -c "import torch; import onnx; import scipy; import numpy; import librosa" || die "Python packages fail to import"
## build tacotron2 engine
# download model
download_gfile "${TACOTRON2_ID}" "${TACOTRON2_PT}" || die "Failed to get tacotron2.pt"
# convert model to importable format
${HELPER_DIR}/tacotron2_to_json.py "${TACOTRON2_PT}" "${TACOTRON2_JSON}" || die "Failed to export tacotron2 to json."
rm -v "${TACOTRON2_PT}"
## build wave glow engine
# download model
download_gfile "${WAVEGLOW_ID}" "${WAVEGLOW_PT}" || die "Failed to get waveglow.pt"
# convert model to importable format
${HELPER_DIR}/waveglow_to_onnx.py \
-w "${WAVEGLOW_PT}" \
-W "${TACO2_DIR}" \
-o "${WAVEGLOW_ONNX}" \
--length_mels=160 || die "Failed to export waveglow to onnx."
## build denoiser engine
${HELPER_DIR}/denoiser_to_json.py "${TACO2_DIR}" "${WAVEGLOW_PT}" "${DENOISER_JSON}" || die "Failed to export denoiser to json."
# wait to remove wave glow until after denoiser is finished
rm -v "${WAVEGLOW_PT}"
rm -rvf "./DeepLearningExamples"
pip3 uninstall -qy torch onnx scipy
apt-get purge -y libsndfile1
"${ENGINE_BUILD_SCRIPT}" || die "Failed to build engines"

View file

@ -0,0 +1,91 @@
#!/bin/bash
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
MODEL_DIR="/models/"
ENGINE_DIR="/engines/"
TACOTRON2_JSON="${MODEL_DIR}/tacotron2.json"
WAVEGLOW_ONNX="${MODEL_DIR}/waveglow.onnx"
DENOISER_JSON="${MODEL_DIR}/denoiser.json"
TACOTRON2_ENG="${ENGINE_DIR}/tacotron2.eng"
WAVEGLOW_ENG="${ENGINE_DIR}/waveglow_chunk160_fp16.eng"
DENOISER_ENG="${ENGINE_DIR}/denoiser.eng"
BIN_DIR="./build/bin"
BENCHMARK_BIN="${BIN_DIR}/benchmark"
BUILD_TACOTRON2_BIN="${BIN_DIR}/build_tacotron2"
BUILD_WAVEGLOW_BIN="${BIN_DIR}/build_waveglow"
MAX_BATCH_SIZE=32
die() {
echo "ERROR: ${@}" 1>&2
exit 1
}
AMP="amp"
if [[ "${#}" == "1" ]]; then
if [[ "${1}" == "0" || "${1}" == "no" ]]; then
AMP="fp32"
elif [[ "${1}" == "1" || "${1}" == "yes" ]]; then
AMP="amp"
else
echo "Invalid arguments."
exit 1
fi
fi
echo
echo "Building with -F${AMP}"
echo
## build tacotron2 engine
./build/bin/build_tacotron2 "${TACOTRON2_JSON}" "${TACOTRON2_ENG}" -B ${MAX_BATCH_SIZE} -I 400 -F${AMP} || die "Failed to build tacotron2 engine."
rm -v "${TACOTRON2_JSON}"
## build wave glow engine
./build/bin/build_waveglow "${WAVEGLOW_ONNX}" "${WAVEGLOW_ENG}" -B ${MAX_BATCH_SIZE} -F${AMP} || die "Failed to build waveglow engine."
rm -v "${WAVEGLOW_ONNX}"
## build denoiser engine
./build/bin/build_denoiser "${DENOISER_JSON}" "${DENOISER_ENG}" -B ${MAX_BATCH_SIZE} -F${AMP} || die "Failed to build denoiser engine."
rm -v "${DENOISER_JSON}"
ls "${TACOTRON2_ENG}" "${WAVEGLOW_ENG}" "${DENOISER_ENG}" || die "Unable to access built engines."
echo "Successfully built '${TACOTRON2_ENG}', '${WAVEGLOW_ENG}', and '${DENOISER_ENG}'"

View file

@ -0,0 +1,104 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import json
import torch
import sys
import os
from scipy.signal import get_window
import librosa.util as librosa_util
WAVEGLOW_CONFIG = {
"n_mel_channels": 80,
"n_flows": 12,
"n_group": 8,
"n_early_every": 4,
"n_early_size": 2,
"WN_config": {
"n_layers": 8,
"kernel_size": 3,
"n_channels": 256
}
}
def gen_win_sq(
denoiser):
window = denoiser.stft.window
win_length = denoiser.stft.win_length
n_fft = denoiser.stft.filter_length
# Compute the squared window at the desired length
win_sq = get_window(window, win_length, fftbins=True)
win_sq = librosa_util.normalize(win_sq, norm=None)**2
win_sq = librosa_util.pad_center(win_sq, n_fft)
return win_sq
if len(sys.argv) < 4 or len(sys.argv) > 5:
print("USAGE:")
print(
"\t%s <tacotron2 directory> <waveglow checkpoint> <json output> [strength, default=0.1]" % sys.argv[0])
sys.exit(1)
json_path = sys.argv[3]
sys.path.append(sys.argv[1])
# must be imported after path is modified
from import_utils import load_waveglow
from waveglow.denoiser import Denoiser
strength = 0.1
if len(sys.argv) == 5:
strength = float(sys.argv[4])
print("Building denoiser")
waveglow = load_waveglow(sys.argv[2], WAVEGLOW_CONFIG)
denoiser = Denoiser(waveglow).cuda()
statedict = {}
statedict["denoiser.stft.forward_basis"] = denoiser.stft.forward_basis.cpu(
).numpy().tolist()
statedict["denoiser.stft.inverse_basis"] = denoiser.stft.inverse_basis.cpu(
).numpy().tolist()
statedict["denoiser.stft.win_sq"] = gen_win_sq(denoiser).tolist()
statedict["denoiser.bias_spec"] = (
denoiser.bias_spec*strength).cpu().numpy().tolist()
with open(json_path, "w") as fout:
json.dump(statedict, fout, indent=2)
print("Wrote to '%s'" % json_path)

View file

@ -0,0 +1,51 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import sys
if len(sys.argv) != 3:
print("Must specify path to PyTorch Tacotron2 containing 'text' module o load and text file to write")
sys.exit(1)
modulePath = sys.argv[1]
outputPath = sys.argv[2]
sys.path.append(modulePath)
from text import symbols
i=0
with open(outputPath, "w") as fout:
print("# sequence-number symbol", file=fout)
for s in symbols:
print("%d %s" % (i, s), file=fout)
i+=1
print("Successfully wrote %d symbols to '%s'." % (i, outputPath))

View file

@ -0,0 +1,30 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
from .waveglow import load_waveglow

View file

@ -0,0 +1,98 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import pickle
import torch
from waveglow.model import WaveGlow
def split_cond_layers(model):
for WN in model.WN:
if hasattr(WN, "cond_layer"):
n_layers = len(WN.res_skip_layers)
conv_weights = WN.cond_layer.weight
conv_bias = WN.cond_layer.bias
conv_stride = WN.cond_layer.stride
conv_dilation = WN.cond_layer.dilation
conv_padding = WN.cond_layer.padding
num_in_channels = conv_weights.size(1)
num_out_channels = conv_weights.size(0)//n_layers
kernel_size = conv_weights.size(2)
WN.cond_layers = []
for i in range(n_layers):
layer = torch.nn.Conv1d(
in_channels=num_in_channels,
out_channels=num_out_channels,
kernel_size=kernel_size,
stride=conv_stride,
padding=conv_padding,
dilation=conv_dilation)
layer.weight.data[:, :, :] = conv_weights.data[
i*num_out_channels:(i+1)*num_out_channels, :, :]
layer.bias.data[:] = conv_bias.data[
i*num_out_channels:(i+1)*num_out_channels]
layer = torch.nn.utils.weight_norm(layer, name='weight')
WN.cond_layers.append(layer)
return model
def load_waveglow(filename, waveglow_config):
class RenamingUnpickler(pickle.Unpickler):
def find_class(self, module, name):
if module == 'glow':
module = 'waveglow.model'
return super().find_class(module, name)
class RenamingPickleModule:
def load(self, f, *args, **kw_args):
return self.Unpickler(f, *args, **kw_args).load()
def Unpickler(self, f, **pickle_load_args):
return RenamingUnpickler(f, **pickle_load_args)
pickle_module = RenamingPickleModule()
blob = torch.load(filename, pickle_module=pickle_module)
if 'state_dict' in blob:
waveglow = WaveGlow(**waveglow_config).cuda()
state_dict = {}
for key, value in blob["state_dict"].items():
newKey = key
if key.startswith("module."):
newKey = key[len("module."):]
state_dict[newKey] = value
waveglow.load_state_dict(state_dict)
else:
waveglow = blob['model']
waveglow = split_cond_layers(waveglow)
waveglow = waveglow.remove_weightnorm(waveglow)
waveglow.cuda().eval()
return waveglow

View file

@ -0,0 +1,56 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import json
import torch
import sys
if len(sys.argv) != 3:
print("Must specify statedict to load and json to write")
sys.exit(1)
statedict_path = sys.argv[1]
json_path = sys.argv[2]
print("Reading from '%s' and writing to '%s'." % (statedict_path, json_path))
statedict = dict(torch.load(statedict_path)["state_dict"])
outdict = {}
for k, v in dict(statedict).items():
if k.startswith("module."):
k = k[len("module."):]
print(k)
outdict[k] = v.cpu().numpy().tolist()
with open(json_path, "w") as fout:
json.dump(outdict, fout)
print("Wrote to '%s'" % json_path)

View file

@ -0,0 +1,329 @@
#!/usr/bin/env python3
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# # Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# # Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
import json
import sys
import onnx
import numpy as np
from scipy.io.wavfile import write
import argparse
import torch
args = None
def convert_conv_1d_to_2d(conv1d):
conv2d = torch.nn.Conv2d(conv1d.weight.size(1),
conv1d.weight.size(0),
(conv1d.weight.size(2), 1),
stride=(conv1d.stride[0], 1),
dilation=(conv1d.dilation[0], 1),
padding=(conv1d.padding[0], 0))
conv2d.weight.data[:, :, :, 0] = conv1d.weight.data
conv2d.bias.data = conv1d.bias.data
return conv2d
def convert_WN_1d_to_2d_(WN):
"""
Modifies the WaveNet-like affine coupling layer in-place to use 2-d convolutions
"""
WN.start = convert_conv_1d_to_2d(WN.start)
WN.end = convert_conv_1d_to_2d(WN.end)
for i in range(len(WN.in_layers)):
WN.in_layers[i] = convert_conv_1d_to_2d(WN.in_layers[i])
for i in range(len(WN.res_skip_layers)):
WN.res_skip_layers[i] = convert_conv_1d_to_2d(WN.res_skip_layers[i])
for i in range(len(WN.res_skip_layers)):
WN.cond_layers[i] = convert_conv_1d_to_2d(WN.cond_layers[i])
def convert_convinv_1d_to_2d(convinv):
"""
Takes an invertible 1x1 1-d convolution and returns a 2-d convolution that does
the inverse
"""
conv2d = torch.nn.Conv2d(convinv.W_inverse.size(1),
convinv.W_inverse.size(0),
1, bias=False)
conv2d.weight.data[:, :, :, 0] = convinv.W_inverse.data
return conv2d
def convert_1d_to_2d_(glow):
"""
Caffe2 and TensorRT don't seem to support 1-d convolutions or properly
convert ONNX exports with 1d convolutions to 2d convolutions yet, so we
do the conversion to 2-d convolutions before ONNX export
"""
# Convert upsample to 2d
upsample = torch.nn.ConvTranspose2d(glow.upsample.weight.size(0),
glow.upsample.weight.size(1),
(glow.upsample.weight.size(2), 1),
stride=(glow.upsample.stride[0], 1))
upsample.weight.data[:, :, :, 0] = glow.upsample.weight.data
upsample.bias.data = glow.upsample.bias.data
glow.upsample = upsample
# Convert WN to 2d
for WN in glow.WN:
convert_WN_1d_to_2d_(WN)
# Convert invertible conv to 2d
for i in range(len(glow.convinv)):
glow.convinv[i] = convert_convinv_1d_to_2d(glow.convinv[i])
def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels):
in_act = input_a+input_b
in_left = in_act[:, 0:n_channels, :, :]
in_right = in_act[:, n_channels:2*n_channels, :, :]
t_act = torch.tanh(in_left)
s_act = torch.sigmoid(in_right)
acts = t_act * s_act
return acts
def WN_forward(self, forward_input):
"""
This is a forward replacement for the WN forward. It is required because
the code was written for 1-d convolutions, which are not yet supported by
ONNX export.
"""
audio, spect = forward_input
audio = self.start(audio)
for i in range(self.n_layers):
acts = fused_add_tanh_sigmoid_multiply(
self.in_layers[i](audio),
self.cond_layers[i](spect),
self.n_channels)
res_skip_acts = self.res_skip_layers[i](acts)
if i < self.n_layers - 1:
audio = res_skip_acts[:, 0:self.n_channels, :, :] + audio
skip_acts = res_skip_acts[:,
self.n_channels:2*self.n_channels, :, :]
else:
skip_acts = res_skip_acts
if i == 0:
output = skip_acts
else:
output = skip_acts + output
return self.end(output)
def infer_o(self, spect, z):
"""
In order for the trace to work when running through ONNX with 2-d convolutions,
we need to overwrite the forward method. All shape information is
pre-calculated so that ONNX doesn't export "Dynamic" outputs, which are not yet
supported by TensorRT.
"""
spect = self.upsample(spect)
spect = torch.squeeze(spect, 3)
spect = spect.view(self.view_size_1)
spect = spect.permute(0, 2, 1, 3)
spect = spect.contiguous()
spect = spect.view(self.view_size_2)
spect = spect.permute(0, 2, 1)
spect = spect.reshape([
self.batch_size,
self.upsample_weight_size*self.n_group,
self.length_spect_group,
1])
audio = z[:, 0:self.n_remaining_channels, :, :]
z = z[:, self.n_remaining_channels:self.n_group, :, :]
for k in reversed(range(self.n_flows)):
n_half = self.n_halves[k]
audio_0 = audio[:, 0:n_half, :, :]
audio_1 = audio[:, n_half:2*n_half, :, :]
output = self.WN[k]((audio_0, spect))
s = output[:, n_half:2*n_half, :, :]
b = output[:, 0:n_half, :, :]
audio_1 = (audio_1 - b)/torch.exp(s)
audio = torch.cat([audio_0, audio_1], 1)
audio = self.convinv[k](audio)
if k % self.n_early_every == 0 and k > 0:
audio = torch.cat((z[:, 0:self.n_early_size, :, :], audio), 1)
z = z[:, self.n_early_size:self.n_group -
self.n_remaining_channels, :, :]
audio = torch.squeeze(audio, 3)
audio = audio.permute(0, 2, 1).contiguous().view(
1, (self.length_spect_group * self.n_group))
return audio
def main(waveglow_path, output_path, batch_size, length_mels):
"""
Takes a WaveGlow model, a batch size, and a length in mels, and outputs a
static ONNX representation using 2-d convolutions.
"""
torch.manual_seed(0)
model = load_waveglow(waveglow_path, waveglow_config)
model.batch_size = batch_size
length_spect = length_mels
length_samples = 768 + 256*length_spect
model.upsample_weight_size = model.upsample.weight.size(0)
spect = torch.cuda.FloatTensor(
batch_size, model.upsample_weight_size, length_spect).normal_()
spect = torch.autograd.Variable(spect.cuda(), requires_grad=False)
# Run inference because it forces inverses to be calculated
with torch.no_grad():
test_out1 = model.infer(spect)
assert(length_samples % model.n_group == 0)
model.length_spect_group = int(length_samples / model.n_group)
# Pre-calculating the sizes of noise to use so it's not dynamic
n_halves = []
n_half = int(model.n_remaining_channels/2)
for k in reversed(range(model.n_flows)):
n_halves.append(n_half)
if k % model.n_early_every == 0 and k > 0:
n_half = n_half + int(model.n_early_size/2)
n_halves.reverse()
model.n_halves = n_halves
model.view_size_1 = torch.Size(
[model.batch_size, model.upsample_weight_size, model.length_spect_group, model.n_group])
model.view_size_2 = torch.Size(
[model.batch_size, model.length_spect_group, model.upsample_weight_size*model.n_group])
# Replace old forward with inference
glow.WaveGlow.forward = infer_o
glow.WN.forward = WN_forward
# Convert whole model to 2d convolutions
convert_1d_to_2d_(model)
model.cuda()
spect = torch.cuda.FloatTensor(
batch_size, model.upsample.weight.size(0), length_spect, 1).normal_()
z = torch.cuda.FloatTensor(
1, model.n_group, model.length_spect_group, 1).normal_()
spect = torch.autograd.Variable(spect.cuda(), requires_grad=False)
z = torch.autograd.Variable(z, requires_grad=False)
# Get output for comparison with Caffe2
with torch.no_grad():
test_out2 = model(spect, z)
# Export model
torch.onnx.export(model, (spect, z),
output_path,
dynamic_axes={'spect': [0], 'z': [0]},
input_names=['spect', 'z'],
output_names=['audio'],
opset_version=10,
verbose=True)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-w', '--waveglow_path',
help='Path to waveglow decoder checkpoint with model',
required=True)
parser.add_argument('-W', '--tacotron2_home', help='Path to DeepLearningExamples/PyTorch/SpeechSynthesis/Tacotron2 directory.',
required=True)
parser.add_argument('-o', "--onnx_path",
help="Path to output ONNX file", required=True)
parser.add_argument("--batch_size", default=1, type=int)
parser.add_argument("--length_mels", default=160, type=int)
# add wave glow arguments
waveglow = parser.add_argument_group("WaveGlow parameters")
waveglow.add_argument('--n-mel-channels', default=80, type=int,
help='Number of bins in mel-spectrograms')
# glow parameters
waveglow.add_argument('--flows', default=12, type=int,
help='Number of steps of flow')
waveglow.add_argument('--groups', default=8, type=int,
help='Number of samples in a group processed by the steps of flow')
waveglow.add_argument('--early-every', default=4, type=int,
help='Determines how often (i.e., after how many coupling layers) \
a number of channels (defined by --early-size parameter) are output\
to the loss function')
waveglow.add_argument('--early-size', default=2, type=int,
help='Number of channels output to the loss function')
waveglow.add_argument('--sigma', default=1.0, type=float,
help='Standard deviation used for sampling from Gaussian')
waveglow.add_argument('--segment-length', default=4000, type=int,
help='Segment length (audio samples) processed per iteration')
# wavenet parameters
wavenet = waveglow.add_argument_group('WaveNet parameters')
wavenet.add_argument('--wn-kernel-size', default=3, type=int,
help='Kernel size for dilated convolution in the affine coupling layer (WN)')
wavenet.add_argument('--wn-channels', default=256, type=int,
help='Number of channels in WN')
wavenet.add_argument('--wn-layers', default=8, type=int,
help='Number of layers in WN')
args = parser.parse_args()
# do imports as needed
sys.path.append(args.tacotron2_home)
import waveglow.model as glow
from import_utils import load_waveglow
global waveglow_config
waveglow_config = {
"n_mel_channels": args.n_mel_channels,
"n_flows": args.flows,
"n_group": args.groups,
"n_early_every": args.early_every,
"n_early_size": args.early_size,
"WN_config": {
"n_layers": args.wn_layers,
"kernel_size": args.wn_kernel_size,
"n_channels": args.wn_channels
}
}
main(args.waveglow_path, args.onnx_path, args.batch_size, args.length_mels)

View file

@ -0,0 +1,43 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# include headers in current directory
include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
# sub-pieces
add_subdirectory("trt")
add_subdirectory("bin")
# build trtis
if (DEFINED BUILD_TRTIS AND NOT BUILD_TRTIS EQUAL 0)
message("Building TRTIS backend")
add_subdirectory("trtis")
endif()
# build tests
add_subdirectory("test")

View file

@@ -0,0 +1,48 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
function(add_binary bin_file)
get_filename_component(bin_name "${bin_file}" NAME_WE)
add_executable(${bin_name} ${bin_file})
target_link_libraries(${bin_name} tt2i)
target_include_directories(${bin_name} PRIVATE
../trt/
../trt/util
../trt/tacotron2
../trt/waveglow
../trt/denoiser
../trt/common
)
set_property(TARGET ${bin_name} PROPERTY RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
endfunction()
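# add_binary() above gives every .cpp file in this directory its own
# executable, named after the source file (without extension), linked against
# the tt2i library and placed in ${CMAKE_BINARY_DIR}/bin.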
# build benchmark executables
file(GLOB binaries *.cpp)
foreach (file ${binaries})
add_binary(${file})
endforeach()

View file

@@ -0,0 +1,189 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cudaUtils.h"
#include "denoiserBuilder.h"
#include "engineCache.h"
#include "jsonModelImporter.h"
#include "logging.h"
#include "NvInfer.h"
#include <iostream>
#include <memory>
using namespace nvinfer1;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
bool matches(const std::string& arg, const std::string& flag)
{
return arg.length() >= flag.length() && arg.substr(0, flag.length()) == flag;
}
int parseNumFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
int value;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
value = std::stol(arg.substr(flag.length()));
} else if (*i + 1 < argc) {
++(*i);
value = std::stol(argv[*i]);
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
return value;
}
int parseAmpFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
std::string str;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
str = arg.substr(flag.length());
} else if (*i + 1 < argc) {
++(*i);
str = argv[*i];
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
int value;
if (str == "fp32") {
value = 0;
} else if (str == "amp") {
value = 1;
} else {
throw std::runtime_error(
"Invalid argument for precision (amp|fp32): " + str);
}
return value;
}
void usage(const std::string& binName)
{
std::cerr << "usage: " << std::endl;
std::cerr << " " << binName << " <model file> <engine file> [options]\n";
std::cerr << "options:" << std::endl;
std::cerr << " -B<batch size>" << std::endl;
std::cerr << " -F<precision (fp32|amp)>" << std::endl;
std::cerr << " -h" << std::endl;
}
void parseArgs(
const int argc,
const char** const argv,
std::string* model,
std::string* enginePath,
int* batchSize,
int* useAMP)
{
bool modelSet = false;
bool enginePathSet = false;
for (int i = 1; i < argc; ++i) {
const std::string arg(argv[i]);
if (matches(arg, "-B")) {
*batchSize = parseNumFlag(argc, argv, "-B", &i);
} else if (matches(arg, "-F")) {
*useAMP = parseAmpFlag(argc, argv, "-F", &i);
} else if (matches(arg, "-h")) {
usage(argv[0]);
exit(0);
} else {
if (!modelSet) {
*model = arg;
modelSet = true;
} else if (!enginePathSet) {
*enginePath = arg;
enginePathSet = true;
} else {
throw std::runtime_error("Unknown extra argument '" + arg + "'.");
}
}
}
}
/******************************************************************************
* MAIN ***********************************************************************
*****************************************************************************/
int main(int argc, const char* argv[])
{
std::string denoiserModelPath;
std::string enginePath;
int batchSize = 1;
int useFP16 = true;
parseArgs(argc, argv, &denoiserModelPath, &enginePath, &batchSize, &useFP16);
if (denoiserModelPath.empty() || enginePath.empty()) {
usage(argv[0]);
return 1;
}
CudaUtils::printDeviceInformation();
try {
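// Build flow: create a TensorRT builder, cap the batch size, request FP16
// kernels unless '-Ffp32' was passed (FP16 is the default), reserve a 1 GiB
// workspace, load the denoiser weights from the exported JSON file, build the
// engine with DenoiserBuilder, and serialize it to <engine file> via
// EngineCache.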
std::shared_ptr<Logger> logger(new Logger(ILogger::Severity::kERROR));
TRTPtr<IBuilder> builder(createInferBuilder(*logger));
builder->setMaxBatchSize(batchSize);
TRTPtr<IBuilderConfig> config(builder->createBuilderConfig());
config->setMaxWorkspaceSize(1ULL << 30);
uint32_t flags = 0;
if (useFP16) {
flags |= (1U << static_cast<int>(BuilderFlag::kFP16));
}
config->setFlags(flags);
EngineCache cache(logger);
JSONModelImporter importer(denoiserModelPath);
const int denoiserWindowSize = 2 << 13;
DenoiserBuilder denoiserBuilder(denoiserWindowSize);
const TRTPtr<ICudaEngine> engine
= denoiserBuilder.build(importer, *builder, batchSize, useFP16);
cache.save(*engine, enginePath);
} catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return 1;
}
return 0;
}

View file

@@ -0,0 +1,183 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cudaUtils.h"
#include "engineCache.h"
#include "logging.h"
#include "tacotron2Builder.h"
#include "NvInfer.h"
#include <iostream>
#include <memory>
#include <vector>
using namespace nvinfer1;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
bool matches(const std::string& arg, const std::string& flag)
{
return arg.length() >= flag.length() && arg.substr(0, flag.length()) == flag;
}
int parseNumFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
int value;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
value = std::stol(arg.substr(flag.length()));
} else if (*i + 1 < argc) {
++(*i);
value = std::stol(argv[*i]);
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
return value;
}
int parseAmpFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
std::string str;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
str = arg.substr(flag.length());
} else if (*i + 1 < argc) {
++(*i);
str = argv[*i];
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
int value;
if (str == "fp32") {
value = 0;
} else if (str == "amp") {
value = 1;
} else {
throw std::runtime_error(
"Invalid argument for precision (amp|fp32): " + str);
}
return value;
}
void usage(const std::string& binName)
{
std::cerr << "usage: " << std::endl;
std::cerr << " " << binName << " <model file> <engine file> [options]\n";
std::cerr << "options:" << std::endl;
std::cerr << " -I<max input length>" << std::endl;
std::cerr << " -B<batch size>" << std::endl;
std::cerr << " -F<precision (fp32|amp)>" << std::endl;
std::cerr << " -h" << std::endl;
}
void parseArgs(
const int argc,
const char** const argv,
std::string* model,
std::string* enginePath,
int* batchSize,
int* inputLength,
int* useAMP)
{
bool modelSet = false;
bool enginePathSet = false;
for (int i = 1; i < argc; ++i) {
const std::string arg(argv[i]);
if (matches(arg, "-I")) {
*inputLength = parseNumFlag(argc, argv, "-I", &i);
} else if (matches(arg, "-B")) {
*batchSize = parseNumFlag(argc, argv, "-B", &i);
} else if (matches(arg, "-F")) {
*useAMP = parseAmpFlag(argc, argv, "-F", &i);
} else if (matches(arg, "-h")) {
usage(argv[0]);
exit(0);
} else {
if (!modelSet) {
*model = arg;
modelSet = true;
} else if (!enginePathSet) {
*enginePath = arg;
enginePathSet = true;
} else {
throw std::runtime_error("Unknown extra argument '" + arg + "'.");
}
}
}
}
/******************************************************************************
* MAIN ***********************************************************************
*****************************************************************************/
int main(int argc, const char* argv[])
{
std::string tacotron2ModelPath;
std::string enginePath;
int batchSize = 1;
int inputLength = 400;
int useFP16 = true;
parseArgs(
argc,
argv,
&tacotron2ModelPath,
&enginePath,
&batchSize,
&inputLength,
&useFP16);
if (tacotron2ModelPath.empty() || enginePath.empty()) {
usage(argv[0]);
return 1;
}
CudaUtils::printDeviceInformation();
try {
std::shared_ptr<Logger> logger(new Logger(ILogger::Severity::kERROR));
TRTPtr<IBuilder> builder(createInferBuilder(*logger));
EngineCache cache(logger);
Tacotron2Builder tacotron2Builder(tacotron2ModelPath);
const std::vector<TRTPtr<ICudaEngine>> engines
= tacotron2Builder.build(inputLength, *builder, batchSize, useFP16);
cache.save(engines, enginePath);
} catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return 1;
}
return 0;
}

View file

@@ -0,0 +1,174 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cudaUtils.h"
#include "engineCache.h"
#include "logging.h"
#include "waveGlowBuilder.h"
#include "NvInfer.h"
#include <iostream>
#include <memory>
using namespace nvinfer1;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
bool matches(const std::string& arg, const std::string& flag)
{
return arg.length() >= flag.length() && arg.substr(0, flag.length()) == flag;
}
int parseNumFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
int value;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
value = std::stol(arg.substr(flag.length()));
} else if (*i + 1 < argc) {
++(*i);
value = std::stol(argv[*i]);
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
return value;
}
int parseAmpFlag(
const int argc, const char** argv, const std::string& flag, int* i)
{
std::string str;
const std::string arg(argv[*i]);
if (arg.length() > flag.length()) {
str = arg.substr(flag.length());
} else if (*i + 1 < argc) {
++(*i);
str = argv[*i];
} else {
throw std::runtime_error("Missing argument for '" + flag + "'.");
}
int value;
if (str == "fp32") {
value = 0;
} else if (str == "amp") {
value = 1;
} else {
throw std::runtime_error(
"Invalid argument for precision (amp|fp32): " + str);
}
return value;
}
void usage(const std::string& binName)
{
std::cerr << "usage: " << std::endl;
std::cerr << " " << binName << " <model file> <engine file> [options]\n";
std::cerr << "options:" << std::endl;
std::cerr << " -B<batch size>" << std::endl;
std::cerr << " -F<precision (fp32|amp)>" << std::endl;
std::cerr << " -h" << std::endl;
}
void parseArgs(
const int argc,
const char** const argv,
std::string* model,
std::string* enginePath,
int* batchSize,
int* useAMP)
{
bool modelSet = false;
bool enginePathSet = false;
for (int i = 1; i < argc; ++i) {
const std::string arg(argv[i]);
if (matches(arg, "-B")) {
*batchSize = parseNumFlag(argc, argv, "-B", &i);
} else if (matches(arg, "-F")) {
*useAMP = parseAmpFlag(argc, argv, "-F", &i);
} else if (matches(arg, "-h")) {
usage(argv[0]);
exit(0);
} else {
if (!modelSet) {
*model = arg;
modelSet = true;
} else if (!enginePathSet) {
*enginePath = arg;
enginePathSet = true;
} else {
throw std::runtime_error("Unknown extra argument '" + arg + "'.");
}
}
}
}
/******************************************************************************
* MAIN ***********************************************************************
*****************************************************************************/
int main(int argc, const char* argv[])
{
std::string waveglowModelPath;
std::string enginePath;
int batchSize = 1;
int useFP16 = true;
parseArgs(argc, argv, &waveglowModelPath, &enginePath, &batchSize, &useFP16);
if (waveglowModelPath.empty() || enginePath.empty()) {
usage(argv[0]);
return 1;
}
CudaUtils::printDeviceInformation();
try {
std::shared_ptr<Logger> logger(new Logger(ILogger::Severity::kERROR));
TRTPtr<IBuilder> builder(createInferBuilder(*logger));
EngineCache cache(logger);
WaveGlowBuilder waveglowBuilder(waveglowModelPath, logger);
const TRTPtr<ICudaEngine> wgEng
= waveglowBuilder.build(*builder, batchSize, useFP16);
cache.save(*wgEng, enginePath);
} catch (const std::exception& e) {
std::cerr << "Exception: " << e.what() << std::endl;
return 1;
}
return 0;
}

View file

@@ -0,0 +1,130 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "blending.h"
#include "cudaMemory.h"
#include <vector>
using namespace tts;
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
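// Note on the Blending::linear() calls below (inferred from the expectations
// in these tests rather than from the Blending API itself): the arguments
// appear to be batch size, device input samples, device output buffer, chunk
// size, overlap length, per-batch output stride, output sample offset, and
// CUDA stream. With zero overlap and zero offset the chunk is copied through
// unchanged; with an offset of one chunk it is written chunkSize samples into
// each batch item's output, leaving the preceding samples untouched.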
TEST(noOverlapNoOffsetBatchSize1)
{
const int chunkSize = 4000;
const int batchSize = 1;
std::vector<float> samplesHost(chunkSize * batchSize);
for (size_t i = 0; i < samplesHost.size(); ++i) {
samplesHost[i] = static_cast<float>(i % 1001) / 1000.0f;
}
CudaMemory<float> samplesDevice(samplesHost);
CudaMemory<float> outDevice(samplesHost.size());
Blending::linear(
batchSize,
samplesDevice.data(),
outDevice.data(),
chunkSize,
0,
chunkSize,
0,
0);
const std::vector<float> outHost = outDevice.toHost();
for (size_t i = 0; i < samplesHost.size(); ++i) {
EXPECT_NEAR(samplesHost[i], outHost[i], 1e-6f);
}
}
TEST(noOverlapNoOffsetBatchSize4)
{
const int chunkSize = 4000;
const int batchSize = 4;
std::vector<float> samplesHost(chunkSize * batchSize);
for (size_t i = 0; i < samplesHost.size(); ++i) {
samplesHost[i] = static_cast<float>(i % 1001) / 1000.0f;
}
CudaMemory<float> samplesDevice(samplesHost);
CudaMemory<float> outDevice(samplesHost.size());
Blending::linear(
batchSize,
samplesDevice.data(),
outDevice.data(),
chunkSize,
0,
chunkSize,
0,
0);
const std::vector<float> outHost = outDevice.toHost();
for (size_t i = 0; i < samplesHost.size(); ++i) {
EXPECT_NEAR(samplesHost[i], outHost[i], 1e-6f);
}
}
TEST(noOverlapOneOffsetBatchSize4)
{
const int chunkSize = 4000;
const int batchSize = 4;
std::vector<float> samplesHost(chunkSize * batchSize);
for (size_t i = 0; i < samplesHost.size(); ++i) {
samplesHost[i] = static_cast<float>(i % 1001) / 1000.0f;
}
CudaMemory<float> samplesDevice(samplesHost);
CudaMemory<float> outDevice(samplesHost.size() * 2);
outDevice.zero();
Blending::linear(
batchSize,
samplesDevice.data(),
outDevice.data(),
chunkSize,
0,
2 * chunkSize,
chunkSize,
0);
const std::vector<float> outHost = outDevice.toHost();
for (int b = 0; b < batchSize; ++b) {
for (int i = 0; i < chunkSize; ++i) {
const int j = b * (chunkSize * 2) + i;
EXPECT_EQ(0.0f, outHost[j]) << "i = " << i;
}
for (int i = chunkSize; i < chunkSize * 2; ++i) {
const int j = b * (chunkSize * 2) + i;
const int k = b * chunkSize + (i - chunkSize);
EXPECT_NEAR(samplesHost[k], outHost[j], 1e-6f) << "i = " << i;
}
}
}

View file

@@ -0,0 +1,57 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
function(add_unit_test test_file)
get_filename_component(test_name "${test_file}" NAME_WE)
add_executable(${test_name} ${test_file} UnitTest.cpp)
target_link_libraries(${test_name} tt2i)
add_test(NAME ${test_name}
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/${test_name}"
WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}")
endfunction()
include_directories(
../extra
../trt/plugins/taco2AttentionPlugin/
../trt/plugins/taco2DenoiseTransformPlugin/
../trt/plugins/taco2LSTMCellPlugin/
../trt/plugins/taco2ModulationRemovalPlugin/
../trt/plugins/taco2PrenetPlugin/
../trt/plugins/taco2ProjectionPlugin/
../trt/plugins/common/
../trt/
../trt/util
../trt/tacotron2
../trt/waveglow
../trt/denoiser
../trt/common
)
file(GLOB tests *_test.cpp)
foreach (file ${tests})
add_unit_test(${file})
endforeach()

View file

@@ -0,0 +1,56 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "CharacterMappingReader.hpp"
#include "UnitTest.hpp"
#include "characterMapping.h"
#include <fstream>
using namespace tts;
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
TEST(LoadFromFile)
{
{
std::ofstream fout("test.txt");
fout << "# ignored header" << std::endl;
fout << "0 long" << std::endl;
fout << "1 s" << std::endl;
fout << "2 " << std::endl;
fout << "3 " << std::endl;
}
CharacterMapping mapping = CharacterMappingReader::loadFromFile("test.txt");
EXPECT_EQ(mapping.get("long"), 0);
EXPECT_EQ(mapping.get("s"), 1);
EXPECT_EQ(mapping.get(" "), 2);
EXPECT_EQ(mapping.get(" "), 3);
}

View file

@@ -0,0 +1,80 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "characterMapping.h"
using namespace tts;
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
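// The expected sequences below imply that the default mapping assigns
// lowercase letters ids of 38 + (letter - 'a') (e.g. 'a' -> 38, 'p' -> 53,
// 'y' -> 62), with ',' -> 6, '.' -> 7 and ' ' -> 11, while ARPAbet tokens
// written as {@...} map to their own ids (e.g. AE0 -> 69, UW -> 139,
// AO -> 76). These values are read off the test data here, not taken from the
// CharacterMapping implementation itself.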
TEST(MapAsciiTest)
{
const std::string text(
"printing, in the only sense with which we are at present concerned, differs "
"from most if not from all the arts and crafts represented in the exhibition in "
"being comparatively modern.");
CharacterMapping cm = CharacterMapping::defaultMapping();
const std::vector<int32_t> sequence = cm.map(text);
const std::vector<int32_t> expSequence{
53, 55, 46, 51, 57, 46, 51, 44, 6 , 11, 46, 51, 11, 57, 45, 42, 11, 52, 51, 49,
62, 11, 56, 42, 51, 56, 42, 11, 60, 46, 57, 45, 11, 60, 45, 46, 40, 45, 11, 60,
42, 11, 38, 55, 42, 11, 38, 57, 11, 53, 55, 42, 56, 42, 51, 57, 11, 40, 52, 51,
40, 42, 55, 51, 42, 41, 6, 11, 41, 46, 43, 43, 42, 55, 56, 11, 43, 55, 52, 50,
11, 50, 52, 56, 57, 11, 46, 43, 11, 51, 52, 57, 11, 43, 55, 52, 50, 11, 38, 49,
49, 11, 57, 45, 42, 11, 38, 55, 57, 56, 11, 38, 51, 41, 11, 40, 55, 38, 43, 57,
56, 11, 55, 42, 53, 55, 42, 56, 42, 51, 57, 42, 41, 11, 46, 51, 11, 57, 45, 42,
11, 42, 61, 45, 46, 39, 46, 57, 46, 52, 51, 11, 46, 51, 11, 39, 42, 46, 51, 44,
11, 40, 52, 50, 53, 38, 55, 38, 57, 46, 59, 42, 49, 62, 11, 50, 52, 41, 42, 55,
51, 7 };
ASSERT_EQ(sequence.size(), expSequence.size());
for (size_t i = 0; i < expSequence.size(); ++i) {
EXPECT_EQ(expSequence[i], sequence[i]);
}
}
TEST(MapArpabetTest)
{
const std::string text("Hello {@AE0}ther {@UW}{@AO}rld.");
CharacterMapping cm = CharacterMapping::defaultMapping();
const std::vector<int32_t> sequence = cm.map(text);
const std::vector<int32_t> expSequence{
45, 42, 49, 49, 52, 11, 69, 57, 45, 42, 55, 11, 139, 76, 55, 49, 41, 7};
ASSERT_EQ(sequence.size(), expSequence.size());
for (size_t i = 0; i < expSequence.size(); ++i) {
EXPECT_EQ(expSequence[i], sequence[i]);
}
}

View file

@@ -0,0 +1,92 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "cudaMemory.h"
#include "dataShuffler.h"
#include <vector>
using namespace tts;
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
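// Layout checked by this test (derived from the expected values below): the
// decoder output is a row-major matrix of shape
// [chunkSize x batchSize*(numChannels+1)], where each batch item contributes
// numChannels mel values followed by one gate value per timestep.
// parseDecoderOutput is expected to transpose this into mel output of shape
// [batchSize x numChannels x chunkSize] and gate output of shape
// [batchSize x chunkSize], copying the gate values through unchanged.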
TEST(parseDecoderOutput)
{
const int chunkSize = 89;
const int batchSize = 3;
const int numChannels = 80;
const int rows = chunkSize;
const int cols = (numChannels + 1) * batchSize;
std::vector<float> mat(rows * cols);
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
mat[i * cols + j] = static_cast<float>(i * cols + j);
if ((j % (numChannels + 1)) == numChannels) {
// gate
mat[i * cols + j] *= -1.0f;
}
}
}
CudaMemory<float> matInDev(mat);
CudaMemory<float> matOutDev(chunkSize * numChannels * batchSize);
CudaMemory<float> gateOutDev(chunkSize * batchSize);
DataShuffler::parseDecoderOutput(
matInDev.data(),
matOutDev.data(),
gateOutDev.data(),
batchSize,
chunkSize,
numChannels,
0);
const std::vector<float> act = matOutDev.toHost();
for (int i = 0; i < numChannels * batchSize; ++i) {
for (int j = 0; j < chunkSize; ++j) {
EXPECT_EQ(
act[i * chunkSize + j],
static_cast<float>(j * cols + (i + (i / numChannels))))
<< "i = " << i << " j = " << j;
}
}
const std::vector<float> actGate = gateOutDev.toHost();
for (int i = 0; i < batchSize; ++i) {
for (int j = 0; j < chunkSize; ++j) {
EXPECT_EQ(
actGate[i * chunkSize + j],
-static_cast<float>(
((i + 1) * numChannels + i)
+ (j * (numChannels + 1) * batchSize)))
<< "i = " << i << " j = " << j;
}
}
}

View file

@@ -0,0 +1,90 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "jsonModelImporter.h"
#include <fstream>
using namespace tts;
/******************************************************************************
* UNIT TESTS *****************************************************************
*****************************************************************************/
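// These tests assume the exported JSON maps dot-separated parameter paths
// ("<module>.<layer>.<name>") to nested arrays or scalars of weights.
// getWeights() is called with the path prefix split into its components, and
// the returned LayerData exposes each trailing name via get(), with the
// values flattened into a float array and count giving the number of
// elements.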
TEST(ImportArraysTest)
{
std::ofstream fout("test.json");
fout << "{" << std::endl;
fout << "\"test.layer.weight\" :" << std::endl;
fout << "[[[1.0, 3.0, -5.0], [2.0, 1.0, 0.0]]]," << std::endl;
fout << "\"test.layer.bias\" :" << std::endl;
fout << "[[2.0, -3.0, 1.0]]" << std::endl;
fout << "}" << std::endl;
fout.flush();
fout.close();
JSONModelImporter importer("test.json");
const LayerData * data = importer.getWeights({"test", "layer"});
ASSERT_TRUE(data != nullptr);
ASSERT_EQ(data->get("weight").count, 6);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[0], 1.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[1], 3.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[2], -5.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[3], 2.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[4], 1.0f);
EXPECT_EQ(static_cast<const float*>(data->get("weight").values)[5], 0.0f);
ASSERT_EQ(data->get("bias").count, 3);
EXPECT_EQ(static_cast<const float*>(data->get("bias").values)[0], 2.0f);
EXPECT_EQ(static_cast<const float*>(data->get("bias").values)[1], -3.0f);
EXPECT_EQ(static_cast<const float*>(data->get("bias").values)[2], 1.0f);
}
TEST(ImportScalarTest)
{
std::ofstream fout("test.json");
fout << "{" << std::endl;
fout << "\"test.layer.some_value\" :" << std::endl;
fout << "3" << std::endl;
fout << "}" << std::endl;
fout.flush();
fout.close();
JSONModelImporter importer("test.json");
const LayerData * data = importer.getWeights({"test", "layer"});
ASSERT_TRUE(data != nullptr);
ASSERT_EQ(data->get("some_value").count, 1);
EXPECT_EQ(static_cast<const float*>(data->get("some_value").values)[0], 3.0f);
}

View file

@@ -0,0 +1,229 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "cudaMemory.h"
#include "taco2DenoiseTransformLayerPlugin.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-1.0, 1.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
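// The CPU reference in both tests treats each input column as an STFT frame
// whose first filterLength/2 rows are real parts and whose remaining rows are
// imaginary parts. For every bin y it computes
//   mag  = sqrt(re^2 + im^2),  phase = atan2(im, re),
//   mag' = max(0, mag - weights[y]),
// and writes back mag'*cos(phase) and mag'*sin(phase); the plugin output is
// expected to match this to within 1e-6.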
TEST(CPUCompareTestBatch1)
{
std::mt19937 rng(0);
const int filterLength = 1026;
const int inputLength = 935;
const int inputWidth = filterLength / 2;
std::vector<float> weightsHost = genVec(inputWidth, rng);
Taco2DenoiseTransformLayerPlugin layer(
TRTUtils::toWeights(weightsHost), filterLength, inputLength);
std::vector<float> inputHost = genVec(filterLength * inputLength, rng);
CudaMemory<float> inputDevice(inputHost);
std::vector<Dims> inputDims{Dims4(1, 1, filterLength, inputLength)};
const std::vector<Dims> outputDims{Dims4(1, 1, filterLength, inputLength)};
const std::vector<DataType> dataTypes{DataType::kFLOAT};
const bool broadcast[] = {false};
layer.configurePlugin(
inputDims.data(),
static_cast<int>(inputDims.size()),
outputDims.data(),
static_cast<int>(outputDims.size()),
dataTypes.data(),
dataTypes.data(),
broadcast,
broadcast,
#if NV_TENSORRT_MAJOR < 6
PluginFormat::kNCHW,
#else
PluginFormat::kLINEAR,
#endif
1);
layer.initialize();
std::vector<const float*> inputs{inputDevice.data()};
CudaMemory<float> outputDevice(filterLength * inputLength);
std::vector<float*> outputs{outputDevice.data()};
layer.enqueue(
1,
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
nullptr,
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> expOutput(filterLength * inputLength);
for (int y = 0; y < inputWidth; ++y) {
for (int x = 0; x < inputLength; ++x) {
const int idx = y * inputLength + x;
const int idx2 = (y + inputWidth) * inputLength + x;
const float real = inputHost[idx];
const float img = inputHost[idx2];
float mag = std::sqrt(real * real + img * img);
const float phase = std::atan2(img, real);
mag = std::max(0.0f, mag - weightsHost[y]);
expOutput[idx] = mag * std::cos(phase);
expOutput[idx2] = mag * std::sin(phase);
}
}
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size(), actOutput.size());
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i], 1e-6) << "i = " << i;
}
}
TEST(CPUCompareTestBatch4)
{
std::mt19937 rng(0);
const int filterLength = 1026;
const int inputLength = 935;
const int inputWidth = filterLength / 2;
const int batchSize = 9;
std::vector<float> weightsHost = genVec(inputWidth, rng);
Taco2DenoiseTransformLayerPlugin layer(
TRTUtils::toWeights(weightsHost), filterLength, inputLength);
// duplicate vector
std::vector<float> inputHostBase = genVec(filterLength * inputLength, rng);
std::vector<float> inputHost;
for (int i = 0; i < batchSize; ++i) {
inputHost.insert(
inputHost.end(), inputHostBase.begin(), inputHostBase.end());
}
CudaMemory<float> inputDevice(inputHost);
std::vector<Dims> inputDims{Dims4(1, 1, filterLength, inputLength)};
const std::vector<Dims> outputDims{Dims4(1, 1, filterLength, inputLength)};
const std::vector<DataType> dataTypes{DataType::kFLOAT};
const bool broadcast[] = {false};
layer.configurePlugin(
inputDims.data(),
static_cast<int>(inputDims.size()),
outputDims.data(),
static_cast<int>(outputDims.size()),
dataTypes.data(),
dataTypes.data(),
broadcast,
broadcast,
PluginFormat::kLINEAR,
batchSize);
layer.initialize();
std::vector<const float*> inputs{inputDevice.data()};
CudaMemory<float> outputDevice(filterLength * inputLength * batchSize);
std::vector<float*> outputs{outputDevice.data()};
layer.enqueue(
batchSize,
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
nullptr,
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> expOutput(filterLength * inputLength);
for (int y = 0; y < inputWidth; ++y) {
for (int x = 0; x < inputLength; ++x) {
const int idx = y * inputLength + x;
const int idx2 = (y + inputWidth) * inputLength + x;
const float real = inputHost[idx];
const float img = inputHost[idx2];
float mag = std::sqrt(real * real + img * img);
const float phase = std::atan2(img, real);
mag = std::max(0.0f, mag - weightsHost[y]);
expOutput[idx] = mag * std::cos(phase);
expOutput[idx2] = mag * std::sin(phase);
}
}
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size() * batchSize, actOutput.size());
for (int b = 0; b < batchSize; ++b) {
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i + (b * expOutput.size())], 1e-6)
<< "i = " << i << " b " << b;
}
}
}

View file

@@ -0,0 +1,944 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "binding.h"
#include "cudaMemory.h"
#include "cudaUtils.h"
#include "logging.h"
#include "taco2LSTMCellLayerPlugin.h"
#include "trtUtils.h"
#include "utils.h"
#include "NvInfer.h"
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-10.0, 10.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TESTS *****************************************************************
*****************************************************************************/
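// The CPU reference in each test below computes a standard LSTM cell over the
// concatenated input (inputFirst followed by inputSecond): with
//   z = W_input*x + b_input + W_hidden*h + b_hidden
// split into four blocks of numDimensions values,
//   i = sigmoid(z[0]), f = sigmoid(z[1]), g = tanh(z[2]), o = sigmoid(z[3]),
//   c' = f*c + i*g,    h' = o*tanh(c'),
// and the plugin's hidden/cell outputs are compared against h' and c' within
// the stated tolerances.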
TEST(CPUCompareFP32I256Test)
{
std::mt19937 rng(0);
const int inputLengthFirst = 256;
const int inputLengthSecond = 512;
const int inputLength = inputLengthFirst + inputLengthSecond;
const int numDimensions = 1024;
// weights
std::vector<float> inputWeight = genVec(inputLength * numDimensions * 4, rng);
const std::vector<float> inputBias = genVec(numDimensions * 4, rng);
std::vector<float> hiddenWeight
= genVec(numDimensions * numDimensions * 4, rng);
const std::vector<float> hiddenBias = genVec(numDimensions * 4, rng);
Taco2LSTMCellLayerPlugin layer(
TRTUtils::toWeights(inputWeight),
TRTUtils::toWeights(hiddenWeight),
TRTUtils::toWeights(inputBias),
TRTUtils::toWeights(hiddenBias),
inputLength,
numDimensions,
false);
const std::vector<float> inputFirst = genVec(inputLengthFirst, rng);
const std::vector<float> inputSecond = genVec(inputLengthSecond, rng);
const std::vector<float> hiddenState = genVec(numDimensions, rng);
const std::vector<float> cellState = genVec(numDimensions, rng);
CudaMemory<float> inputFirstDevice(inputFirst);
CudaMemory<float> inputSecondDevice(inputSecond);
CudaMemory<float> hiddenStateDevice(hiddenState);
CudaMemory<float> cellStateDevice(cellState);
const std::vector<Dims> inputDims{Dims2(1, inputLengthFirst),
Dims4(1, inputLengthSecond, 1, 1),
Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<DataType> dataTypes(4, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDesc{
{// INPUT_FIRST_INDEX
{Dims2(-1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthFirst),
Dims2(1, inputLengthFirst)},
{// INPUT_SECOND_INDEX
{Dims4(-1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthSecond),
Dims2(1, inputLengthSecond)},
{// HIDDEN_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDesc{{// HIDDEN
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
layer.configurePlugin(
inDesc.data(), inDesc.size(), outDesc.data(), outDesc.size());
layer.initialize();
const std::vector<const float*> inputs{inputFirstDevice.data(),
inputSecondDevice.data(),
hiddenStateDevice.data(),
cellStateDevice.data()};
CudaMemory<float> hiddenStateOutDevice(hiddenState.size());
CudaMemory<float> cellStateOutDevice(hiddenState.size());
std::vector<float*> outputs{hiddenStateOutDevice.data(),
cellStateOutDevice.data()};
const std::vector<PluginTensorDesc> inConf{{// INPUT_FIRST_INDEX
Dims2(1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// INPUT_SECOND_INDEX
Dims4(1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// HIDDEN_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
const std::vector<PluginTensorDesc> outConf{{// HIDDEN
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inConf.data(),
static_cast<int>(inConf.size()),
outConf.data(),
static_cast<int>(outConf.size())));
layer.enqueue(
inConf.data(),
outConf.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> prod1(4 * numDimensions, 0);
std::vector<float> prod2(4 * numDimensions, 0);
std::vector<float> prod3(4 * numDimensions, 0);
std::vector<float> prod(4 * numDimensions, 0);
// perform input MV
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthFirst); ++j) {
val += inputWeight[i * inputLength + j] * inputFirst[j];
}
prod[i] += val;
}
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthSecond); ++j) {
val += inputWeight[i * inputLength + j + inputLengthFirst]
* inputSecond[j];
}
prod[i] += val;
}
for (size_t i = 0; i < hiddenBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < hiddenState.size(); ++j) {
val += hiddenWeight[i * hiddenState.size() + j] * hiddenState[j];
}
prod[i] += val;
}
// add biases
for (size_t i = 0; i < inputBias.size(); ++i) {
prod[i] += inputBias[i] + hiddenBias[i];
}
std::vector<float> expHiddenOut(hiddenState);
std::vector<float> expCellOut(cellState);
// perform reduction
for (int row = 0; row < numDimensions; ++row) {
const float c = cellState[row];
const float i = Utils::sigmoid(prod[row]);
const float f = Utils::sigmoid(prod[row + numDimensions]);
const float g = tanh(prod[row + numDimensions * 2]);
const float o = Utils::sigmoid(prod[row + numDimensions * 3]);
const float cPrime = f * c + i * g;
const float hPrime = o * tanh(cPrime);
expHiddenOut[row] = hPrime;
expCellOut[row] = cPrime;
}
// copy back to host
const std::vector<float> actHiddenOut = hiddenStateOutDevice.toHost();
const std::vector<float> actCellOut = cellStateOutDevice.toHost();
ASSERT_EQ(expHiddenOut.size(), actHiddenOut.size());
for (size_t i = 0; i < expHiddenOut.size(); ++i) {
EXPECT_NEAR(expHiddenOut[i], actHiddenOut[i], 7.5e-4) << "i = " << i;
}
ASSERT_EQ(expCellOut.size(), actCellOut.size());
for (size_t i = 0; i < expCellOut.size(); ++i) {
EXPECT_NEAR(expCellOut[i], actCellOut[i], 5e-3) << "i = " << i;
}
}
TEST(CPUCompareFP32I1024Test)
{
std::mt19937 rng(0);
const int inputLengthFirst = 1024;
const int inputLengthSecond = 512;
const int inputLength = inputLengthFirst + inputLengthSecond;
const int numDimensions = 1024;
// weights
std::vector<float> inputWeight = genVec(inputLength * numDimensions * 4, rng);
const std::vector<float> inputBias = genVec(numDimensions * 4, rng);
std::vector<float> hiddenWeight
= genVec(numDimensions * numDimensions * 4, rng);
const std::vector<float> hiddenBias = genVec(numDimensions * 4, rng);
Taco2LSTMCellLayerPlugin layer(
TRTUtils::toWeights(inputWeight),
TRTUtils::toWeights(hiddenWeight),
TRTUtils::toWeights(inputBias),
TRTUtils::toWeights(hiddenBias),
inputLength,
numDimensions,
false);
const std::vector<float> inputFirst = genVec(inputLengthFirst, rng);
const std::vector<float> inputSecond = genVec(inputLengthSecond, rng);
const std::vector<float> hiddenState = genVec(numDimensions, rng);
const std::vector<float> cellState = genVec(numDimensions, rng);
CudaMemory<float> inputFirstDevice(inputFirst);
CudaMemory<float> inputSecondDevice(inputSecond);
CudaMemory<float> hiddenStateDevice(hiddenState);
CudaMemory<float> cellStateDevice(cellState);
const std::vector<Dims> inputDims{Dims2(1, inputLengthFirst),
Dims4(1, inputLengthSecond, 1, 1),
Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<DataType> dataTypes(4, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDesc{
{// INPUT_FIRST_INDEX
{Dims2(-1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthFirst),
Dims2(1, inputLengthFirst)},
{// INPUT_SECOND_INDEX
{Dims4(-1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthSecond),
Dims2(1, inputLengthSecond)},
{// HIDDEN_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDesc{{// HIDDEN
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
layer.configurePlugin(
inDesc.data(), inDesc.size(), outDesc.data(), outDesc.size());
layer.initialize();
const std::vector<const float*> inputs{inputFirstDevice.data(),
inputSecondDevice.data(),
hiddenStateDevice.data(),
cellStateDevice.data()};
CudaMemory<float> hiddenStateOutDevice(hiddenState.size());
CudaMemory<float> cellStateOutDevice(hiddenState.size());
std::vector<float*> outputs{hiddenStateOutDevice.data(),
cellStateOutDevice.data()};
const std::vector<PluginTensorDesc> inConf{{// INPUT_FIRST_INDEX
Dims2(1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// INPUT_SECOND_INDEX
Dims4(1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// HIDDEN_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
const std::vector<PluginTensorDesc> outConf{{// HIDDEN
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inConf.data(),
static_cast<int>(inConf.size()),
outConf.data(),
static_cast<int>(outConf.size())));
layer.enqueue(
inConf.data(),
outConf.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> prod1(4 * numDimensions, 0);
std::vector<float> prod2(4 * numDimensions, 0);
std::vector<float> prod3(4 * numDimensions, 0);
std::vector<float> prod(4 * numDimensions, 0);
// perform input MV
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthFirst); ++j) {
val += inputWeight[i * inputLength + j] * inputFirst[j];
}
prod[i] += val;
}
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthSecond); ++j) {
val += inputWeight[i * inputLength + j + inputLengthFirst]
* inputSecond[j];
}
prod[i] += val;
}
for (size_t i = 0; i < hiddenBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < hiddenState.size(); ++j) {
val += hiddenWeight[i * hiddenState.size() + j] * hiddenState[j];
}
prod[i] += val;
}
// add biases
for (size_t i = 0; i < inputBias.size(); ++i) {
prod[i] += inputBias[i] + hiddenBias[i];
}
std::vector<float> expHiddenOut(hiddenState);
std::vector<float> expCellOut(cellState);
// perform reduction
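// `prod` holds the 4*numDimensions gate pre-activations as consecutive
// blocks [i | f | g | o]; the cell is then updated element-wise as
//   i = sigmoid(.), f = sigmoid(.), g = tanh(.), o = sigmoid(.),
//   c' = f * c + i * g,  h' = o * tanh(c')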
for (int row = 0; row < numDimensions; ++row) {
const float c = cellState[row];
const float i = Utils::sigmoid(prod[row]);
const float f = Utils::sigmoid(prod[row + numDimensions]);
const float g = tanh(prod[row + numDimensions * 2]);
const float o = Utils::sigmoid(prod[row + numDimensions * 3]);
const float cPrime = f * c + i * g;
const float hPrime = o * tanh(cPrime);
expHiddenOut[row] = hPrime;
expCellOut[row] = cPrime;
}
// copy back to host
const std::vector<float> actHiddenOut = hiddenStateOutDevice.toHost();
const std::vector<float> actCellOut = cellStateOutDevice.toHost();
ASSERT_EQ(expHiddenOut.size(), actHiddenOut.size());
for (size_t i = 0; i < expHiddenOut.size(); ++i) {
EXPECT_NEAR(expHiddenOut[i], actHiddenOut[i], 7.5e-4) << "i = " << i;
}
ASSERT_EQ(expCellOut.size(), actCellOut.size());
for (size_t i = 0; i < expCellOut.size(); ++i) {
EXPECT_NEAR(expCellOut[i], actCellOut[i], 5e-3) << "i = " << i;
}
}
TEST(CPUCompareFP16I256Test)
{
std::mt19937 rng(0);
const int inputLengthFirst = 256;
const int inputLengthSecond = 512;
const int inputLength = inputLengthFirst + inputLengthSecond;
const int numDimensions = 1024;
// weights
std::vector<float> inputWeight = genVec(inputLength * numDimensions * 4, rng);
const std::vector<float> inputBias = genVec(numDimensions * 4, rng);
std::vector<float> hiddenWeight
= genVec(numDimensions * numDimensions * 4, rng);
const std::vector<float> hiddenBias = genVec(numDimensions * 4, rng);
Taco2LSTMCellLayerPlugin layer(
TRTUtils::toWeights(inputWeight),
TRTUtils::toWeights(hiddenWeight),
TRTUtils::toWeights(inputBias),
TRTUtils::toWeights(hiddenBias),
inputLength,
numDimensions,
true);
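// the trailing 'true' presumably enables the plugin's FP16 path (suggested
// by the test name and the looser tolerances below); the test inputs and
// the CPU reference stay in FP32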
const std::vector<float> inputFirst = genVec(inputLengthFirst, rng);
const std::vector<float> inputSecond = genVec(inputLengthSecond, rng);
const std::vector<float> hiddenState = genVec(numDimensions, rng);
const std::vector<float> cellState = genVec(numDimensions, rng);
CudaMemory<float> inputFirstDevice(inputFirst);
CudaMemory<float> inputSecondDevice(inputSecond);
CudaMemory<float> hiddenStateDevice(hiddenState);
CudaMemory<float> cellStateDevice(cellState);
const std::vector<Dims> inputDims{Dims2(1, inputLengthFirst),
Dims4(1, inputLengthSecond, 1, 1),
Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<DataType> dataTypes(4, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDesc{
{// INPUT_FIRST_INDEX
{Dims2(-1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthFirst),
Dims2(1, inputLengthFirst)},
{// INPUT_SECOND_INDEX
{Dims4(-1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthSecond),
Dims2(1, inputLengthSecond)},
{// HIDDEN_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDesc{{// HIDDEN
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
layer.configurePlugin(
inDesc.data(), inDesc.size(), outDesc.data(), outDesc.size());
layer.initialize();
const std::vector<const float*> inputs{inputFirstDevice.data(),
inputSecondDevice.data(),
hiddenStateDevice.data(),
cellStateDevice.data()};
CudaMemory<float> hiddenStateOutDevice(hiddenState.size());
CudaMemory<float> cellStateOutDevice(hiddenState.size());
std::vector<float*> outputs{hiddenStateOutDevice.data(),
cellStateOutDevice.data()};
const std::vector<PluginTensorDesc> inConf{{// INPUT_FIRST_INDEX
Dims2(1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// INPUT_SECOND_INDEX
Dims4(1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// HIDDEN_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
const std::vector<PluginTensorDesc> outConf{{// HIDDEN
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inConf.data(),
static_cast<int>(inConf.size()),
outConf.data(),
static_cast<int>(outConf.size())));
layer.enqueue(
inConf.data(),
outConf.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> prod(4 * numDimensions, 0);
// perform input MV
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthFirst); ++j) {
val += inputWeight[i * inputLength + j] * inputFirst[j];
}
prod[i] += val;
}
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthSecond); ++j) {
val += inputWeight[i * inputLength + j + inputLengthFirst]
* inputSecond[j];
}
prod[i] += val;
}
for (size_t i = 0; i < hiddenBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < hiddenState.size(); ++j) {
val += hiddenWeight[i * hiddenState.size() + j] * hiddenState[j];
}
prod[i] += val;
}
// add biases
for (size_t i = 0; i < inputBias.size(); ++i) {
prod[i] += inputBias[i] + hiddenBias[i];
}
std::vector<float> expHiddenOut(hiddenState);
std::vector<float> expCellOut(cellState);
// perform reduction
for (int row = 0; row < numDimensions; ++row) {
const float c = cellState[row];
const float i = Utils::sigmoid(prod[row]);
const float f = Utils::sigmoid(prod[row + numDimensions]);
const float g = tanh(prod[row + numDimensions * 2]);
const float o = Utils::sigmoid(prod[row + numDimensions * 3]);
const float cPrime = f * c + i * g;
const float hPrime = o * tanh(cPrime);
expHiddenOut[row] = hPrime;
expCellOut[row] = cPrime;
}
// copy back to host
const std::vector<float> actHiddenOut = hiddenStateOutDevice.toHost();
const std::vector<float> actCellOut = cellStateOutDevice.toHost();
ASSERT_EQ(expHiddenOut.size(), actHiddenOut.size());
for (size_t i = 0; i < expHiddenOut.size(); ++i) {
EXPECT_NEAR(expHiddenOut[i], actHiddenOut[i], 4.5e-1) << "i = " << i;
}
ASSERT_EQ(expCellOut.size(), actCellOut.size());
for (size_t i = 0; i < expCellOut.size(); ++i) {
EXPECT_NEAR(expCellOut[i], actCellOut[i], 4.5e-1) << "i = " << i;
}
}
TEST(CPUCompareFP16I1024Test)
{
std::mt19937 rng(0);
const int inputLengthFirst = 1024;
const int inputLengthSecond = 512;
const int inputLength = inputLengthFirst + inputLengthSecond;
const int numDimensions = 1024;
// weights
std::vector<float> inputWeight = genVec(inputLength * numDimensions * 4, rng);
const std::vector<float> inputBias = genVec(numDimensions * 4, rng);
std::vector<float> hiddenWeight
= genVec(numDimensions * numDimensions * 4, rng);
const std::vector<float> hiddenBias = genVec(numDimensions * 4, rng);
Taco2LSTMCellLayerPlugin layer(
TRTUtils::toWeights(inputWeight),
TRTUtils::toWeights(hiddenWeight),
TRTUtils::toWeights(inputBias),
TRTUtils::toWeights(hiddenBias),
inputLength,
numDimensions,
true);
const std::vector<float> inputFirst = genVec(inputLengthFirst, rng);
const std::vector<float> inputSecond = genVec(inputLengthSecond, rng);
const std::vector<float> hiddenState = genVec(numDimensions, rng);
const std::vector<float> cellState = genVec(numDimensions, rng);
CudaMemory<float> inputFirstDevice(inputFirst);
CudaMemory<float> inputSecondDevice(inputSecond);
CudaMemory<float> hiddenStateDevice(hiddenState);
CudaMemory<float> cellStateDevice(cellState);
const std::vector<Dims> inputDims{Dims2(1, inputLengthFirst),
Dims4(1, inputLengthSecond, 1, 1),
Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims2(1, numDimensions),
Dims2(1, numDimensions)};
const std::vector<DataType> dataTypes(4, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDesc{
{// INPUT_FIRST_INDEX
{Dims2(-1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthFirst),
Dims2(1, inputLengthFirst)},
{// INPUT_SECOND_INDEX
{Dims4(-1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, inputLengthSecond),
Dims2(1, inputLengthSecond)},
{// HIDDEN_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL_INDEX
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDesc{{// HIDDEN
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)},
{// CELL
{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
layer.configurePlugin(
inDesc.data(), inDesc.size(), outDesc.data(), outDesc.size());
layer.initialize();
const std::vector<const float*> inputs{inputFirstDevice.data(),
inputSecondDevice.data(),
hiddenStateDevice.data(),
cellStateDevice.data()};
CudaMemory<float> hiddenStateOutDevice(hiddenState.size());
CudaMemory<float> cellStateOutDevice(hiddenState.size());
std::vector<float*> outputs{hiddenStateOutDevice.data(),
cellStateOutDevice.data()};
const std::vector<PluginTensorDesc> inConf{{// INPUT_FIRST_INDEX
Dims2(1, inputLengthFirst),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// INPUT_SECOND_INDEX
Dims4(1, inputLengthSecond, 1, 1),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// HIDDEN_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL_INDEX
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
const std::vector<PluginTensorDesc> outConf{{// HIDDEN
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{// CELL
Dims2(1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inConf.data(),
static_cast<int>(inConf.size()),
outConf.data(),
static_cast<int>(outConf.size())));
layer.enqueue(
inConf.data(),
outConf.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> prod(4 * numDimensions, 0);
// perform input MV
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthFirst); ++j) {
val += inputWeight[i * inputLength + j] * inputFirst[j];
}
prod[i] += val;
}
for (size_t i = 0; i < inputBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < static_cast<size_t>(inputLengthSecond); ++j) {
val += inputWeight[i * inputLength + j + inputLengthFirst]
* inputSecond[j];
}
prod[i] += val;
}
for (size_t i = 0; i < hiddenBias.size(); ++i) {
double val = 0;
for (size_t j = 0; j < hiddenState.size(); ++j) {
val += hiddenWeight[i * hiddenState.size() + j] * hiddenState[j];
}
prod[i] += val;
}
// add biases
for (size_t i = 0; i < inputBias.size(); ++i) {
prod[i] += inputBias[i] + hiddenBias[i];
}
std::vector<float> expHiddenOut(hiddenState);
std::vector<float> expCellOut(cellState);
// perform reduction
for (int row = 0; row < numDimensions; ++row) {
const float c = cellState[row];
const float i = Utils::sigmoid(prod[row]);
const float f = Utils::sigmoid(prod[row + numDimensions]);
const float g = tanh(prod[row + numDimensions * 2]);
const float o = Utils::sigmoid(prod[row + numDimensions * 3]);
const float cPrime = f * c + i * g;
const float hPrime = o * tanh(cPrime);
expHiddenOut[row] = hPrime;
expCellOut[row] = cPrime;
}
// copy back to host
const std::vector<float> actHiddenOut = hiddenStateOutDevice.toHost();
const std::vector<float> actCellOut = cellStateOutDevice.toHost();
ASSERT_EQ(expHiddenOut.size(), actHiddenOut.size());
for (size_t i = 0; i < expHiddenOut.size(); ++i) {
EXPECT_NEAR(expHiddenOut[i], actHiddenOut[i], 4.5e-1) << "i = " << i;
}
ASSERT_EQ(expCellOut.size(), actCellOut.size());
for (size_t i = 0; i < expCellOut.size(); ++i) {
EXPECT_NEAR(expCellOut[i], actCellOut[i], 4.5e-1) << "i = " << i;
}
}

View file

@ -0,0 +1,241 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "cudaMemory.h"
#include "taco2ModulationRemovalLayerPlugin.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <cfloat>
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-1.0, 1.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TEST ******************************************************************
*****************************************************************************/
TEST(CPUCompareTestBatch1)
{
std::mt19937 rng(0);
const int numFrames = 250;
const int filterLength = 1024;
const int hopLength = 256;
const int inputLength = numFrames * hopLength;
std::vector<float> weightsHost = genVec(filterLength, rng);
std::fill(weightsHost.begin(), weightsHost.end(), 1.0f);
Taco2ModulationRemovalLayerPlugin layer(
TRTUtils::toWeights(weightsHost), inputLength, filterLength, hopLength);
std::vector<float> inputHost = genVec(inputLength, rng);
std::fill(inputHost.begin(), inputHost.end(), 1.0f);
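// both the window weights and the input are overwritten with 1.0f, so the
// expected output is determined entirely by the window-sum normalization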
CudaMemory<float> inputDevice(inputHost);
std::vector<Dims> inputDims{Dims3(1, 1, inputLength)};
const std::vector<Dims> outputDims{Dims3(1, 1, inputLength)};
const std::vector<DataType> dataTypes{DataType::kFLOAT};
const bool broadcast[] = {false};
layer.configurePlugin(
inputDims.data(),
static_cast<int>(inputDims.size()),
outputDims.data(),
static_cast<int>(outputDims.size()),
dataTypes.data(),
dataTypes.data(),
broadcast,
broadcast,
#if NV_TENSORRT_MAJOR < 6
PluginFormat::kNCHW,
#else
PluginFormat::kLINEAR,
#endif
1);
layer.initialize();
std::vector<const float*> inputs{inputDevice.data()};
CudaMemory<float> outputDevice(inputLength - filterLength);
std::vector<float*> outputs{outputDevice.data()};
layer.enqueue(
1,
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
nullptr,
0);
CudaUtils::sync(0);
// perform operations on cpu
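// reference: sum the overlapping window weights at each sample position
// (hop-shifted copies of the filter), divide the signal by that sum where
// it is non-zero, rescale by filterLength/hopLength, and trim half a
// filter length from both ends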
std::vector<float> windowSum(inputLength, 0);
for (int i = 0; i < inputLength; i += hopLength) {
for (int j = 0; j < filterLength; ++j) {
const int idx = i + j;
if (idx < inputLength) {
windowSum[idx] += weightsHost[j];
}
}
}
std::vector<float> expOutput(inputLength, 0);
for (int x = 0; x < inputLength; ++x) {
float val = inputHost[x];
if (windowSum[x] > FLT_MIN) {
val /= windowSum[x];
}
val *= static_cast<float>(filterLength) / static_cast<float>(hopLength);
expOutput[x] = val;
}
expOutput.erase(expOutput.begin(), expOutput.begin() + (filterLength / 2));
expOutput.erase(expOutput.end() - (filterLength / 2), expOutput.end());
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size(), actOutput.size());
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i], 1e-6) << "i = " << i;
}
}
TEST(CPUCompareTestBatch4)
{
std::mt19937 rng(0);
const int batchSize = 2;
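// NOTE: despite the test name, this test uses a batch size of 2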
const int numFrames = 250;
const int filterLength = 1024;
const int hopLength = 256;
const int inputLength = numFrames * hopLength;
std::vector<float> weightsHost = genVec(filterLength, rng);
std::fill(weightsHost.begin(), weightsHost.end(), 1.0f);
Taco2ModulationRemovalLayerPlugin layer(
TRTUtils::toWeights(weightsHost), inputLength, filterLength, hopLength);
std::vector<float> inputHost = genVec(batchSize * inputLength, rng);
std::fill(inputHost.begin(), inputHost.end(), 1.0f);
CudaMemory<float> inputDevice(inputHost);
std::vector<Dims> inputDims{Dims3(1, 1, inputLength)};
const std::vector<Dims> outputDims{Dims3(1, 1, inputLength)};
const std::vector<DataType> dataTypes{DataType::kFLOAT};
const bool broadcast[] = {false};
layer.configurePlugin(
inputDims.data(),
static_cast<int>(inputDims.size()),
outputDims.data(),
static_cast<int>(outputDims.size()),
dataTypes.data(),
dataTypes.data(),
broadcast,
broadcast,
PluginFormat::kLINEAR,
batchSize);
layer.initialize();
std::vector<const float*> inputs{inputDevice.data()};
CudaMemory<float> outputDevice((inputLength - filterLength) * batchSize);
std::vector<float*> outputs{outputDevice.data()};
layer.enqueue(
batchSize,
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
nullptr,
0);
CudaUtils::sync(0);
// perform operations on cpu
std::vector<float> windowSum(inputLength, 0);
for (int i = 0; i < inputLength; i += hopLength) {
for (int j = 0; j < filterLength; ++j) {
const int idx = i + j;
if (idx < inputLength) {
windowSum[idx] += weightsHost[j];
}
}
}
std::vector<float> expOutput(inputLength, 0);
for (int x = 0; x < inputLength; ++x) {
float val = inputHost[x];
if (windowSum[x] > FLT_MIN) {
val /= windowSum[x];
}
val *= static_cast<float>(filterLength) / static_cast<float>(hopLength);
expOutput[x] = val;
}
expOutput.erase(expOutput.begin(), expOutput.begin() + (filterLength / 2));
expOutput.erase(expOutput.end() - (filterLength / 2), expOutput.end());
// match outputs -- across entire batch
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size() * batchSize, actOutput.size());
for (int b = 0; b < batchSize; ++b) {
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i + b * expOutput.size()], 1e-6)
<< "i = " << i << ", b = " << b;
}
}
}

View file

@ -0,0 +1,185 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "binding.h"
#include "cudaMemory.h"
#include "cudaUtils.h"
#include "logging.h"
#include "taco2PrenetLayerPlugin.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-1.0, 1.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TESTS *****************************************************************
*****************************************************************************/
TEST(CPUCompareTest)
{
std::mt19937 rng(0);
const int inputLength = 80;
const int numDimensions = 256;
// weights
std::vector<float> weight1 = genVec(inputLength * numDimensions, rng);
std::vector<float> weight2 = genVec(numDimensions * numDimensions, rng);
Taco2PrenetLayerPlugin layer(
TRTUtils::toWeights(weight1),
TRTUtils::toWeights(weight2),
inputLength,
numDimensions);
const std::vector<float> inputHost = genVec(numDimensions, rng);
const std::vector<float> dropoutHost(numDimensions, 1.0f);
CudaMemory<float> inputDevice(inputHost);
CudaMemory<float> dropoutDevice(dropoutHost);
std::vector<Dims> inputDims{Dims3(1, 1, inputLength),
Dims2(1, numDimensions)};
const std::vector<Dims> outputDims{Dims3(1, 1, numDimensions)};
const std::vector<DataType> dataTypes(2, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDynDesc{
{{Dims3(-1, 1, inputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, inputLength),
Dims3(1, 1, inputLength)},
{{Dims2(-1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims2(1, numDimensions),
Dims2(1, numDimensions)}};
const std::vector<DynamicPluginTensorDesc> outDynDesc{
{{Dims3(-1, 1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, numDimensions),
Dims3(1, 1, numDimensions)}};
layer.configurePlugin(
inDynDesc.data(), inDynDesc.size(), outDynDesc.data(), outDynDesc.size());
layer.initialize();
std::vector<const float*> inputs{inputDevice.data(), dropoutDevice.data()};
CudaMemory<float> outputDevice(numDimensions);
std::vector<float*> outputs{outputDevice.data()};
const std::vector<PluginTensorDesc> inDesc{
{Dims3(1, 1, inputLength), DataType::kFLOAT, TensorFormat::kLINEAR, 1.0f},
{Dims2(1, numDimensions), DataType::kFLOAT, TensorFormat::kLINEAR, 1.0f},
};
const std::vector<PluginTensorDesc> outDesc{{Dims3(1, 1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inDesc.data(),
static_cast<int>(inDesc.size()),
outDesc.data(),
static_cast<int>(outDesc.size())));
layer.enqueue(
inDesc.data(),
outDesc.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
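// reference prenet: two bias-free linear layers, each followed by ReLU and
// an element-wise multiply with the dropout mask (all ones in this test)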
std::vector<float> expOutput(numDimensions);
std::vector<float> intermediate(numDimensions);
for (int i = 0; i < numDimensions; ++i) {
float v = 0.0f;
for (int j = 0; j < inputLength; ++j) {
v += inputHost[j] * weight1[i * inputLength + j];
}
intermediate[i] = v;
}
for (int i = 0; i < numDimensions; ++i) {
intermediate[i] = std::max(0.0f, intermediate[i]) * dropoutHost[i];
}
for (int i = 0; i < numDimensions; ++i) {
float v = 0.0f;
for (int j = 0; j < numDimensions; ++j) {
v += intermediate[j] * weight2[i * numDimensions + j];
}
expOutput[i] = v;
}
for (int i = 0; i < numDimensions; ++i) {
expOutput[i] = std::max(0.0f, expOutput[i]) * dropoutHost[i];
}
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size(), actOutput.size());
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i], 1e-4) << "i = " << i;
}
}

View file

@ -0,0 +1,205 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include "binding.h"
#include "cudaMemory.h"
#include "cudaUtils.h"
#include "logging.h"
#include "taco2ProjectionLayerPlugin.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <random>
#include <vector>
using namespace nvinfer1;
using namespace nvinfer1::plugin;
using namespace tts;
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
template <typename RNG>
std::vector<float> genVec(const size_t size, RNG& rng)
{
std::uniform_real_distribution<float> dist(-1.0, 1.0);
std::vector<float> vec(size);
for (size_t i = 0; i < size; ++i) {
vec[i] = dist(rng);
}
return vec;
}
} // namespace
/******************************************************************************
* UNIT TESTS *****************************************************************
*****************************************************************************/
TEST(CPUCompareTest)
{
std::mt19937 rng(0);
const int hiddenInputLength = 1024;
const int contextInputLength = 512;
const int numChannelDimensions = 80;
const int numGateDimensions = 1;
const int inputLength = hiddenInputLength + contextInputLength;
const int numDimensions = numChannelDimensions + numGateDimensions;
// weights
std::vector<float> weightChannel
= genVec(inputLength * numChannelDimensions, rng);
std::vector<float> weightGate = genVec(inputLength * numGateDimensions, rng);
std::vector<float> biasChannel = genVec(numChannelDimensions, rng);
std::vector<float> biasGate = genVec(numGateDimensions, rng);
Taco2ProjectionLayerPlugin layer(
TRTUtils::toWeights(weightChannel),
TRTUtils::toWeights(weightGate),
TRTUtils::toWeights(biasChannel),
TRTUtils::toWeights(biasGate),
hiddenInputLength,
contextInputLength,
numChannelDimensions,
numGateDimensions);
std::vector<float> inputHidden = genVec(hiddenInputLength, rng);
std::vector<float> inputContext = genVec(contextInputLength, rng);
CudaMemory<float> inputHiddenDevice(inputHidden);
CudaMemory<float> inputContextDevice(inputContext);
std::vector<Dims> inputDims{Dims3(1, 1, hiddenInputLength),
Dims3(1, 1, contextInputLength)};
const std::vector<Dims> outputDims{Dims3(1, 1, numDimensions)};
const std::vector<DataType> dataTypes(2, DataType::kFLOAT);
const std::vector<DynamicPluginTensorDesc> inDynDesc{
{{Dims3(-1, 1, hiddenInputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, hiddenInputLength),
Dims3(1, 1, hiddenInputLength)},
{{Dims3(-1, 1, contextInputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, contextInputLength),
Dims3(1, 1, contextInputLength)}};
const std::vector<DynamicPluginTensorDesc> outDynDesc{
{{Dims3(-1, 1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
Dims3(1, 1, numDimensions),
Dims3(1, 1, numDimensions)}};
layer.configurePlugin(
inDynDesc.data(), inDynDesc.size(), outDynDesc.data(), outDynDesc.size());
layer.initialize();
std::vector<const float*> inputs{inputHiddenDevice.data(),
inputContextDevice.data()};
CudaMemory<float> outputDevice(numDimensions);
std::vector<float*> outputs{outputDevice.data()};
const std::vector<PluginTensorDesc> inDesc{
{Dims3(1, 1, hiddenInputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
{Dims3(1, 1, contextInputLength),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f},
};
const std::vector<PluginTensorDesc> outDesc{{Dims3(1, 1, numDimensions),
DataType::kFLOAT,
TensorFormat::kLINEAR,
1.0f}};
CudaMemory<uint8_t> workspace(layer.getWorkspaceSize(
inDesc.data(),
static_cast<int>(inDesc.size()),
outDesc.data(),
static_cast<int>(outDesc.size())));
layer.enqueue(
inDesc.data(),
outDesc.data(),
reinterpret_cast<const void* const*>(inputs.data()),
reinterpret_cast<void**>(outputs.data()),
workspace.data(),
0);
CudaUtils::sync(0);
// perform operations on cpu
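// reference: one linear layer over the concatenation [hidden | context];
// the first numChannelDimensions output rows use weightChannel/biasChannel
// and the remaining numGateDimensions rows use weightGate/biasGate (in
// Tacotron 2 these would correspond to the mel channels and the stop gate)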
std::vector<float> expOutput(numDimensions);
for (int i = 0; i < numChannelDimensions; ++i) {
float v = 0.0f;
for (int j = 0; j < hiddenInputLength; ++j) {
v += inputHidden[j] * weightChannel[i * inputLength + j];
}
for (int j = 0; j < contextInputLength; ++j) {
v += inputContext[j]
* weightChannel[i * inputLength + j + hiddenInputLength];
}
expOutput[i] = v + biasChannel[i];
}
for (int i = 0; i < numGateDimensions; ++i) {
float v = 0.0f;
for (int j = 0; j < hiddenInputLength; ++j) {
v += inputHidden[j] * weightGate[i * inputLength + j];
}
for (int j = 0; j < contextInputLength; ++j) {
v += inputContext[j]
* weightGate[i * inputLength + j + hiddenInputLength];
}
expOutput[i + numChannelDimensions] = v + biasGate[i];
}
// match outputs
const std::vector<float> actOutput = outputDevice.toHost();
ASSERT_EQ(expOutput.size(), actOutput.size());
for (size_t i = 0; i < expOutput.size(); ++i) {
EXPECT_NEAR(expOutput[i], actOutput[i], 1e-4) << "i = " << i;
}
}

View file

@ -0,0 +1,125 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "UnitTest.hpp"
#include <exception>
#include <iostream>
namespace
{
std::vector<UnitTest*>* s_tests = nullptr;
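// allocated lazily in registerTest(), presumably so that tests registering
// themselves from global constructors in other translation units do not
// depend on static initialization order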
}
/******************************************************************************
* PUBLIC STATIC METHODS ******************************************************
*****************************************************************************/
bool UnitTest::runAll()
{
size_t numPassed = 0;
size_t numTests = 0;
if (s_tests) {
numTests = s_tests->size();
for (UnitTest* const test : *s_tests) {
try {
test->run();
if (test->passed()) {
std::cout << "Test: " << test->fullname() << " passed." << std::endl;
++numPassed;
continue;
}
} catch (const TestException&) {
// assertion failed
} catch (const std::exception& e) {
std::cout << "Unhandled excpetion: " << e.what() << std::endl;
}
std::cout << "Test: " << test->fullname() << " failed." << std::endl;
}
}
std::cout << numPassed << " / " << numTests << " passed." << std::endl;
return numPassed == numTests;
}
void UnitTest::registerTest(UnitTest* const test)
{
if (!s_tests) {
s_tests = new std::vector<UnitTest*>(0);
}
s_tests->emplace_back(test);
}
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
UnitTest::UnitTest(const std::string& filename, const std::string& name)
: m_nullStream(), m_passed(true), m_filename(filename), m_name(name)
{
registerTest(this);
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
std::string UnitTest::fullname() const
{
return m_filename + "__" + m_name;
}
bool UnitTest::passed() const
{
return m_passed;
}
/******************************************************************************
* PROTECTED METHODS **********************************************************
*****************************************************************************/
void UnitTest::failure()
{
m_passed = false;
}
/******************************************************************************
* MAIN ***********************************************************************
*****************************************************************************/
int main(int /*argc*/, char** /*argv*/)
{
if (UnitTest::runAll()) {
return 0;
} else {
return 1;
}
}

View file

@ -0,0 +1,285 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_UNITTEST_HPP
#define TT2I_UNITTEST_HPP
#include <cmath>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#define _TEST(test_name, name) \
class test_name : public UnitTest \
{ \
public: \
test_name() : UnitTest(__FILE__, #name){}; \
void run() override; \
}; \
test_name test_name##_instance; \
void test_name::run()
#define TEST(name) _TEST(test_##__FILE__##__##name, name)
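// Hypothetical usage sketch: TEST(name) declares a UnitTest subclass plus a
// global instance that registers itself, and the block following the macro
// becomes the body of run():
//
//   TEST(MyExampleTest)
//   {
//       ASSERT_EQ(2 + 2, 4);
//       EXPECT_NEAR(3.14f, 3.1415f, 1e-2f) << "pi approximation";
//   }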
#define ASSERT_TRUE(x) \
do { \
if (!(x)) { \
std::cerr << "ASSERT_TRUE: " << #x << "(" << (x) << ") is false at " \
<< __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_EQ(x, y) \
do { \
if (!((x) == (y))) { \
std::cerr << "ASSERT_EQ: " << #x << "(" << (x) << ") != " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_LT(x, y) \
do { \
if (!areComparable((x), (y)) || !((x) < (y))) { \
std::cerr << "ASSERT_LT: " << #x << "(" << (x) << ") !< " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_LE(x, y) \
do { \
if (!areComparable((x), (y)) || !((x) <= (y))) { \
std::cerr << "ASSERT_LE: " << #x << "(" << (x) << ") !<= " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_GT(x, y) \
do { \
if (!areComparable((x), (y)) || !((x) > (y))) { \
std::cerr << "ASSERT_GT: " << #x << "(" << (x) << ") !> " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define ASSERT_GE(x, y) \
do { \
if (!areComparable((x), (y)) || !((x) >= (y))) { \
std::cerr << "ASSERT_GE: " << #x << "(" << (x) << ") !>= " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
throw TestException(); \
} \
} while (false)
#define EXPECT_TRUE(x) \
[&]() { \
if (!(x)) { \
std::cerr << "EXPECT_TRUE: " << #x << "(" << (x) << ") is false at " \
<< __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_EQ(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) == (y))) { \
std::cerr << "EXPECT_EQ: " << #x << "(" << (x) << ") != " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_LT(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) < (y))) { \
std::cerr << "EXPECT_LT: " << #x << "(" << (x) << ") !< " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_LE(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) <= (y))) { \
std::cerr << "EXPECT_LE: " << #x << "(" << (x) << ") !<= " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_GT(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) > (y))) { \
std::cerr << "EXPECT_GT: " << #x << "(" << (x) << ") !> " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_GE(x, y) \
[&]() { \
if (!areComparable((x), (y)) || !((x) >= (y))) { \
std::cerr << "EXPECT_GE: " << #x << "(" << (x) << ") !>= " << #y << "(" \
<< (y) << ") " \
<< "at " << __FILE__ << ":" << __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
#define EXPECT_NEAR(x, y, t) \
[&]() { \
auto diff = std::abs((x) - (y)); \
if (!areComparable((x), (y)) || diff > (t)) { \
std::cerr << "EXPECT_NEAR: " << #x << "(" << (x) << ") !~= " << #y \
<< "(" << (y) << ") " \
<< " within (" << diff << "/" #t << ") at " << __FILE__ << ":" \
<< __LINE__ << std::endl; \
this->failure(); \
return CheckOutput(true); \
} else { \
return CheckOutput(false); \
} \
}()
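// CheckOutput is the temporary returned by the EXPECT_* macros: text
// streamed into it (e.g. << "i = " << i) is buffered and printed from the
// destructor only when the check failed; on success it is discarded.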
class CheckOutput
{
public:
CheckOutput(bool output) : m_displayOutput(output), m_output()
{
}
CheckOutput(CheckOutput&& other)
: m_displayOutput(other.m_displayOutput),
m_output(std::move(other.m_output))
{
other.m_displayOutput = false;
}
~CheckOutput()
{
if (m_displayOutput && !m_output.str().empty()) {
std::cerr << m_output.str() << std::endl;
}
}
template <typename T>
CheckOutput& operator<<(const T& obj)
{
m_output << obj;
return *this;
}
private:
bool m_displayOutput;
std::ostringstream m_output;
};
class TestException : public std::runtime_error
{
public:
TestException() : std::runtime_error("TestFailed"){};
};
class UnitTest
{
public:
static bool runAll();
static void registerTest(UnitTest* test);
UnitTest(const std::string& filename, const std::string& name);
virtual ~UnitTest() = default;
virtual void run() = 0;
std::string fullname() const;
bool passed() const;
protected:
void failure();
template <
typename T,
typename std::enable_if<std::is_floating_point<T>::value, int>::type = 0>
bool areComparable(T x, T y) const
{
return !std::isnan(x) && !std::isnan(y) &&
(!std::isinf(x) || !std::isinf(y));
}
template <
typename T,
typename std::enable_if<!std::is_floating_point<T>::value, int>::type = 0>
bool areComparable(T, T) const
{
return true;
}
std::ostringstream m_nullStream;
private:
bool m_passed;
std::string m_filename;
std::string m_name;
};
#endif

View file

@ -0,0 +1,77 @@
##
# Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the NVIDIA CORPORATION nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
file(GLOB TACOTRON2WAVEGLOW_SOURCES
speechSynthesizer.cpp
denoiser/*.cpp
layers/*.cpp
tacotron2/*.cpp
tacotron2/*.cu
util/*.cpp
util/*.cu
waveglow/*.cpp
waveglow/*.cu
plugins/taco2AttentionPlugin/*.cpp
plugins/taco2AttentionPlugin/*.cu
plugins/taco2DenoiseTransformPlugin/*.cpp
plugins/taco2DenoiseTransformPlugin/*.cu
plugins/taco2LSTMCellPlugin/*.cpp
plugins/taco2LSTMCellPlugin/*.cu
plugins/taco2ModulationRemovalPlugin/*.cpp
plugins/taco2ModulationRemovalPlugin/*.cu
plugins/taco2PrenetPlugin/*.cpp
plugins/taco2PrenetPlugin/*.cu
plugins/taco2ProjectionPlugin/*.cpp
plugins/taco2ProjectionPlugin/*.cu
)
set(TARGET_NAME "tt2i")
## library
add_library(${TARGET_NAME} ${TACOTRON2WAVEGLOW_SOURCES})
target_link_libraries(${TARGET_NAME}
cublas
nvinfer
nvonnxparser
)
set_property(TARGET ${TARGET_NAME} PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON)
set_property(TARGET ${TARGET_NAME} PROPERTY ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
include_directories(
./
./denoiser/
./layers/
./tacotron2/
./util/
./waveglow/
./common/
./plugins/taco2AttentionPlugin/
./plugins/taco2DenoiseTransformPlugin/
./plugins/taco2LSTMCellPlugin/
./plugins/taco2ModulationRemovalPlugin/
./plugins/taco2PrenetPlugin/
./plugins/taco2ProjectionPlugin/
)

View file

@ -0,0 +1,505 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef TENSORRT_LOGGING_H
#define TENSORRT_LOGGING_H
#include "NvInferRuntimeCommon.h"
#include <cassert>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <ostream>
#include <sstream>
#include <string>
using Severity = nvinfer1::ILogger::Severity;
class LogStreamConsumerBuffer : public std::stringbuf
{
public:
LogStreamConsumerBuffer(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mOutput(stream)
, mPrefix(prefix)
, mShouldLog(shouldLog)
{
}
LogStreamConsumerBuffer(LogStreamConsumerBuffer&& other)
: mOutput(other.mOutput)
, mPrefix(std::move(other.mPrefix))
, mShouldLog(std::move(other.mShouldLog))
{
}
~LogStreamConsumerBuffer()
{
// std::streambuf::pbase() gives a pointer to the beginning of the buffered part of the output sequence
// std::streambuf::pptr() gives a pointer to the current position of the output sequence
// if the pointer to the beginning is not equal to the pointer to the current position,
// call putOutput() to log the output to the stream
if (pbase() != pptr())
{
putOutput();
}
}
// synchronizes the stream buffer and returns 0 on success
// synchronizing the stream buffer consists of inserting the buffer contents into the stream,
// resetting the buffer and flushing the stream
virtual int sync()
{
putOutput();
return 0;
}
void putOutput()
{
if (mShouldLog)
{
// prepend timestamp
std::time_t timestamp = std::time(nullptr);
tm* tm_local = std::localtime(&timestamp);
std::cout << "[";
// tm_mon is zero-based, so add 1 to print the calendar month
std::cout << std::setw(2) << std::setfill('0') << 1 + tm_local->tm_mon << "/";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_mday << "/";
std::cout << std::setw(4) << std::setfill('0') << 1900 + tm_local->tm_year << "-";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_hour << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_min << ":";
std::cout << std::setw(2) << std::setfill('0') << tm_local->tm_sec << "] ";
// std::stringbuf::str() gets the string contents of the buffer
// insert the buffer contents, prefixed with the appropriate severity tag, into the stream
mOutput << mPrefix << str();
// set the buffer to empty
str("");
// flush the stream
mOutput.flush();
}
}
void setShouldLog(bool shouldLog)
{
mShouldLog = shouldLog;
}
private:
std::ostream& mOutput;
std::string mPrefix;
bool mShouldLog;
};
//!
//! \class LogStreamConsumerBase
//! \brief Convenience object used to initialize LogStreamConsumerBuffer before std::ostream in LogStreamConsumer
//!
class LogStreamConsumerBase
{
public:
LogStreamConsumerBase(std::ostream& stream, const std::string& prefix, bool shouldLog)
: mBuffer(stream, prefix, shouldLog)
{
}
protected:
LogStreamConsumerBuffer mBuffer;
};
//!
//! \class LogStreamConsumer
//! \brief Convenience object used to facilitate use of C++ stream syntax when logging messages.
//! Order of base classes is LogStreamConsumerBase and then std::ostream.
//! This is because the LogStreamConsumerBase class is used to initialize the LogStreamConsumerBuffer member field
//! in LogStreamConsumer and then the address of the buffer is passed to std::ostream.
//! This is necessary to prevent the address of an uninitialized buffer from being passed to std::ostream.
//! Please do not change the order of the parent classes.
//!
class LogStreamConsumer : protected LogStreamConsumerBase, public std::ostream
{
public:
//! \brief Creates a LogStreamConsumer which logs messages with level severity.
//! Reportable severity determines if the messages are severe enough to be logged.
LogStreamConsumer(Severity reportableSeverity, Severity severity)
: LogStreamConsumerBase(severityOstream(severity), severityPrefix(severity), severity <= reportableSeverity)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(severity <= reportableSeverity)
, mSeverity(severity)
{
}
LogStreamConsumer(LogStreamConsumer&& other)
: LogStreamConsumerBase(severityOstream(other.mSeverity), severityPrefix(other.mSeverity), other.mShouldLog)
, std::ostream(&mBuffer) // links the stream buffer with the stream
, mShouldLog(other.mShouldLog)
, mSeverity(other.mSeverity)
{
}
void setReportableSeverity(Severity reportableSeverity)
{
mShouldLog = mSeverity <= reportableSeverity;
mBuffer.setShouldLog(mShouldLog);
}
private:
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
static std::string severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
bool mShouldLog;
Severity mSeverity;
};
//! \class Logger
//!
//! \brief Class which manages logging of TensorRT tools and samples
//!
//! \details This class provides a common interface for TensorRT tools and samples to log information to the console,
//! and supports logging two types of messages:
//!
//! - Debugging messages with an associated severity (info, warning, error, or internal error/fatal)
//! - Test pass/fail messages
//!
//! The advantage of having all samples use this class for logging as opposed to emitting directly to stdout/stderr is
//! that the logic for controlling the verbosity and formatting of sample output is centralized in one location.
//!
//! In the future, this class could be extended to support dumping test results to a file in some standard format
//! (for example, JUnit XML), and providing additional metadata (e.g. timing the duration of a test run).
//!
//! TODO: For backwards compatibility with existing samples, this class inherits directly from the nvinfer1::ILogger
//! interface, which is problematic since there isn't a clean separation between messages coming from the TensorRT
//! library and messages coming from the sample.
//!
//! In the future (once all samples are updated to use Logger::getTRTLogger() to access the ILogger) we can refactor the
//! class to eliminate the inheritance and instead make the nvinfer1::ILogger implementation a member of the Logger
//! object.
class Logger : public nvinfer1::ILogger
{
public:
Logger(Severity severity = Severity::kWARNING)
: mReportableSeverity(severity)
{
}
//!
//! \enum TestResult
//! \brief Represents the state of a given test
//!
enum class TestResult
{
kRUNNING, //!< The test is running
kPASSED, //!< The test passed
kFAILED, //!< The test failed
kWAIVED //!< The test was waived
};
//!
//! \brief Forward-compatible method for retrieving the nvinfer::ILogger associated with this Logger
//! \return The nvinfer1::ILogger associated with this Logger
//!
//! TODO Once all samples are updated to use this method to register the logger with TensorRT,
//! we can eliminate the inheritance of Logger from ILogger
//!
nvinfer1::ILogger& getTRTLogger()
{
return *this;
}
//!
//! \brief Implementation of the nvinfer1::ILogger::log() virtual method
//!
//! Note samples should not be calling this function directly; it will eventually go away once we eliminate the
//! inheritance from nvinfer1::ILogger
//!
void log(Severity severity, const char* msg) override
{
LogStreamConsumer(mReportableSeverity, severity) << "[TRT] " << std::string(msg) << std::endl;
}
//!
//! \brief Method for controlling the verbosity of logging output
//!
//! \param severity The logger will only emit messages that have severity of this level or higher.
//!
void setReportableSeverity(Severity severity)
{
mReportableSeverity = severity;
}
//!
//! \brief Opaque handle that holds logging information for a particular test
//!
//! This object is an opaque handle to information used by the Logger to print test results.
//! The sample must call Logger::defineTest() in order to obtain a TestAtom that can be used
//! with Logger::reportTest{Start,End}().
//!
class TestAtom
{
public:
TestAtom(TestAtom&&) = default;
private:
friend class Logger;
TestAtom(bool started, const std::string& name, const std::string& cmdline)
: mStarted(started)
, mName(name)
, mCmdline(cmdline)
{
}
bool mStarted;
std::string mName;
std::string mCmdline;
};
//!
//! \brief Define a test for logging
//!
//! \param[in] name The name of the test. This should be a string starting with
//! "TensorRT" and containing dot-separated strings containing
//! the characters [A-Za-z0-9_].
//! For example, "TensorRT.sample_googlenet"
//! \param[in] cmdline The command line used to reproduce the test
//
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
//!
static TestAtom defineTest(const std::string& name, const std::string& cmdline)
{
return TestAtom(false, name, cmdline);
}
//!
//! \brief A convenience overloaded version of defineTest() that accepts an array of command-line arguments
//! as input
//!
//! \param[in] name The name of the test
//! \param[in] argc The number of command-line arguments
//! \param[in] argv The array of command-line arguments (given as C strings)
//!
//! \return a TestAtom that can be used in Logger::reportTest{Start,End}().
static TestAtom defineTest(const std::string& name, int argc, char const* const* argv)
{
auto cmdline = genCmdlineString(argc, argv);
return defineTest(name, cmdline);
}
//!
//! \brief Report that a test has started.
//!
//! \pre reportTestStart() has not been called yet for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has started
//!
static void reportTestStart(TestAtom& testAtom)
{
reportTestResult(testAtom, TestResult::kRUNNING);
assert(!testAtom.mStarted);
testAtom.mStarted = true;
}
//!
//! \brief Report that a test has ended.
//!
//! \pre reportTestStart() has been called for the given testAtom
//!
//! \param[in] testAtom The handle to the test that has ended
//! \param[in] result The result of the test. Should be one of TestResult::kPASSED,
//! TestResult::kFAILED, TestResult::kWAIVED
//!
static void reportTestEnd(const TestAtom& testAtom, TestResult result)
{
assert(result != TestResult::kRUNNING);
assert(testAtom.mStarted);
reportTestResult(testAtom, result);
}
static int reportPass(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kPASSED);
return EXIT_SUCCESS;
}
static int reportFail(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kFAILED);
return EXIT_FAILURE;
}
static int reportWaive(const TestAtom& testAtom)
{
reportTestEnd(testAtom, TestResult::kWAIVED);
return EXIT_SUCCESS;
}
static int reportTest(const TestAtom& testAtom, bool pass)
{
return pass ? reportPass(testAtom) : reportFail(testAtom);
}
Severity getReportableSeverity() const
{
return mReportableSeverity;
}
private:
//!
//! \brief returns an appropriate string for prefixing a log message with the given severity
//!
static const char* severityPrefix(Severity severity)
{
switch (severity)
{
case Severity::kINTERNAL_ERROR: return "[F] ";
case Severity::kERROR: return "[E] ";
case Severity::kWARNING: return "[W] ";
case Severity::kINFO: return "[I] ";
case Severity::kVERBOSE: return "[V] ";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate string for prefixing a test result message with the given result
//!
static const char* testResultString(TestResult result)
{
switch (result)
{
case TestResult::kRUNNING: return "RUNNING";
case TestResult::kPASSED: return "PASSED";
case TestResult::kFAILED: return "FAILED";
case TestResult::kWAIVED: return "WAIVED";
default: assert(0); return "";
}
}
//!
//! \brief returns an appropriate output stream (cout or cerr) to use with the given severity
//!
static std::ostream& severityOstream(Severity severity)
{
return severity >= Severity::kINFO ? std::cout : std::cerr;
}
//!
//! \brief method that implements logging test results
//!
static void reportTestResult(const TestAtom& testAtom, TestResult result)
{
severityOstream(Severity::kINFO) << "&&&& " << testResultString(result) << " " << testAtom.mName << " # "
<< testAtom.mCmdline << std::endl;
}
//!
//! \brief generate a command line string from the given (argc, argv) values
//!
static std::string genCmdlineString(int argc, char const* const* argv)
{
std::stringstream ss;
for (int i = 0; i < argc; i++)
{
if (i > 0)
ss << " ";
ss << argv[i];
}
return ss.str();
}
Severity mReportableSeverity;
};
namespace
{
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kVERBOSE
//!
//! Example usage:
//!
//! LOG_VERBOSE(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_VERBOSE(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kVERBOSE);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINFO
//!
//! Example usage:
//!
//! LOG_INFO(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_INFO(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINFO);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kWARNING
//!
//! Example usage:
//!
//! LOG_WARN(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_WARN(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kWARNING);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kERROR
//!
//! Example usage:
//!
//! LOG_ERROR(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_ERROR(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kERROR);
}
//!
//! \brief produces a LogStreamConsumer object that can be used to log messages of severity kINTERNAL_ERROR
// ("fatal" severity)
//!
//! Example usage:
//!
//! LOG_FATAL(logger) << "hello world" << std::endl;
//!
inline LogStreamConsumer LOG_FATAL(const Logger& logger)
{
return LogStreamConsumer(logger.getReportableSeverity(), Severity::kINTERNAL_ERROR);
}
} // anonymous namespace
#endif // TENSORRT_LOGGING_H
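For orientation, a minimal usage sketch of the Logger and test-atom API above (illustrative only, not part of this commit; the sample name and build flow are placeholders):

#include "logging.h"

#include "NvInfer.h"

int runSample(int argc, char** argv)
{
    Logger logger(nvinfer1::ILogger::Severity::kINFO);

    // register the test so results are reported in the "&&&& ..." format
    auto testAtom = Logger::defineTest("TensorRT.sample_denoiser", argc, argv);
    Logger::reportTestStart(testAtom);

    // getTRTLogger() is the forward-compatible way to hand the logger to TensorRT
    nvinfer1::IBuilder* const builder = nvinfer1::createInferBuilder(logger.getTRTLogger());
    if (!builder)
    {
        LOG_ERROR(logger) << "failed to create builder" << std::endl;
        return Logger::reportFail(testAtom);
    }
    LOG_INFO(logger) << "builder created" << std::endl;

    builder->destroy();
    return Logger::reportPass(testAtom);
}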

View file

@ -0,0 +1,2 @@
4ac2851cbf9ad438581845e5594f591b data/tacotron2.pt
418ba5c243fbacd88fb271b080c1dc0a data/waveglow.pt

View file

@ -0,0 +1,161 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "denoiserBuilder.h"
#include "denoiserStreamingInstance.h"
#include "pluginBuilder.h"
#include "trtUtils.h"
#include <iostream>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const char* const INPUT_NAME = DenoiserStreamingInstance::INPUT_NAME;
constexpr const char* const OUTPUT_NAME = DenoiserStreamingInstance::OUTPUT_NAME;
} // namespace
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
DenoiserBuilder::DenoiserBuilder(int sampleLength, int filterLength, int numOverlap, int winLength)
: mChunkSize(sampleLength)
, mFilterLength(filterLength)
, mHopLength(filterLength / numOverlap)
, mWinLength(winLength)
{
// do nothing
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
TRTPtr<ICudaEngine> DenoiserBuilder::build(
IModelImporter& importer,
IBuilder& builder,
const int maxBatchSize,
const bool useFP16)
{
TRTPtr<INetworkDefinition> network(builder.createNetworkV2(0));
network->setName("Denoiser");
const int cutoff = mFilterLength / 2 + 1;
const LayerData* const stftData = importer.getWeights({"denoiser", "stft"});
const LayerData* const denoiserData = importer.getWeights({"denoiser"});
ITensor* const input = network->addInput(
INPUT_NAME, DataType::kFLOAT, Dims4(1, 1, 1, mChunkSize));
// forward transform
#if NV_TENSORRT_MAJOR < 7
IConvolutionLayer* const convLayer = network->addConvolution(
*input, cutoff * 2, DimsHW(1, mFilterLength), stftData->get("forward_basis"), Weights{});
convLayer->setPadding(DimsHW(0, mFilterLength / 2));
convLayer->setStride(DimsHW(1, mHopLength));
#else
IConvolutionLayer* const convLayer = network->addConvolutionNd(
*input, cutoff * 2, Dims2(1, mFilterLength), stftData->get("forward_basis"), Weights{});
convLayer->setPaddingNd(Dims2(0, mFilterLength / 2));
convLayer->setStrideNd(Dims2(1, mHopLength));
#endif
convLayer->setName("forward_transform_layer");
// use plugin to compute magnitude and phase
PluginBuilder denoiseTransformBuilder("Taco2DenoiseTransform", "0.1.0");
denoiseTransformBuilder.setField(
"InputLength", static_cast<int32_t>(TRTUtils::getTensorSize(*convLayer->getOutput(0)) / (cutoff * 2)));
denoiseTransformBuilder.setField("FilterLength", cutoff * 2);
denoiseTransformBuilder.setField("Weights", denoiserData->get("bias_spec"));
TRTPtr<IPluginV2> denoise = denoiseTransformBuilder.make("denoise_layer");
std::vector<ITensor*> denoiseInputs{convLayer->getOutput(0)};
ILayer* const denoiseLayer
= network->addPluginV2(denoiseInputs.data(), static_cast<int>(denoiseInputs.size()), *denoise);
// inverse transform
#if NV_TENSORRT_MAJOR < 7
IDeconvolutionLayer* const deconvLayer = network->addDeconvolution(
*denoiseLayer->getOutput(0), 1, DimsHW(1, mFilterLength), stftData->get("inverse_basis"), {});
deconvLayer->setStride(DimsHW(1, mHopLength));
#else
IDeconvolutionLayer* const deconvLayer = network->addDeconvolutionNd(
*denoiseLayer->getOutput(0), 1, Dims2(1, mFilterLength), stftData->get("inverse_basis"), {});
deconvLayer->setStrideNd(Dims2(1, mHopLength));
#endif
deconvLayer->setName("inverse_transform_layer");
// apply windowing
PluginBuilder modulationRemovalBuilder("Taco2ModulationRemoval", "0.1.0");
modulationRemovalBuilder.setField(
"InputLength", static_cast<int32_t>(TRTUtils::getTensorSize(*deconvLayer->getOutput(0))));
modulationRemovalBuilder.setField("FilterLength", static_cast<int32_t>(mFilterLength));
modulationRemovalBuilder.setField("HopLength", static_cast<int32_t>(mHopLength));
modulationRemovalBuilder.setField("Weights", stftData->get("win_sq"));
TRTPtr<IPluginV2> modRemoval
= modulationRemovalBuilder.make("modulation_removal_layer");
std::vector<ITensor*> modRemovalInputs{deconvLayer->getOutput(0)};
ILayer* const modRemovalLayer
= network->addPluginV2(modRemovalInputs.data(), static_cast<int>(modRemovalInputs.size()), *modRemoval);
ITensor* const output = modRemovalLayer->getOutput(0);
output->setName(OUTPUT_NAME);
network->markOutput(*output);
assert(TRTUtils::getTensorSize(*output) == static_cast<size_t>(mChunkSize));
// build engine
TRTPtr<IBuilderConfig> config(builder.createBuilderConfig());
config->setMaxWorkspaceSize(1ULL << 29); // 512 MB
if (useFP16)
{
config->setFlag(BuilderFlag::kFP16);
}
builder.setMaxBatchSize(maxBatchSize);
TRTPtr<ICudaEngine> engine(
builder.buildEngineWithConfig(*network, *config));
if (!engine)
{
throw std::runtime_error("Failed to build Denoiser engine.");
}
return engine;
}
} // namespace tts

View file

@ -0,0 +1,82 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_DENOISER_H
#define TT2I_DENOISER_H
#include "IModelImporter.h"
#include "trtPtr.h"
#include <memory>
namespace nvinfer1
{
class ICudaEngine;
class IBuilder;
} // namespace nvinfer1
namespace tts
{
class DenoiserBuilder
{
public:
/**
* @brief Create a new denoiser.
*
* @param sampleLength The number of samples.
* @param filterLength The filter length.
* @param numOverlap The number of overlapping filters.
* @param winLength The length of the window.
*/
DenoiserBuilder(int sampleLength, int filterLength = 1024, int numOverlap = 4, int winLength = 1024);
/**
* @brief Create a new Denoiser engine.
*
* @param importer The weight importer.
* @param builder The builder.
* @param maxBatchSize The maximum batch size to support.
* @param useFP16 Whether or not to allow FP16 calculations.
*
* @return The built engine.
*/
TRTPtr<nvinfer1::ICudaEngine> build(
IModelImporter& importer,
nvinfer1::IBuilder& builder,
const int maxBatchSize,
const bool useFP16);
private:
int mChunkSize;
int mFilterLength;
int mHopLength;
int mWinLength;
};
} // namespace tts
#endif
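A rough sketch of driving DenoiserBuilder directly (illustrative, not part of this commit); the 2 << 13 chunk size and the JSONModelImporter weight importer mirror DenoiserLoader::load() further below:

#include "denoiserBuilder.h"
#include "jsonModelImporter.h"

#include "NvInfer.h"

#include <string>

namespace tts
{
// hypothetical helper: build a denoiser engine from exported JSON weights
inline void buildDenoiserEngine(nvinfer1::IBuilder& builder, const std::string& weightsJson)
{
    DenoiserBuilder denoiserBuilder(2 << 13); // 16384-sample chunks
    JSONModelImporter importer(weightsJson);  // IModelImporter over the JSON weight dump
    const auto engine = denoiserBuilder.build(importer, builder, /* maxBatchSize */ 1, /* useFP16 */ true);
    // the engine can now be serialized to disk or wrapped in a DenoiserInstance
    (void) engine;
}
} // namespace tts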

View file

@ -0,0 +1,118 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "denoiserInstance.h"
#include "cudaUtils.h"
#include "dataShuffler.h"
#include <stdexcept>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
DenoiserInstance::DenoiserInstance(TRTPtr<ICudaEngine>&& engine) :
TimedObject("DenoiserInstance::infer()"),
mStreamingInstance(std::move(engine)),
mInBufferDevice(
mStreamingInstance.getChunkSize()
* mStreamingInstance.getMaxBatchSize()),
mOutBufferDevice(
mStreamingInstance.getChunkSize()
* mStreamingInstance.getMaxBatchSize())
{
// do nothing
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
void DenoiserInstance::infer(const int batchSize, const float* const inputDevice, const int inputSpacing,
const int* const inputLength, float* outputDevice)
{
startTiming();
cudaStream_t stream;
if (cudaStreamCreate(&stream) != cudaSuccess)
{
throw std::runtime_error("Failed to create stream.");
}
const int chunkSize = mStreamingInstance.getChunkSize();
int maxNumSamples = 0;
for (int i = 0; i < batchSize; ++i)
{
if (inputLength[i] > maxNumSamples)
{
maxNumSamples = inputLength[i];
}
}
mStreamingInstance.startInference();
for (int pos = 0; pos < maxNumSamples; pos += chunkSize)
{
DataShuffler::frameTransfer(
inputDevice,
mInBufferDevice.data(),
inputSpacing,
pos,
chunkSize,
batchSize,
chunkSize,
0,
stream);
mStreamingInstance.inferNext(
batchSize, mInBufferDevice.data(), mOutBufferDevice.data(), stream);
DataShuffler::frameTransfer(
mOutBufferDevice.data(),
outputDevice,
chunkSize,
0,
chunkSize,
batchSize,
inputSpacing,
pos,
stream);
}
CudaUtils::sync(stream);
cudaStreamDestroy(stream);
stopTiming();
}
} // namespace tts

View file

@ -0,0 +1,81 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_DENOISERINSTANCE_H
#define TT2I_DENOISERINSTANCE_H
#include "cudaMemory.h"
#include "denoiserStreamingInstance.h"
#include "timedObject.h"
namespace nvinfer1
{
class ICudaEngine;
} // namespace nvinfer1
namespace tts
{
class DenoiserInstance : public TimedObject
{
public:
/**
     * @brief Create a new DenoiserInstance from a built denoiser engine.
     *
     * @param engine The TensorRT engine to perform denoising with.
*/
DenoiserInstance(TRTPtr<nvinfer1::ICudaEngine>&& engine);
/**
* @brief Perform inference using the denoiser.
*
* @param batchSize The number of items in the batch.
* @param inputDevice The input tensor on the device.
* @param inputSpacing The spacing between the start of items in the batch.
* @param inputLength The length of each input.
* @param outputDevice The output tensor on the device.
*/
void infer(
const int batchSize,
const float* inputDevice,
int inputSpacing,
const int* inputLength,
float* outputDevice);
private:
DenoiserStreamingInstance mStreamingInstance;
CudaMemory<float> mInBufferDevice;
CudaMemory<float> mOutBufferDevice;
};
} // namespace tts
#endif
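A hypothetical call site for DenoiserInstance::infer() (not part of this commit), assuming the batch is packed back-to-back on the device and every item is full length:

#include "denoiserInstance.h"

#include <vector>

namespace tts
{
// hypothetical helper: denoise a batch of waveforms already resident on the GPU
inline void denoiseBatch(DenoiserInstance& denoiser,
    const float* const audioDevice, // batchSize * inputSpacing floats on the device
    float* const cleanDevice,       // output buffer of the same size on the device
    const int batchSize,
    const int inputSpacing)
{
    // treat every item as spanning the full spacing
    const std::vector<int> lengths(batchSize, inputSpacing);
    denoiser.infer(batchSize, audioDevice, inputSpacing, lengths.data(), cleanDevice);
}
} // namespace tts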

View file

@ -0,0 +1,84 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "denoiserLoader.h"
#include "denoiserBuilder.h"
#include "engineCache.h"
#include "jsonModelImporter.h"
#include "utils.h"
#include "NvInfer.h"
#include <stdexcept>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* PUBLIC STATIC METHODS ******************************************************
*****************************************************************************/
std::shared_ptr<DenoiserInstance> DenoiserLoader::load(
EngineCache& cache, IBuilder& builder, const std::string& filename, const bool fp16, const int batchSize)
{
TRTPtr<ICudaEngine> engine;
    if (Utils::hasExtension(filename, ".json"))
    {
DenoiserBuilder denoiserBuilder(2 << 13);
JSONModelImporter importer(filename);
engine = denoiserBuilder.build(importer, builder, batchSize, fp16);
// save generated engine
const std::string engFilename(filename + ".eng");
cache.save(*engine, engFilename);
}
else if (Utils::hasExtension(filename, ".eng"))
{
engine = cache.load(filename);
if (engine->getMaxBatchSize() < batchSize)
{
throw std::runtime_error(
"Engine " + filename
+ " does not support "
" the requested batch size: "
+ std::to_string(engine->getMaxBatchSize()) + " / "
+ std::to_string(batchSize)
+ "."
"Rebuild the engine with the larger batch size.");
}
}
else
{
throw std::runtime_error("Unknown model file type: " + filename);
}
return std::make_shared<DenoiserInstance>(std::move(engine));
}
} // namespace tts

View file

@ -0,0 +1,67 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_DENOISERLOADER_H
#define TT2I_DENOISERLOADER_H
#include "denoiserInstance.h"
#include <memory>
#include <string>
namespace nvinfer1
{
class IBuilder;
}
namespace tts
{
class EngineCache;
class DenoiserLoader
{
public:
/**
* @brief Load a new DenoiserInstance from an engine file or a json file.
*
* @param cache The engine cache.
* @param builder The TensorRT Engine Builder.
* @param filename The name of the engine/json file.
* @param fp16 If building an engine from a json file, whether or not to
* allow fp16 operations. If loading an engine file, this input is ignored.
* @param batchSize If building an engine from a json file, the maximum batch
* size to support. If loading an engine file, this input is ignored.
*
* @return The newly created DenoiserInstance.
*/
static std::shared_ptr<DenoiserInstance> load(EngineCache& cache, nvinfer1::IBuilder& builder,
const std::string& filename, bool fp16 = true, int batchSize = 8);
};
} // namespace tts
#endif
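A hypothetical call site for DenoiserLoader::load() (not part of this commit); the EngineCache and TensorRT builder are assumed to be created elsewhere:

#include "denoiserLoader.h"

namespace tts
{
// hypothetical helper: a ".json" path builds (and caches) a new engine,
// while a ".eng" path loads the previously cached engine directly
inline std::shared_ptr<DenoiserInstance> loadDenoiser(
    EngineCache& cache, nvinfer1::IBuilder& builder, const std::string& modelPath)
{
    return DenoiserLoader::load(cache, builder, modelPath, /* fp16 */ true, /* batchSize */ 1);
}
} // namespace tts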

View file

@ -0,0 +1,78 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "denoiserStreamingInstance.h"
#include "trtUtils.h"
#include <stdexcept>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
DenoiserStreamingInstance::DenoiserStreamingInstance(
TRTPtr<ICudaEngine>&& engine) :
TimedObject("DenoiserStreamingInstance::infer()"),
EngineDriver(std::move(engine)),
mBinding(),
mContext(getEngine().createExecutionContext()),
mChunkSize(TRTUtils::getBindingSize(getEngine(), INPUT_NAME))
{
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
void DenoiserStreamingInstance::startInference()
{
// do nothing
}
void DenoiserStreamingInstance::inferNext(
const int batchSize, const float* const inputDevice, float* outputDevice, cudaStream_t stream)
{
startTiming();
const ICudaEngine& engine = mContext->getEngine();
mBinding.setBinding(engine, OUTPUT_NAME, outputDevice);
mBinding.setBinding(engine, INPUT_NAME, inputDevice);
if (!mContext->enqueue(batchSize, mBinding.getBindings(), stream, nullptr))
{
throw std::runtime_error("Failed to run encoding.");
}
stopTiming();
}
} // namespace tts

View file

@ -0,0 +1,102 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_DENOISERSTREAMINGINSTANCE_H
#define TT2I_DENOISERSTREAMINGINSTANCE_H
#include "binding.h"
#include "engineDriver.h"
#include "timedObject.h"
namespace nvinfer1
{
class ICudaEngine;
class IExecutionContext;
} // namespace nvinfer1
namespace tts
{
class DenoiserStreamingInstance : public TimedObject, public EngineDriver
{
public:
/**
* @brief Tensor of shape {1 x INPUT_LENGTH}
*/
static constexpr const char* const INPUT_NAME = "input_denoiser";
/**
* @brief Tensor of shape {1 x OUTPUT_LENGTH}
*/
static constexpr const char* const OUTPUT_NAME = "output_denoiser";
/**
     * @brief Create a new streaming denoiser from a built denoiser engine.
     *
     * @param engine The TensorRT engine to perform denoising with.
*/
DenoiserStreamingInstance(TRTPtr<nvinfer1::ICudaEngine>&& engine);
/**
* @brief Start a new session for performing streaming inference. This
* method should be called before the first call to `inferNext()`.
*/
void startInference();
/**
* @brief Perform inference on a chunk of input.
*
* @param batchSize The size of the batch to process.
* @param inputDevice The input tensor on the device.
* @param outputDevice The output tensor on the device.
* @param stream The stream to operate on.
*/
void inferNext(const int batchSize, const float* inputDevice, float* outputDevice, cudaStream_t stream);
/**
* @brief Get the size of the chunk the denoiser will process.
*
* @return The size of the chunk.
*/
int getChunkSize() const
{
return mChunkSize;
}
private:
Binding mBinding;
TRTPtr<nvinfer1::IExecutionContext> mContext;
int mChunkSize;
};
} // namespace tts
#endif

View file

@ -0,0 +1,122 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "attentionLayerCreator.h"
#include "dims5.h"
#include "layerData.h"
#include "NvInfer.h"
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
ILayer* AttentionLayerCreator::addLocation(INetworkDefinition& network, ITensor* const input, const int attentionDim,
const int numFilters, const int kernelSize, const LayerData& convData, const LayerData& linearData,
const std::string& name)
{
// conv layer
const int padding = (kernelSize - 1) / 2;
#if NV_TENSORRT_MAJOR < 7
IConvolutionLayer* const convLayer = network.addConvolution(
*input, numFilters, DimsHW{kernelSize, 1}, convData.get("weight"), {DataType::kFLOAT, nullptr, 0});
convLayer->setPadding({padding, 0});
#else
IConvolutionLayer* const convLayer = network.addConvolutionNd(
*input, numFilters, Dims2(kernelSize, 1), convData.get("weight"), {DataType::kFLOAT, nullptr, 0});
convLayer->setPaddingNd(Dims2(padding, 0));
#endif
convLayer->setName((name + ".conv_layer").c_str());
    // need to transpose
IShuffleLayer* const transLayer = network.addShuffle(*convLayer->getOutput(0));
transLayer->setFirstTranspose({0, 2, 1, 3});
transLayer->setReshapeDimensions(Dims5{1, convLayer->getOutput(0)->getDimensions().d[2],
convLayer->getOutput(0)->getDimensions().d[1], 1, convLayer->getOutput(0)->getDimensions().d[3]});
transLayer->setName((name + ".transpose").c_str());
// fully connected layer
ILayer* const linearLayer = network.addFullyConnected(
*transLayer->getOutput(0), attentionDim, linearData.get("weight"), Weights{DataType::kFLOAT, 0, 0});
linearLayer->setName((name + ".linear_layer").c_str());
return linearLayer;
}
ILayer* AttentionLayerCreator::addEnergy(INetworkDefinition& network, ITensor* const input1, ITensor* const input2,
ITensor* const input3, const LayerData& linearData, const std::string& name)
{
// summation
ILayer* const add1Layer = network.addElementWise(*input1, *input2, ElementWiseOperation::kSUM);
add1Layer->setName((name + ".0.elementwise_sum").c_str());
ILayer* const add2Layer = network.addElementWise(*add1Layer->getOutput(0), *input3, ElementWiseOperation::kSUM);
add2Layer->setName((name + ".1.elementwise_sum").c_str());
// activation
ILayer* const actLayer = network.addActivation(*add2Layer->getOutput(0), ActivationType::kTANH);
actLayer->setName((name + ".tanh").c_str());
// fully connected layer
ILayer* const linearLayer = network.addFullyConnected(
*actLayer->getOutput(0), 1, linearData.get("weight"), Weights{DataType::kFLOAT, 0, 0});
linearLayer->setName((name + ".linear_layer").c_str());
return linearLayer;
}
ILayer* AttentionLayerCreator::addPaddedSoftMax(INetworkDefinition& network, ITensor* const input,
ITensor* const inputMask, ITensor* const inputSegments, const std::string& name)
{
// make our inputs 2 dimensional
IShuffleLayer* const maskShuffleLayer = network.addShuffle(*inputMask);
maskShuffleLayer->setReshapeDimensions(Dims2{1, -1});
maskShuffleLayer->setName((name + ".mask_reshape").c_str());
IShuffleLayer* const inputShuffleLayer = network.addShuffle(*input);
inputShuffleLayer->setReshapeDimensions(Dims2{1, -1});
inputShuffleLayer->setName((name + ".input_reshape").c_str());
// perform softmax over non-padding elements
ILayer* const softMaxLayer = network.addRaggedSoftMax(*inputShuffleLayer->getOutput(0), *inputSegments);
softMaxLayer->setName((name + ".ragged_softmax").c_str());
// zero padding
ILayer* const maskLayer = network.addElementWise(
*softMaxLayer->getOutput(0), *maskShuffleLayer->getOutput(0), ElementWiseOperation::kPROD);
maskLayer->setName((name + ".mask").c_str());
// return three dimensional output
IShuffleLayer* const outShuffle = network.addShuffle(*maskLayer->getOutput(0));
outShuffle->setReshapeDimensions(Dims3{-1, 1, 1});
outShuffle->setName((name + ".transpose").c_str());
return outShuffle;
}
} // namespace tts

View file

@ -0,0 +1,98 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_COMPOSITELAYERS_H
#define TT2I_COMPOSITELAYERS_H
#include <string>
#include <vector>
namespace nvinfer1
{
class INetworkDefinition;
class ITensor;
class ILayer;
} // namespace nvinfer1
namespace tts
{
class LayerData;
class AttentionLayerCreator
{
public:
/**
* @brief Add a location layer to the given network.
*
* @param network The network to add to.
* @param input The input tensor.
* @param attentionDim The number of dimensions.
* @param numFilters The number of filters
* @param kernelSize The size of each kernel.
* @param convData The convolution data.
* @param linearData The linear data for the fully connected layer.
* @param name The name to prefix the layers with.
*
* @return The last of the newly added layers.
*/
static nvinfer1::ILayer* addLocation(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input,
int attentionDim, int numFilters, int kernelSize, const LayerData& convData, const LayerData& linearData,
const std::string& name);
/**
* @brief Add an energy layer to the given network.
*
* @param network The network.
* @param input1 The first input to be summed.
* @param input2 The second input to be summed.
* @param input3 The third input to be summed.
* @param linearData The data for the fully connected layer.
* @param name The name to prefix layers with.
*
* @return The last layer of the newly added layers.
*/
static nvinfer1::ILayer* addEnergy(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input1,
nvinfer1::ITensor* input2, nvinfer1::ITensor* input3, const LayerData& linearData, const std::string& name);
/**
* @brief Perform a softmax on padded input.
*
* @param network The network being built.
* @param input The padded input.
* @param inputMask The mask.
* @param inputSegments The length of the input.
* @param name The name to prefix the layers with.
*
* @return The last layer.
*/
static nvinfer1::ILayer* addPaddedSoftMax(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input,
nvinfer1::ITensor* inputMask, nvinfer1::ITensor* inputSegments, const std::string& name);
};
} // namespace tts
#endif
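Stated as a formula, addPaddedSoftMax() computes, for energies $e$, valid length $L$, and binary mask $m$ (a restatement of the comments above, not taken from the source):

$$\alpha_j \;=\; m_j \cdot \frac{\exp(e_j)}{\sum_{k=1}^{L} \exp(e_k)}$$

so the attention weights are normalized over the un-padded positions only and explicitly zeroed over the padding.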

View file

@ -0,0 +1,167 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "convBatchNormCreator.h"
#include "layerData.h"
#include "trtUtils.h"
#include "NvInfer.h"
#include <cmath>
#include <stdexcept>
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const float EPS = 1e-5f;
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
ILayer* ConvBatchNormCreator::add(INetworkDefinition& network, ITensor* const input, const LayerData& convData,
const LayerData& normData, const std::string& activation, const std::string& name)
{
    // determine the number of channels from the output size of the batch norm
const int numChannels = static_cast<int>(normData.get("bias").count);
// CONVOLUTION //////////////////////////////////////////////////////////////
const std::vector<float>& convWeight = newVector(static_cast<const float*>(convData.get("weight").values),
static_cast<const float*>(convData.get("weight").values) + convData.get("weight").count);
const std::vector<float>& convBias = newVector(static_cast<const float*>(convData.get("bias").values),
static_cast<const float*>(convData.get("bias").values) + convData.get("bias").count);
#if NV_TENSORRT_MAJOR < 7
IConvolutionLayer* const convLayer = network.addConvolution(
*input, numChannels, DimsHW(5, 1), TRTUtils::toWeights(convWeight), TRTUtils::toWeights(convBias));
convLayer->setPadding({2, 0});
#else
IConvolutionLayer* const convLayer = network.addConvolutionNd(
*input, numChannels, Dims2(5, 1), TRTUtils::toWeights(convWeight), TRTUtils::toWeights(convBias));
convLayer->setPaddingNd(Dims2(2, 0));
#endif
convLayer->setName((name + ".conv_layer").c_str());
ITensor* const batchInput = convLayer->getOutput(0);
// BATCH NORM ///////////////////////////////////////////////////////////////
// create vectors
std::vector<float>& negativeMeanWeights = newVector(static_cast<const float*>(normData.get("running_mean").values),
static_cast<const float*>(normData.get("running_mean").values) + normData.get("running_mean").count);
std::vector<float>& scaleWeights = newVector(static_cast<const float*>(normData.get("weight").values),
static_cast<const float*>(normData.get("weight").values) + normData.get("weight").count);
const std::vector<float>& normBias = newVector(static_cast<const float*>(normData.get("bias").values),
static_cast<const float*>(normData.get("bias").values) + normData.get("bias").count);
const Weights emptyWeights{DataType::kFLOAT, nullptr, 0};
// check input
if (negativeMeanWeights.size() != scaleWeights.size())
{
throw std::runtime_error("Mismatch between 'running_mean' and 'weight' sizes: "
+ std::to_string(negativeMeanWeights.size()) + " " + std::to_string(scaleWeights.size()) + ".");
}
if (static_cast<size_t>(normData.get("running_var").count) != scaleWeights.size())
{
throw std::runtime_error("Size of 'running_var' does not match 'running_mean':"
+ std::to_string(normData.get("running_var").count) + " vs. " + std::to_string(scaleWeights.size()));
}
// create negative mean values
for (float& val : negativeMeanWeights)
{
val = -val;
}
// compute scaling matrix
// weight / sqrt(var(x) + eps)
const float* varWeights = static_cast<const float*>(normData.get("running_var").values);
for (size_t i = 0; i < scaleWeights.size(); ++i)
{
const float den = std::sqrt(varWeights[i] + EPS);
scaleWeights[i] /= den;
}
// x - mean(x)
ILayer* const shiftedLayer = network.addScale(
*batchInput, ScaleMode::kCHANNEL, TRTUtils::toWeights(negativeMeanWeights), emptyWeights, emptyWeights);
shiftedLayer->setName((name + ".shift").c_str());
// ((x - mean(x)) / sqrt(var(x) + eps)) * weight + bias
ILayer* const scaleLayer = network.addScale(*shiftedLayer->getOutput(0), ScaleMode::kCHANNEL,
TRTUtils::toWeights(normBias), TRTUtils::toWeights(scaleWeights), emptyWeights);
scaleLayer->setName((name + ".scale").c_str());
ITensor* const actInput = scaleLayer->getOutput(0);
// ACTIVATION ///////////////////////////////////////////////////////////////
ILayer* outputLayer;
if (activation == "relu")
{
outputLayer = network.addActivation(*actInput, ActivationType::kRELU);
outputLayer->setName((name + ".relu").c_str());
}
else if (activation == "tanh")
{
outputLayer = network.addActivation(*actInput, ActivationType::kTANH);
outputLayer->setName((name + ".tanh").c_str());
}
else if (activation == "none")
{
outputLayer = scaleLayer;
}
else
{
throw std::runtime_error("Unknown activation '" + activation + "'.");
}
return outputLayer;
}
/******************************************************************************
* PRIVATE METHODS ************************************************************
*****************************************************************************/
std::vector<float>& ConvBatchNormCreator::newVector(const float* const begin, const float* const end)
{
mData.emplace_back(new std::vector<float>(begin, end));
return *mData.back().get();
}
} // namespace tts
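For reference, the shift/scale pair above is the standard inference-time folding of batch normalization (a restatement of the in-code comments, with $\mu$ = running_mean, $\sigma^2$ = running_var, and $w$, $b$ the batch-norm weight and bias):

$$z \;=\; \frac{x - \mu}{\sqrt{\sigma^{2} + \epsilon}} \cdot w + b \;=\; \bigl(x + (-\mu)\bigr) \cdot \frac{w}{\sqrt{\sigma^{2} + \epsilon}} + b, \qquad \epsilon = 10^{-5}$$

The shift layer applies $x + (-\mu)$ per channel, and the scale layer applies the precomputed $w / \sqrt{\sigma^{2} + \epsilon}$ and $b$.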

View file

@ -0,0 +1,93 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_CONVBATCHNORMCREATOR_H
#define TT2I_CONVBATCHNORMCREATOR_H
#include <memory>
#include <string>
#include <vector>
namespace nvinfer1
{
class INetworkDefinition;
class ILayer;
class ITensor;
} // namespace nvinfer1
namespace tts
{
class LayerData;
class ConvBatchNormCreator
{
public:
/**
* @brief Add a 1d-convolution plus batch normalization followed by
     * activation to the network, where the convolution has a kernel size of 5
     * and padding of 2 (to preserve shape).
* ```
* y = conv(x)
* z = ( (y-Mean[y]) / sqrt(Var[y]+eps) ) * weight + bias
* ```
*
     * WARNING: This sets pointers from the network to this object's members,
     * so this object must not be destroyed or moved until the lifetime of the
     * network has ended.
*
* @param network The network to add to.
* @param input The input tensor.
* @param convData The LayerData object that has `weight` and `bias` for the
* convolution.
* @param normData The LayerData object that has `running_mean`,
* `running_var`, `weight`, and `bias` entries for the batch norm.
* @param activation May be "relu", "tanh", or "none".
* @param name The name to prefix the layers with.
*
* @return The last of the newly added layers.
*/
nvinfer1::ILayer* add(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input, const LayerData& convData,
const LayerData& normData, const std::string& activation, const std::string& name);
private:
std::vector<std::unique_ptr<std::vector<float>>> mData{};
/**
* @brief Create a new vector to be stored inside of this object.
*
* @param begin The starting iterator to initialize with.
* @param end The ending iterator to initialize with.
*
* @return The vector.
*/
std::vector<float>& newVector(const float* begin, const float* end);
};
} // namespace tts
#endif

View file

@ -0,0 +1,221 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "lstm.h"
#include "NvInfer.h"
using namespace nvinfer1;
namespace tts
{
/******************************************************************************
* PUBLIC STATIC METHODS ******************************************************
*****************************************************************************/
ILayer* LSTM::addPaddedBidirectional(INetworkDefinition* const network, ITensor* const input,
ITensor* const inputLength, const int numDimensions, const LayerData& lstmData)
{
// build LSTM
const int hiddenSize = numDimensions / 2;
IRNNv2Layer* lstm = network->addRNNv2(*input, 1, hiddenSize, input->getDimensions().d[1], RNNOperation::kLSTM);
lstm->setDirection(RNNDirection::kBIDIRECTION);
lstm->setSequenceLengths(*inputLength);
{
const int64_t inputBlockSize = numDimensions * hiddenSize;
// pytorch weights are stored in "weight_ih_l0" = {W_ii|W_if|W_ig|W_io}
const float* inputWeights = (const float*) lstmData.get("weight_ih_l0").values;
Weights wii{DataType::kFLOAT, (void*) (inputWeights), inputBlockSize};
Weights wif{DataType::kFLOAT, (void*) (inputWeights + inputBlockSize), inputBlockSize};
Weights wig{DataType::kFLOAT, (void*) (inputWeights + 2 * inputBlockSize), inputBlockSize};
Weights wio{DataType::kFLOAT, (void*) (inputWeights + 3 * inputBlockSize), inputBlockSize};
lstm->setWeightsForGate(0, RNNGateType::kINPUT, true, wii);
lstm->setWeightsForGate(0, RNNGateType::kCELL, true, wig);
lstm->setWeightsForGate(0, RNNGateType::kFORGET, true, wif);
lstm->setWeightsForGate(0, RNNGateType::kOUTPUT, true, wio);
const float* inputBias = (const float*) lstmData.get("bias_ih_l0").values;
Weights bii{DataType::kFLOAT, (void*) (inputBias), hiddenSize};
Weights bif{DataType::kFLOAT, (void*) (inputBias + hiddenSize), hiddenSize};
Weights big{DataType::kFLOAT, (void*) (inputBias + 2 * hiddenSize), hiddenSize};
Weights bio{DataType::kFLOAT, (void*) (inputBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(0, RNNGateType::kINPUT, true, bii);
lstm->setBiasForGate(0, RNNGateType::kCELL, true, big);
lstm->setBiasForGate(0, RNNGateType::kFORGET, true, bif);
lstm->setBiasForGate(0, RNNGateType::kOUTPUT, true, bio);
const int64_t hiddenBlockSize = hiddenSize * hiddenSize;
// pytorch weights are stored in "weight_hh_l0" = {W_hi|W_hf|W_hg|W_ho}
const float* hiddenWeights = (const float*) lstmData.get("weight_hh_l0").values;
Weights whi{DataType::kFLOAT, (void*) (hiddenWeights), hiddenBlockSize};
Weights whf{DataType::kFLOAT, (void*) (hiddenWeights + hiddenBlockSize), hiddenBlockSize};
Weights whg{DataType::kFLOAT, (void*) (hiddenWeights + 2 * hiddenBlockSize), hiddenBlockSize};
Weights who{DataType::kFLOAT, (void*) (hiddenWeights + 3 * hiddenBlockSize), hiddenBlockSize};
lstm->setWeightsForGate(0, RNNGateType::kINPUT, false, whi);
lstm->setWeightsForGate(0, RNNGateType::kCELL, false, whg);
lstm->setWeightsForGate(0, RNNGateType::kFORGET, false, whf);
lstm->setWeightsForGate(0, RNNGateType::kOUTPUT, false, who);
const float* hiddenBias = (const float*) lstmData.get("bias_hh_l0").values;
Weights bhi{DataType::kFLOAT, (void*) (hiddenBias), hiddenSize};
Weights bhf{DataType::kFLOAT, (void*) (hiddenBias + hiddenSize), hiddenSize};
Weights bhg{DataType::kFLOAT, (void*) (hiddenBias + 2 * hiddenSize), hiddenSize};
Weights bho{DataType::kFLOAT, (void*) (hiddenBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(0, RNNGateType::kINPUT, false, bhi);
lstm->setBiasForGate(0, RNNGateType::kCELL, false, bhg);
lstm->setBiasForGate(0, RNNGateType::kFORGET, false, bhf);
lstm->setBiasForGate(0, RNNGateType::kOUTPUT, false, bho);
}
{
const int64_t inputBlockSize = numDimensions * hiddenSize;
// pytorch weights are stored in "weight_ih_l0" = {W_ii|W_if|W_ig|W_io}
const float* inputWeights = (const float*) lstmData.get("weight_ih_l0_reverse").values;
Weights wii{DataType::kFLOAT, (void*) (inputWeights), inputBlockSize};
Weights wif{DataType::kFLOAT, (void*) (inputWeights + inputBlockSize), inputBlockSize};
Weights wig{DataType::kFLOAT, (void*) (inputWeights + 2 * inputBlockSize), inputBlockSize};
Weights wio{DataType::kFLOAT, (void*) (inputWeights + 3 * inputBlockSize), inputBlockSize};
lstm->setWeightsForGate(1, RNNGateType::kINPUT, true, wii);
lstm->setWeightsForGate(1, RNNGateType::kCELL, true, wig);
lstm->setWeightsForGate(1, RNNGateType::kFORGET, true, wif);
lstm->setWeightsForGate(1, RNNGateType::kOUTPUT, true, wio);
const float* inputBias = (const float*) lstmData.get("bias_ih_l0_reverse").values;
Weights bii{DataType::kFLOAT, (void*) (inputBias), hiddenSize};
Weights bif{DataType::kFLOAT, (void*) (inputBias + hiddenSize), hiddenSize};
Weights big{DataType::kFLOAT, (void*) (inputBias + 2 * hiddenSize), hiddenSize};
Weights bio{DataType::kFLOAT, (void*) (inputBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(1, RNNGateType::kINPUT, true, bii);
lstm->setBiasForGate(1, RNNGateType::kCELL, true, big);
lstm->setBiasForGate(1, RNNGateType::kFORGET, true, bif);
lstm->setBiasForGate(1, RNNGateType::kOUTPUT, true, bio);
const int64_t hiddenBlockSize = hiddenSize * hiddenSize;
// pytorch weights are stored in "weight_hh_l0" = {W_hi|W_hf|W_hg|W_ho}
const float* hiddenWeights = (const float*) lstmData.get("weight_hh_l0_reverse").values;
Weights whi{DataType::kFLOAT, (void*) (hiddenWeights), hiddenBlockSize};
Weights whf{DataType::kFLOAT, (void*) (hiddenWeights + hiddenBlockSize), hiddenBlockSize};
Weights whg{DataType::kFLOAT, (void*) (hiddenWeights + 2 * hiddenBlockSize), hiddenBlockSize};
Weights who{DataType::kFLOAT, (void*) (hiddenWeights + 3 * hiddenBlockSize), hiddenBlockSize};
lstm->setWeightsForGate(1, RNNGateType::kINPUT, false, whi);
lstm->setWeightsForGate(1, RNNGateType::kCELL, false, whg);
lstm->setWeightsForGate(1, RNNGateType::kFORGET, false, whf);
lstm->setWeightsForGate(1, RNNGateType::kOUTPUT, false, who);
const float* hiddenBias = (const float*) lstmData.get("bias_hh_l0_reverse").values;
Weights bhi{DataType::kFLOAT, (void*) (hiddenBias), hiddenSize};
Weights bhf{DataType::kFLOAT, (void*) (hiddenBias + hiddenSize), hiddenSize};
Weights bhg{DataType::kFLOAT, (void*) (hiddenBias + 2 * hiddenSize), hiddenSize};
Weights bho{DataType::kFLOAT, (void*) (hiddenBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(1, RNNGateType::kINPUT, false, bhi);
lstm->setBiasForGate(1, RNNGateType::kCELL, false, bhg);
lstm->setBiasForGate(1, RNNGateType::kFORGET, false, bhf);
lstm->setBiasForGate(1, RNNGateType::kOUTPUT, false, bho);
}
return lstm;
}
ILayer* LSTM::addUnidirectionalCell(INetworkDefinition* const network, ITensor* const input,
ITensor* const hiddenStatesIn, ITensor* const cellStatesIn, const int numDimensions, const LayerData& lstmData)
{
// build LSTM
const int hiddenSize = numDimensions;
const int inputLength = input->getDimensions().d[2];
IRNNv2Layer* lstm = network->addRNNv2(*input, 1, hiddenSize, input->getDimensions().d[1], RNNOperation::kLSTM);
lstm->setDirection(RNNDirection::kUNIDIRECTION);
const int64_t inputBlockSize = inputLength * hiddenSize;
// pytorch weights are stored in "weight_ih" = {W_ii|W_if|W_ig|W_io}
const float* inputWeights = (const float*) lstmData.get("weight_ih").values;
Weights wii{DataType::kFLOAT, (void*) (inputWeights), inputBlockSize};
Weights wif{DataType::kFLOAT, (void*) (inputWeights + inputBlockSize), inputBlockSize};
Weights wig{DataType::kFLOAT, (void*) (inputWeights + 2 * inputBlockSize), inputBlockSize};
Weights wio{DataType::kFLOAT, (void*) (inputWeights + 3 * inputBlockSize), inputBlockSize};
lstm->setWeightsForGate(0, RNNGateType::kINPUT, true, wii);
lstm->setWeightsForGate(0, RNNGateType::kCELL, true, wig);
lstm->setWeightsForGate(0, RNNGateType::kFORGET, true, wif);
lstm->setWeightsForGate(0, RNNGateType::kOUTPUT, true, wio);
const float* inputBias = (const float*) lstmData.get("bias_ih").values;
Weights bii{DataType::kFLOAT, (void*) (inputBias), hiddenSize};
Weights bif{DataType::kFLOAT, (void*) (inputBias + hiddenSize), hiddenSize};
Weights big{DataType::kFLOAT, (void*) (inputBias + 2 * hiddenSize), hiddenSize};
Weights bio{DataType::kFLOAT, (void*) (inputBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(0, RNNGateType::kINPUT, true, bii);
lstm->setBiasForGate(0, RNNGateType::kCELL, true, big);
lstm->setBiasForGate(0, RNNGateType::kFORGET, true, bif);
lstm->setBiasForGate(0, RNNGateType::kOUTPUT, true, bio);
const int64_t hiddenBlockSize = hiddenSize * hiddenSize;
// pytorch weights are stored in "weight_hh" = {W_hi|W_hf|W_hg|W_ho}
const float* hiddenWeights = (const float*) lstmData.get("weight_hh").values;
Weights whi{DataType::kFLOAT, (void*) (hiddenWeights), hiddenBlockSize};
Weights whf{DataType::kFLOAT, (void*) (hiddenWeights + hiddenBlockSize), hiddenBlockSize};
Weights whg{DataType::kFLOAT, (void*) (hiddenWeights + 2 * hiddenBlockSize), hiddenBlockSize};
Weights who{DataType::kFLOAT, (void*) (hiddenWeights + 3 * hiddenBlockSize), hiddenBlockSize};
lstm->setWeightsForGate(0, RNNGateType::kINPUT, false, whi);
lstm->setWeightsForGate(0, RNNGateType::kCELL, false, whg);
lstm->setWeightsForGate(0, RNNGateType::kFORGET, false, whf);
lstm->setWeightsForGate(0, RNNGateType::kOUTPUT, false, who);
const float* hiddenBias = (const float*) lstmData.get("bias_hh").values;
Weights bhi{DataType::kFLOAT, (void*) (hiddenBias), hiddenSize};
Weights bhf{DataType::kFLOAT, (void*) (hiddenBias + hiddenSize), hiddenSize};
Weights bhg{DataType::kFLOAT, (void*) (hiddenBias + 2 * hiddenSize), hiddenSize};
Weights bho{DataType::kFLOAT, (void*) (hiddenBias + 3 * hiddenSize), hiddenSize};
lstm->setBiasForGate(0, RNNGateType::kINPUT, false, bhi);
lstm->setBiasForGate(0, RNNGateType::kCELL, false, bhg);
lstm->setBiasForGate(0, RNNGateType::kFORGET, false, bhf);
lstm->setBiasForGate(0, RNNGateType::kOUTPUT, false, bho);
lstm->setHiddenState(*hiddenStatesIn);
lstm->setCellState(*cellStatesIn);
return lstm;
}
} // namespace tts

View file

@@ -0,0 +1,84 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_LSTM_H
#define TT2I_LSTM_H
#include "layerData.h"
namespace nvinfer1
{
class INetworkDefinition;
class ITensor;
class ILayer;
} // namespace nvinfer1
namespace tts
{
class LSTM
{
public:
/**
* @brief Add a new bidirectional LSTM layer to the network with padding at the
* end of the sequence, and with a hidden size equal to half the number of
* output dimensions.
*
* @param network The network to add to.
* @param input The input tensor.
* @param inputLength The length of each input sequence.
* @param numDimensions The number of output dimensions of the LSTM.
* @param lstmData The LSTM weights (must be in
* scope until the network is finished building).
* @param name The name to prefix the layers with.
*
* @return The last of the newly added layers.
*/
static nvinfer1::ILayer* addPaddedBidirectional(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor* input,
nvinfer1::ITensor* inputLength, int numDimensions, const LayerData& lstmData);
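/**
* Example (hypothetical sketch, not part of the original header): wiring the
* encoder LSTM after the convolution stack, assuming `network`, `convOutput`,
* `inputLengths`, and `encoderLstmData` already exist and the encoder output
* width is 512.
*
*   nvinfer1::ILayer* lstm = LSTM::addPaddedBidirectional(
*       network, convOutput, inputLengths, 512, encoderLstmData);
*   nvinfer1::ITensor* encoded = lstm->getOutput(0);
*/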
/**
* @brief Add a new unidirectional LSTM cell to the network, with a hidden size
* equal to the number of output dimensions.
*
* @param network The network to add to.
* @param input The input tensor.
* @param hiddenStatesIn The input hidden states.
* @param cellStatesIn The input cell states.
* @param numDimensions The number of output dimensions of the LSTM.
* @param lstmData The LSTM weights (must be in
* scope until the network is finished building).
*
* @return The last of the newly added layers.
*/
static nvinfer1::ILayer* addUnidirectionalCell(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor* input,
nvinfer1::ITensor* hiddenStatesIn, nvinfer1::ITensor* cellStatesIn, int numDimensions,
const LayerData& lstmData);
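/**
* Example (hypothetical sketch, not part of the original header): a single
* decoder LSTM cell step, assuming `network`, `cellInput`, `hiddenIn`,
* `cellIn`, and `decoderLstmData` already exist and the cell width is 1024.
* IRNNv2Layer exposes the sequence output, final hidden state, and final cell
* state as outputs 0, 1, and 2 respectively.
*
*   nvinfer1::ILayer* cell = LSTM::addUnidirectionalCell(
*       network, cellInput, hiddenIn, cellIn, 1024, decoderLstmData);
*   nvinfer1::ITensor* hiddenOut = cell->getOutput(1);
*   nvinfer1::ITensor* cellOut = cell->getOutput(2);
*/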
};
} // namespace tts
#endif

View file

@@ -0,0 +1,18 @@
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
file(GLOB SRCS *.cpp *.cu)
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)

View file

@@ -0,0 +1,440 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "taco2AttentionLayerKernel.h"
#include "taco2Utils.h"
using namespace tts;
namespace nvinfer1
{
namespace plugin
{
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const int ENERGY_BLOCK_SIZE = 128;
constexpr const int CONV_BLOCK_SIZE = 128;
constexpr const int QUERY_NUM_COLS = 1024;
constexpr const int QUERY_COL_SIZE = 128;
constexpr const int WARP_SIZE = 32;
static_assert(QUERY_NUM_COLS % QUERY_COL_SIZE == 0, "QUERY_NUM_COLS must be a multiple of QUERY_COL_SIZE");
} // namespace
const float Taco2AttentionLayerKernel::ONE = 1.0f;
const float Taco2AttentionLayerKernel::ZERO = 0.0f;
/******************************************************************************
* CUDA KERNELS ***************************************************************
*****************************************************************************/
template <typename T, int NUM_THREADS>
__device__ inline T warpSum(T const initVal)
{
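// shuffle-down tree reduction across NUM_THREADS lanes of a warp;
// assumes NUM_THREADS is a power of two no larger than WARP_SIZE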
constexpr const uint32_t mask = 0xffffffff >> (WARP_SIZE - NUM_THREADS);
T val = initVal;
#pragma unroll
for (int d = NUM_THREADS / 2; d > 0; d /= 2)
{
val += __shfl_down_sync(mask, val, d, NUM_THREADS);
}
return val;
}
template <typename T, int BLOCK_SIZE>
__device__ T cooperativeSum(T const initVal, T* const buffer)
{
// first all warps reduce to single value
assert(BLOCK_SIZE % WARP_SIZE == 0);
assert(BLOCK_SIZE <= WARP_SIZE * WARP_SIZE);
T val = warpSum<T, WARP_SIZE>(initVal);
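// then stage the per-warp partial sums in shared memory and let the
// first warp reduce them (only needed when the block spans multiple warps)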
if (BLOCK_SIZE > WARP_SIZE)
{
if (threadIdx.x % WARP_SIZE == 0)
{
buffer[threadIdx.x / WARP_SIZE] = val;
}
__syncthreads();
if (threadIdx.x < (BLOCK_SIZE / WARP_SIZE))
{
val = warpSum<T, BLOCK_SIZE / WARP_SIZE>(buffer[threadIdx.x]);
}
}
return val;
}
__global__ void attentionQueryGemvKernel(const float* const weights, const float* const input, float* const output,
const int inputLength, const int outputLength)
{
__shared__ float shared[QUERY_COL_SIZE];
assert(gridDim.x == outputLength);
assert(inputLength == QUERY_NUM_COLS);
// perform mat vec
float v = 0.0f;
for (int col = threadIdx.x; col < QUERY_NUM_COLS; col += QUERY_COL_SIZE)
{
// load chunk
v += input[col] * weights[blockIdx.x * QUERY_NUM_COLS + col];
}
v = cooperativeSum<float, QUERY_COL_SIZE>(v, shared);
// add bias and write
if (threadIdx.x == 0)
{
output[blockIdx.x] = v;
}
}
__global__ void attentionEnergyKernel(const float* const query, const float* const processedMemory,
const float* const location, const float* const weights, const int inputLength, float* const blockSums)
{
// first every thread must load their 'query' cell
const float q = query[threadIdx.x];
// shared memory scratch for the block-wide reduction (ENERGY_BLOCK_SIZE floats)
__shared__ float summation[ENERGY_BLOCK_SIZE];
// iterate over rows to create sums and perform tanh
const int gIdx = blockIdx.x * ENERGY_BLOCK_SIZE + threadIdx.x;
const float v = q + processedMemory[gIdx] + location[gIdx];
float val = tanh(v) * weights[threadIdx.x];
val = cooperativeSum<float, ENERGY_BLOCK_SIZE>(val, summation);
// perform simplistic reduction
if (threadIdx.x == 0)
{
// write the exponentiated energy for this position to the global scratch buffer
blockSums[blockIdx.x] = exp(val);
}
}
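// attentionEnergyKernel (above) writes exp(e_i) for each input position into
// blockSums; attentionNormalizeAndSumKernel (below) divides by their total to
// obtain the attention weights and accumulates them into the second weight
// channel used by the location convolution on the next decoder step.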
__global__ void attentionNormalizeAndSumKernel(
const float* const elemAccumsIn, float* const elems, const int numElems, const float* const blockSums)
{
__shared__ float sums[ENERGY_BLOCK_SIZE];
__shared__ float invSum;
// each block sums up the blockSums on its own
float v = 0;
for (int i = threadIdx.x; i < gridDim.x; i += blockDim.x)
{
v += blockSums[i];
}
v = cooperativeSum<float, ENERGY_BLOCK_SIZE>(v, sums);
if (threadIdx.x == 0)
{
invSum = 1.0f / v;
}
__syncthreads();
// normalize and sum
float* const elemAccumsOut = elems + numElems;
for (int i = threadIdx.x + (blockIdx.x * blockDim.x); i < numElems; i += gridDim.x * blockDim.x)
{
const float val = blockSums[i] * invSum;
elems[i] = val;
elemAccumsOut[i] = val + elemAccumsIn[i];
}
}
__global__ void attentionConvolutionKernel(const float* const convWeights, const float* const attWeights,
float* const output, const int inputLength, const int kernelSize)
{
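// each block computes one filter across the whole sequence: threadIdx.y picks
// one of the two input channels (current and accumulated attention weights),
// and the loop slides a CONV_BLOCK_SIZE window with a 32-element halo through
// shared memory, summing the two channels before writing the result out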
__shared__ float kernels[32 * 2];
__shared__ float input[(CONV_BLOCK_SIZE + 32) * 2];
__shared__ float sum[CONV_BLOCK_SIZE * 2];
const int halfKernel = (kernelSize - 1) / 2;
const int inputOffset = 32 - halfKernel;
// all threads work to populate the shared memory kernels
if (threadIdx.x < kernelSize)
{
kernels[threadIdx.x + threadIdx.y * 32]
= convWeights[blockIdx.x * (kernelSize * 2) + (threadIdx.x + threadIdx.y * kernelSize)];
}
// set initial input zero for second half
if (threadIdx.x < 32)
{
if (threadIdx.x < halfKernel || threadIdx.x - halfKernel >= inputLength)
{
input[CONV_BLOCK_SIZE + threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)] = 0;
}
else
{
input[CONV_BLOCK_SIZE + threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)]
= attWeights[threadIdx.x - halfKernel + threadIdx.y * inputLength];
}
}
__syncthreads();
for (int i = 0; i < inputLength; i += CONV_BLOCK_SIZE)
{
// shift second half into first half
if (threadIdx.x < 32)
{
input[threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)]
= input[CONV_BLOCK_SIZE + threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)];
}
__syncthreads();
// copy in second half
float v = 0;
if (i + threadIdx.x + inputOffset < inputLength)
{
v = attWeights[i + threadIdx.x + inputOffset + threadIdx.y * inputLength];
}
input[32 + threadIdx.x + threadIdx.y * (CONV_BLOCK_SIZE + 32)] = v;
__syncthreads();
// multiply with kernel
float a = 0.0f;
for (int j = 0; j < kernelSize; ++j)
{
const int k = threadIdx.x + j + threadIdx.y * (CONV_BLOCK_SIZE + 32);
a += input[k] * kernels[j + threadIdx.y * 32];
}
sum[threadIdx.x + threadIdx.y * CONV_BLOCK_SIZE] = a;
__syncthreads();
// write to global memory
if (threadIdx.y == 0 && threadIdx.x + i < inputLength)
{
output[(blockIdx.x * inputLength) + i + threadIdx.x]
= sum[threadIdx.x] + sum[threadIdx.x + CONV_BLOCK_SIZE];
}
}
}
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
Taco2AttentionLayerKernel::Taco2AttentionLayerKernel(
const std::vector<float>& queryWeightsHost,
const std::vector<float>& convWeightsHost,
const std::vector<float>& locationWeightsHost,
const std::vector<float>& energyWeightsHost,
const int encLength,
const int numQueryDimension,
const int numFilters,
const int convKernelSize,
const int numAttentionDimension) :
mNumEncodingDimension(encLength),
mNumQueryDimension(numQueryDimension),
mNumFilters(numFilters),
mConvKernelSize(convKernelSize),
mNumAttentionDimension(numAttentionDimension),
mQueryWeightsDevice(),
mConvWeightsDevice(),
mLocationWeightsDevice(),
mEnergyWeightsDevice(),
mCublasHandle{}
{
const size_t numExpectedQueryWeights = mNumAttentionDimension * mNumQueryDimension;
const size_t numExpectedConvWeights = mNumFilters * mConvKernelSize * 2;
const size_t numExpectedLocationWeights = mNumAttentionDimension * mNumFilters;
const size_t numExpectedEnergyWeights = mNumAttentionDimension;
if (queryWeightsHost.size() != numExpectedQueryWeights)
{
throw std::runtime_error("Expected " + std::to_string(numExpectedQueryWeights) + " query weights but got "
+ std::to_string(queryWeightsHost.size()) + " instead.");
}
else if (convWeightsHost.size() != numExpectedConvWeights)
{
throw std::runtime_error("Expected " + std::to_string(numExpectedConvWeights) + " convolution weights but got "
+ std::to_string(convWeightsHost.size()) + " instead.");
}
else if (locationWeightsHost.size() != numExpectedLocationWeights)
{
throw std::runtime_error("Expected " + std::to_string(numExpectedLocationWeights) + " location weights but got "
+ std::to_string(locationWeightsHost.size()) + " instead.");
}
else if (energyWeightsHost.size() != numExpectedEnergyWeights)
{
throw std::runtime_error("Expected " + std::to_string(numExpectedEnergyWeights) + " energy weights but got "
+ std::to_string(energyWeightsHost.size()) + " instead.");
}
// copy up weights to GPU
// keep in row major [128x1024]
mQueryWeightsDevice = CudaMemory<float>(queryWeightsHost);
// convolution has [32x2x31] weights (filters x channels x kernel size).
mConvWeightsDevice = CudaMemory<float>(convWeightsHost);
// transpose from column major [32x128] to column major [128x32]
std::vector<float> transLocationWeights(locationWeightsHost.size());
for (int j = 0; j < mNumAttentionDimension; ++j)
{
for (int i = 0; i < mNumFilters; ++i)
{
transLocationWeights[i * mNumAttentionDimension + j] = locationWeightsHost[j * mNumFilters + i];
}
}
mLocationWeightsDevice = CudaMemory<float>(transLocationWeights);
// energy FC is [1x128]
mEnergyWeightsDevice = CudaMemory<float>(energyWeightsHost);
// initialize cublas
if (cublasCreate(&mCublasHandle) != CUBLAS_STATUS_SUCCESS)
{
throw std::runtime_error("Failed to create cublas handle.");
}
}
Taco2AttentionLayerKernel::~Taco2AttentionLayerKernel()
{
cublasDestroy(mCublasHandle);
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
void Taco2AttentionLayerKernel::execute(const float* const memoryDevice, const float* const processedMemoryDevice,
const float* const weightsDevice, const float* const attentionHiddenDevice, float* const outputContextDevice,
float* const outputWeightsDevice, const int inputLength, float* const workspace, cudaStream_t stream)
{
float* const queryOutput = workspace;
float* const convOutput = queryOutput + mNumAttentionDimension;
float* const elemSum = convOutput + (inputLength * mNumFilters);
float* const energyScratch = elemSum + (inputLength * mNumAttentionDimension);
cublasSetStream(mCublasHandle, stream);
// launch fully connected layer to parse LSTM hidden states -
// multiplying 128x1024 weights with 1024 inputs, to get 128 outputs
{
const dim3 grid(mNumAttentionDimension);
const dim3 block(QUERY_COL_SIZE);
attentionQueryGemvKernel<<<grid, block, 0, stream>>>(
mQueryWeightsDevice.data(),
attentionHiddenDevice,
queryOutput,
mNumQueryDimension,
mNumAttentionDimension);
}
// perform convolution
{
const dim3 grid(mNumFilters);
const dim3 block(CONV_BLOCK_SIZE, 2);
// only works for 2 channels
assert(mConvKernelSize <= CONV_BLOCK_SIZE);
attentionConvolutionKernel<<<grid, block, 0, stream>>>(
mConvWeightsDevice.data(),
weightsDevice,
convOutput,
inputLength,
mConvKernelSize);
}
// location linear layer: [128 x 32] location weights times the [32 x L] transposed convolution output
cublasStatus_t err = cublasSgemm(
mCublasHandle,
CUBLAS_OP_N,
CUBLAS_OP_T,
mNumAttentionDimension,
inputLength,
mNumFilters,
&ONE,
mLocationWeightsDevice.data(),
mNumAttentionDimension,
convOutput,
inputLength,
&ZERO,
elemSum,
mNumAttentionDimension);
if (err != CUBLAS_STATUS_SUCCESS)
{
throw std::runtime_error("Location layer failed in cublas.");
}
// perform energy calculation
{
const int numBlocks = inputLength;
if (ENERGY_BLOCK_SIZE != mNumAttentionDimension)
{
throw std::runtime_error("mNumAttentionDimension must be " + std::to_string(ENERGY_BLOCK_SIZE));
}
const dim3 grid(numBlocks);
const dim3 block(ENERGY_BLOCK_SIZE);
attentionEnergyKernel<<<grid, block, 0, stream>>>(
queryOutput,
processedMemoryDevice,
elemSum,
mEnergyWeightsDevice.data(),
inputLength,
energyScratch);
attentionNormalizeAndSumKernel<<<grid, block, 0, stream>>>(
weightsDevice + inputLength, outputWeightsDevice, inputLength, energyScratch);
}
// finally perform mmLayer
err = cublasSgemv(mCublasHandle, CUBLAS_OP_N, mNumEncodingDimension, inputLength, &ONE, memoryDevice,
mNumEncodingDimension, outputWeightsDevice, 1, &ZERO, outputContextDevice, 1);
if (err != CUBLAS_STATUS_SUCCESS)
{
throw std::runtime_error("Matrix multiply layer failed in cublas.");
}
}
} // namespace plugin
} // namespace nvinfer1

View file

@@ -0,0 +1,111 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_ATTENTIONLAYERKERNEL_H
#define TT2I_ATTENTIONLAYERKERNEL_H
#include "cudaMemory.h"
#include "cuda_runtime.h"
#include "cublas_v2.h"
#include <vector>
namespace nvinfer1
{
namespace plugin
{
class Taco2AttentionLayerKernel
{
public:
/**
* @brief Create a new Taco2AttentionLayerKernel.
*
* @param queryWeights The query weights.
* @param convWeights The convolution weights.
* @param locationWeights The location weights.
* @param energyWeights The energy weights.
* @param encLength The encoding length.
* @param queryDimension The number of query dimensions.
* @param numFilters The number of convolution filters.
* @param convKernelSize The convolution kernel size.
* @param attDimension The number of attention dimensions.
*/
Taco2AttentionLayerKernel(const std::vector<float>& queryWeights, const std::vector<float>& convWeights,
const std::vector<float>& locationWeights, const std::vector<float>& energyWeights, int encLength,
int queryDimension, int numFilters, int convKernelSize, int attDimension);
// delete copy constructor and operator
Taco2AttentionLayerKernel(const Taco2AttentionLayerKernel& other) = delete;
Taco2AttentionLayerKernel& operator=(const Taco2AttentionLayerKernel& other) = delete;
/**
* @brief Destructor.
*/
~Taco2AttentionLayerKernel();
/**
* @brief Execute this kernel.
*
* @param memoryDevice The "Memory" tensor on the device.
* @param processedMemoryDevice The "Processed Memory" tensor on the
* device.
* @param weightsDevice The "Weights" tensor for input on the device.
* @param attentionHiddenDevice The hidden states from the attention LSTM
* on the device.
* @param outputContextDevice The attention context on the device to write
* to.
* @param outputWeightsDevice The "Weights" tensor to use as output.
* @param inputLength The length of the input to process (number chars).
* @param workspace The workspace.
* @param stream The stream to operate on.
*/
void execute(const float* memoryDevice, const float* processedMemoryDevice, const float* weightsDevice,
const float* attentionHiddenDevice, float* const outputContextDevice, float* const outputWeightsDevice,
const int inputLength, float* const workspace, cudaStream_t stream);
private:
static const float ONE;
static const float ZERO;
int mNumEncodingDimension;
int mNumQueryDimension;
int mNumFilters;
int mConvKernelSize;
int mNumAttentionDimension;
tts::CudaMemory<float> mQueryWeightsDevice;
tts::CudaMemory<float> mConvWeightsDevice;
tts::CudaMemory<float> mLocationWeightsDevice;
tts::CudaMemory<float> mEnergyWeightsDevice;
cublasHandle_t mCublasHandle;
};
} // namespace plugin
} // namespace nvinfer1
#endif

View file

@@ -0,0 +1,483 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "taco2AttentionLayerPlugin.h"
#include "taco2AttentionLayerKernel.h"
#include "taco2Utils.h"
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <cuda_runtime.h> // cudaError_t
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
using namespace nvinfer1;
namespace nvinfer1
{
namespace plugin
{
using value_type = Taco2AttentionLayerPlugin::value_type;
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const char* const PLUGIN_NAME = "Taco2Attention";
constexpr const char* const PLUGIN_VERSION = "0.1.0";
} // namespace
/******************************************************************************
* HELPER FUNCTIONS ***********************************************************
*****************************************************************************/
namespace
{
std::vector<value_type> toVector(const Weights& weights)
{
if (weights.type != DataType::kFLOAT)
{
throw std::runtime_error(
"Invalid data type for Attention weights: " + std::to_string(static_cast<int>(weights.type)));
}
const value_type* const valuesBegin = static_cast<const value_type*>(weights.values);
const value_type* const valuesEnd = valuesBegin + weights.count;
return std::vector<value_type>(valuesBegin, valuesEnd);
}
const void* offset(const void* ptr, const size_t offset)
{
return reinterpret_cast<const void*>(static_cast<const uint8_t*>(ptr) + offset);
}
} // namespace
/******************************************************************************
* STATIC METHODS *************************************************************
*****************************************************************************/
const char* Taco2AttentionLayerPlugin::getName()
{
return PLUGIN_NAME;
}
const char* Taco2AttentionLayerPlugin::getVersion()
{
return PLUGIN_VERSION;
}
Taco2AttentionLayerPlugin Taco2AttentionLayerPlugin::deserialize(const void* const data, const size_t length)
{
static constexpr const size_t numDims = 5;
if (length < numDims * sizeof(int32_t))
{
throw std::runtime_error("Invalid serialized size: " + std::to_string(length));
}
const int numEncodingDimension = static_cast<const int32_t*>(data)[0];
const int numQueryDimension = static_cast<const int32_t*>(data)[1];
const int numFilters = static_cast<const int32_t*>(data)[2];
const int convKernelSize = static_cast<const int32_t*>(data)[3];
const int numAttentionDimension = static_cast<const int32_t*>(data)[4];
const int numQueryWeights = numQueryDimension * numAttentionDimension;
const int numConvWeights = numFilters * 2 * convKernelSize;
const int numLocationWeights = numFilters * numAttentionDimension;
const int numEnergyWeights = numAttentionDimension;
const size_t reqSize = numDims * sizeof(int32_t)
+ sizeof(value_type) * (numQueryWeights + numConvWeights + numLocationWeights + numEnergyWeights);
if (reqSize != length)
{
throw std::runtime_error(
"Invalid serialized size: " + std::to_string(length) + " / " + std::to_string(reqSize));
}
const Weights queryWeights{DataType::kFLOAT, offset(data, numDims * sizeof(int32_t)), numQueryWeights};
const Weights convWeights{
DataType::kFLOAT, offset(queryWeights.values, sizeof(value_type) * numQueryWeights), numConvWeights};
const Weights locationWeights{
DataType::kFLOAT, offset(convWeights.values, sizeof(value_type) * numConvWeights), numLocationWeights};
const Weights energyWeights{
DataType::kFLOAT, offset(locationWeights.values, sizeof(value_type) * numLocationWeights), numEnergyWeights};
return Taco2AttentionLayerPlugin(numEncodingDimension, numQueryDimension, numFilters, convKernelSize,
numAttentionDimension, queryWeights, convWeights, locationWeights, energyWeights);
}
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
Taco2AttentionLayerPlugin::Taco2AttentionLayerPlugin(int encDimension, int queryDimension, int numFilters,
int convKernelSize, int attDimension, const nvinfer1::Weights& queryWeights, const nvinfer1::Weights& convWeights,
const nvinfer1::Weights& locationWeights, const nvinfer1::Weights& energyWeights)
: mNumEncodingDimension(encDimension)
, mNumQueryDimension(queryDimension)
, mNumFilters(numFilters)
, mConvKernelSize(convKernelSize)
, mNumAttentionDimension(attDimension)
, mQueryWeightsHost(toVector(queryWeights))
, mConvWeightsHost(toVector(convWeights))
, mLocationWeightsHost(toVector(locationWeights))
, mEnergyWeightsHost(toVector(energyWeights))
, mKernel(nullptr)
, mNamespace()
{
const size_t expectedQueryWeights = mNumQueryDimension * mNumAttentionDimension;
const size_t expectedConvWeights = mNumFilters * mConvKernelSize * 2;
const size_t expectedLocationWeights = mNumFilters * mNumAttentionDimension;
const size_t expectedEnergyWeights = mNumAttentionDimension;
if (mQueryWeightsHost.size() != expectedQueryWeights)
{
throw std::runtime_error("Attention expected " + std::to_string(expectedQueryWeights)
+ " query weights but given " + std::to_string(mQueryWeightsHost.size()));
}
if (mConvWeightsHost.size() != expectedConvWeights)
{
throw std::runtime_error("Attention expected " + std::to_string(expectedConvWeights)
+ " conv weights but given " + std::to_string(mConvWeightsHost.size()));
}
if (mLocationWeightsHost.size() != expectedLocationWeights)
{
throw std::runtime_error("Attention expected " + std::to_string(expectedLocationWeights)
+ " location weights but given " + std::to_string(mLocationWeightsHost.size()));
}
if (mEnergyWeightsHost.size() != expectedEnergyWeights)
{
throw std::runtime_error("Attention expected " + std::to_string(expectedEnergyWeights)
+ " energy weights but given " + std::to_string(mEnergyWeightsHost.size()));
}
}
Taco2AttentionLayerPlugin::Taco2AttentionLayerPlugin(Taco2AttentionLayerPlugin&& other)
: mNumEncodingDimension(other.mNumEncodingDimension)
, mNumQueryDimension(other.mNumQueryDimension)
, mNumFilters(other.mNumFilters)
, mConvKernelSize(other.mConvKernelSize)
, mNumAttentionDimension(other.mNumAttentionDimension)
, mQueryWeightsHost(std::move(other.mQueryWeightsHost))
, mConvWeightsHost(std::move(other.mConvWeightsHost))
, mLocationWeightsHost(std::move(other.mLocationWeightsHost))
, mEnergyWeightsHost(std::move(other.mEnergyWeightsHost))
, mKernel(std::move(other.mKernel))
, mNamespace(std::move(other.mNamespace))
{
other.mNumEncodingDimension = 0;
other.mNumQueryDimension = 0;
other.mNumFilters = 0;
other.mConvKernelSize = 0;
other.mNumAttentionDimension = 0;
}
Taco2AttentionLayerPlugin::~Taco2AttentionLayerPlugin()
{
destroy();
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
Taco2AttentionLayerPlugin& Taco2AttentionLayerPlugin::operator=(Taco2AttentionLayerPlugin&& other)
{
// defer to the move constructor
*this = Taco2AttentionLayerPlugin(std::move(other));
return *this;
}
DataType Taco2AttentionLayerPlugin::getOutputDataType(
const int /* index */, const DataType* const /* inputTypes */, const int /* nbInputs */) const
{
return DataType::kFLOAT;
}
const char* Taco2AttentionLayerPlugin::getPluginType() const
{
return getName();
}
const char* Taco2AttentionLayerPlugin::getPluginVersion() const
{
return getVersion();
}
int Taco2AttentionLayerPlugin::getNbOutputs() const
{
return 2;
}
DimsExprs Taco2AttentionLayerPlugin::getOutputDimensions(
const int outputIndex, const DimsExprs* inputs, const int nbInputs, IExprBuilder& exprBuilder)
{
if (outputIndex >= getNbOutputs())
{
throw std::runtime_error(
"Invalid output index: " + std::to_string(outputIndex) + " / " + std::to_string(getNbOutputs()) + ".");
}
if (nbInputs != NUM_INPUTS)
{
throw std::runtime_error(
"Can only handle " + std::to_string(NUM_INPUTS) + " input tensors: " + std::to_string(nbInputs));
}
if (outputIndex == CONTEXT_OUTPUT)
{
return DimsExprs{
3, {inputs[MEMORY_INDEX].d[0], exprBuilder.constant(1), exprBuilder.constant(mNumEncodingDimension)}};
}
else if (outputIndex == WEIGHT_OUTPUT)
{
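// two channels per input position: the attention weights produced this step
// and their running accumulation, both fed back on the next decoder step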
return DimsExprs{3, {inputs[MEMORY_INDEX].d[0], exprBuilder.constant(2), inputs[MEMORY_INDEX].d[1]}};
}
else
{
throw std::runtime_error("Unknown output index: " + std::to_string(outputIndex));
}
}
bool Taco2AttentionLayerPlugin::supportsFormatCombination(
const int pos, const PluginTensorDesc* const inOut, const int /* nbInputs */, const int /* nbOutputs */)
{
return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT;
}
void Taco2AttentionLayerPlugin::configurePlugin(const DynamicPluginTensorDesc* const in, const int nbInputs,
const DynamicPluginTensorDesc* const out, const int nbOutputs)
{
if (nbInputs != NUM_INPUTS)
{
throw std::runtime_error(
"Can only handle " + std::to_string(NUM_INPUTS) + " input tensors: " + std::to_string(nbInputs));
}
for (int i = 0; i < nbInputs; ++i)
{
if (in[i].desc.type != DataType::kFLOAT)
{
throw std::runtime_error("Only FLOAT supported as input " + std::to_string(i) + " : "
+ std::to_string(static_cast<int>(in[i].desc.type)));
}
}
// assert dimensions
if (in[MEMORY_INDEX].desc.dims.d[2] != mNumEncodingDimension)
{
throw std::runtime_error("Memory input must be L x " + std::to_string(mNumEncodingDimension) + " but got "
+ taco2::Taco2Utils::dimsToString(in[MEMORY_INDEX].desc.dims));
}
if (in[PROCESSED_MEMORY_INDEX].desc.dims.d[2] != mNumAttentionDimension)
{
throw std::runtime_error("Processed Memory input must be L x " + std::to_string(mNumAttentionDimension)
+ " but got " + taco2::Taco2Utils::dimsToString(in[PROCESSED_MEMORY_INDEX].desc.dims));
}
if (in[WEIGHT_INDEX].desc.dims.d[1] != 2)
{
throw std::runtime_error(
"Weights input must be 2 x L but got " + taco2::Taco2Utils::dimsToString(in[WEIGHT_INDEX].desc.dims));
}
if (taco2::Taco2Utils::getDimensionsSize(in[ATTENTION_HIDDEN_INDEX].desc.dims)
!= static_cast<size_t>(mNumQueryDimension))
{
throw std::runtime_error("Attention hidden input must be " + std::to_string(mNumQueryDimension) + " but got "
+ taco2::Taco2Utils::dimsToString(in[ATTENTION_HIDDEN_INDEX].desc.dims) + " ("
+ std::to_string(taco2::Taco2Utils::getDimensionsSize(in[ATTENTION_HIDDEN_INDEX].desc.dims)) + ").");
}
if (nbOutputs != NUM_OUTPUTS)
{
throw std::runtime_error("Only two outputs is implemented: " + std::to_string(nbOutputs));
}
for (int i = 0; i < nbOutputs; ++i)
{
if (out[i].desc.type != DataType::kFLOAT)
{
throw std::runtime_error("Only FLOAT supported as output: " + std::to_string(i) + " : "
+ std::to_string(static_cast<int>(out[i].desc.type)));
}
}
}
int Taco2AttentionLayerPlugin::initialize()
{
try
{
mKernel.reset(
new Taco2AttentionLayerKernel(mQueryWeightsHost, mConvWeightsHost, mLocationWeightsHost, mEnergyWeightsHost,
mNumEncodingDimension, mNumQueryDimension, mNumFilters, mConvKernelSize, mNumAttentionDimension));
}
catch (const std::exception& e)
{
std::cerr << "Taco2AttentionLayerPlugin initialization failed: " << e.what() << std::endl;
return 1;
}
return 0;
}
void Taco2AttentionLayerPlugin::terminate()
{
mKernel.reset();
}
size_t Taco2AttentionLayerPlugin::getWorkspaceSize(
const PluginTensorDesc* const in, const int nbInputs, const PluginTensorDesc* const /* out */, const int /* nbOutputs */) const
{
if (nbInputs != NUM_INPUTS) {
throw std::runtime_error("Invalid number of inputs: " +
std::to_string(nbInputs) + ", but expected " + std::to_string(NUM_INPUTS));
}
const int inputLength = in[MEMORY_INDEX].dims.d[1];
const int batchSize = in[MEMORY_INDEX].dims.d[0];
// space for queryOutput (num attention dimensions),
// convOutput (input length*num filters), elemSum (input length), and
// energyScratch (inputLength).
return sizeof(value_type) * batchSize * (mNumAttentionDimension + (inputLength * mNumFilters) + 2 * inputLength);
}
int Taco2AttentionLayerPlugin::enqueue(const PluginTensorDesc* const inputDesc,
const PluginTensorDesc* /* outputDesc */,
const void* const* const inputs, void* const* const outputs, void* const workspace, cudaStream_t stream)
{
const int inputLength = inputDesc[MEMORY_INDEX].dims.d[1];
const int batchSize = inputDesc[MEMORY_INDEX].dims.d[0];
if (batchSize != 1)
{
// we only support batch size of 1 right now
std::cerr << "Taco2AttentionLayerPlugin plugin does not support batch size other than "
"1: got "
<< batchSize << std::endl;
std::cerr << "Recompile without plugins to use a larger batch size." << std::endl;
return 1;
}
// name inputs and outputs
const value_type* const memoryDevice = static_cast<const value_type*>(inputs[MEMORY_INDEX]);
const value_type* const processedMemoryDevice = static_cast<const value_type*>(inputs[PROCESSED_MEMORY_INDEX]);
const value_type* const weightsDevice = static_cast<const value_type*>(inputs[WEIGHT_INDEX]);
const value_type* const attentionHiddenDevice = static_cast<const value_type*>(inputs[ATTENTION_HIDDEN_INDEX]);
value_type* const outputContextDevice = static_cast<value_type*>(outputs[CONTEXT_OUTPUT]);
value_type* const outputWeightsDevice = static_cast<value_type*>(outputs[WEIGHT_OUTPUT]);
try
{
mKernel->execute(memoryDevice, processedMemoryDevice, weightsDevice, attentionHiddenDevice, outputContextDevice,
outputWeightsDevice, inputLength, static_cast<value_type*>(workspace), stream);
}
catch (const std::exception& e)
{
std::cerr << "Taco2AttentionLayerPlugin failed: " << e.what() << std::endl;
return 1;
}
return 0;
}
size_t Taco2AttentionLayerPlugin::getSerializationSize() const
{
const int numQueryWeights = mNumQueryDimension * mNumAttentionDimension;
const int numConvWeights = mNumFilters * 2 * mConvKernelSize;
const int numLocationWeights = mNumFilters * mNumAttentionDimension;
const int numEnergyWeights = mNumAttentionDimension;
return 5 * sizeof(int32_t)
+ sizeof(value_type) * (numQueryWeights + numConvWeights + numLocationWeights + numEnergyWeights);
}
void Taco2AttentionLayerPlugin::serialize(void* const buffer) const
{
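// layout: five int32 dimensions followed by the query, convolution, location,
// and energy weights as float32 (the inverse of deserialize() above)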
static_cast<int32_t*>(buffer)[0] = mNumEncodingDimension;
static_cast<int32_t*>(buffer)[1] = mNumQueryDimension;
static_cast<int32_t*>(buffer)[2] = mNumFilters;
static_cast<int32_t*>(buffer)[3] = mConvKernelSize;
static_cast<int32_t*>(buffer)[4] = mNumAttentionDimension;
float* const queryWeights = reinterpret_cast<float*>(static_cast<int32_t*>(buffer) + 5);
float* const convWeights = queryWeights + mQueryWeightsHost.size();
float* const locationWeights = convWeights + mConvWeightsHost.size();
float* const energyWeights = locationWeights + mLocationWeightsHost.size();
memcpy(queryWeights, mQueryWeightsHost.data(), sizeof(value_type) * mQueryWeightsHost.size());
memcpy(convWeights, mConvWeightsHost.data(), sizeof(value_type) * mConvWeightsHost.size());
memcpy(locationWeights, mLocationWeightsHost.data(), sizeof(value_type) * mLocationWeightsHost.size());
memcpy(energyWeights, mEnergyWeightsHost.data(), sizeof(value_type) * mEnergyWeightsHost.size());
}
void Taco2AttentionLayerPlugin::destroy()
{
terminate();
}
IPluginV2DynamicExt* Taco2AttentionLayerPlugin::clone() const
{
// call the constructor, which copies the data
Taco2AttentionLayerPlugin clone(mNumEncodingDimension, mNumQueryDimension, mNumFilters, mConvKernelSize,
mNumAttentionDimension,
Weights{DataType::kFLOAT, mQueryWeightsHost.data(), static_cast<int64_t>(mQueryWeightsHost.size())},
Weights{DataType::kFLOAT, mConvWeightsHost.data(), static_cast<int64_t>(mConvWeightsHost.size())},
Weights{DataType::kFLOAT, mLocationWeightsHost.data(), static_cast<int64_t>(mLocationWeightsHost.size())},
Weights{DataType::kFLOAT, mEnergyWeightsHost.data(), static_cast<int64_t>(mEnergyWeightsHost.size())});
if (mKernel)
{
// initialize the clone too
clone.initialize();
}
// move it to the heap last to avoid exceptions causing memory leaks
return new Taco2AttentionLayerPlugin(std::move(clone));
}
void Taco2AttentionLayerPlugin::setPluginNamespace(const char* pluginNamespace)
{
mNamespace = pluginNamespace;
}
const char* Taco2AttentionLayerPlugin::getPluginNamespace() const
{
return mNamespace.c_str();
}
} // namespace plugin
} // namespace nvinfer1

View file

@@ -0,0 +1,300 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_ATTENTIONLAYER_H
#define TT2I_ATTENTIONLAYER_H
#include "NvInfer.h"
#include <memory>
#include <string>
#include <vector>
namespace nvinfer1
{
namespace plugin
{
class Taco2AttentionLayerKernel;
class Taco2AttentionLayerPlugin : public nvinfer1::IPluginV2DynamicExt
{
public:
using value_type = float;
enum Inputs
{
MEMORY_INDEX = 0,
PROCESSED_MEMORY_INDEX = 1,
WEIGHT_INDEX = 2,
ATTENTION_HIDDEN_INDEX = 3,
NUM_INPUTS = 4
};
enum Outputs
{
CONTEXT_OUTPUT = 0,
WEIGHT_OUTPUT = 1,
NUM_OUTPUTS = 2
};
/**
* @brief Get the name of this plugin.
*
* @return The name.
*/
static const char* getName();
/**
* @brief Get the version of this plugin.
*
* @return The version.
*/
static const char* getVersion();
/**
* @brief Create a new Taco2AttentionLayerPlugin from serialized data.
*
* @param data The data.
* @param length The length of the data in bytes.
*
* @return The instantiated plugin.
*/
static Taco2AttentionLayerPlugin deserialize(const void* data, size_t length);
/**
* @brief Create a new Taco2AttentionLayerPlugin.
*
* @param encDimension The number of encoding dimensions.
* @param queryDimension The number of query dimensions.
* @param numFilters The number of convolution filters.
* @param convKernelSize The convolution kernel size.
* @param attDimension The attention dimension.
* @param queryWeights The query weights.
* @param convWeights The convolution weights.
* @param locationWeights The location weights.
* @param energyWeights The energy weights.
*/
Taco2AttentionLayerPlugin(int encDimension, int queryDimension, int numFilters, int convKernelSize,
int attDimension, const nvinfer1::Weights& queryWeights, const nvinfer1::Weights& convWeights,
const nvinfer1::Weights& locationWeights, const nvinfer1::Weights& energyWeights);
/**
* @brief Move constructor.
*
* @param other The Taco2AttentionLayer to move.
*/
Taco2AttentionLayerPlugin(Taco2AttentionLayerPlugin&& other);
/**
* @brief Move assignment operator.
*
* @param other The Taco2AttentionLayerPlugin to move.
*
* @return This Taco2AttentionLayerPlugin.
*/
Taco2AttentionLayerPlugin& operator=(Taco2AttentionLayerPlugin&& other);
/**
* @brief Destructor.
*/
~Taco2AttentionLayerPlugin();
// disable copying
Taco2AttentionLayerPlugin(const Taco2AttentionLayerPlugin& other) = delete;
Taco2AttentionLayerPlugin& operator=(const Taco2AttentionLayerPlugin& other) = delete;
/**
* @brief Return the data type of the plugin output at the requested index.
*
* @param index The output index.
* @param inputTypes The input data types.
* @param nbInputs The number of inputs.
*
* @return The type of output.
*/
nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override;
/**
* @brief Get the plugin type.
*
* @return The plugin type.
*/
const char* getPluginType() const override;
/**
* @brief Get the plugin version.
*
* @return The plugin version.
*/
const char* getPluginVersion() const override;
/**
* @brief Get the number of outputs.
*
* @return The number of outputs.
*/
int getNbOutputs() const override;
/**
* @brief Get the dimensions of an output tensor.
*
* @param outputIndex The index of the output tensor.
* @param inputs Expressions for dimensions of the input tensors.
* @param nbInputs The number of input tensors.
* @param expBuilder Object for generating new expressions.
*
* @return The resulting dimensions.
*/
nvinfer1::DimsExprs getOutputDimensions(
int outputIndex, const DimsExprs* inputs, int nbInputs, IExprBuilder& expBuilder) override;
/**
* @brief Check if the given plugin format is supported.
*
* @param pos The format position/index in inOut.format[].
* @param inOut The input and output formats.
* @param nbInputs The number of inputs.
* @param nbOutputs The number of outputs.
*
* @return True if it is supported.
*/
bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) override;
/**
* @brief Configure this plugin with the given inputs, outputs, and data
* types.
*
* @param in The input tensor attributes that are used for configuration.
* @param nbInputs The number of inputs.
* @param out The output tensor attributes that are used for configuration.
* @param nbOutputs The number of outputs.
*/
void configurePlugin(
const DynamicPluginTensorDesc* in, int nbInputs, const DynamicPluginTensorDesc* out, int nbOutputs) override;
/**
* @brief Initialize the plugin.
*
* @return 0 if initialization was successful. Non-zero otherwise.
*/
int initialize() override;
/**
* @brief Terminate the plugin (deinitialize).
*/
void terminate() override;
/**
* @brief Get the workspace size required by this plugin for the given input
* and output tensor descriptors.
*
* @param in The input tensor descriptors.
* @param nbInputs The number of inputs.
* @param out The output tensor descriptors.
* @param nbOutputs The number of outputs.
*
* @return The workspace size in bytes.
*/
size_t getWorkspaceSize(
const PluginTensorDesc* in, int nbInputs, const PluginTensorDesc* out, int nbOutputs) const override;
/**
* @brief Enqueue this plugin for execution on the given stream.
*
* @param inputDesc The input tensor descriptors.
* @param outputDesc The output tensor descriptors.
* @param inputs The input tensors.
* @param outputs The output tensors.
* @param workspace The allocated workspace.
* @param stream The stream to operate on.
*
* @return 0 if successfully queued, non-zero otherwise.
*/
int enqueue(const PluginTensorDesc* inputDesc, const PluginTensorDesc* outputDesc, const void* const* inputs,
void* const* outputs, void* workspace, cudaStream_t stream);
/**
* @brief Get the number of bytes occupied by this plugin if serialized.
*
* @return The size in bytes.
*/
size_t getSerializationSize() const override;
/**
* @brief Serialize this plugin.
*
* @param buffer The buffer to write to.
*/
void serialize(void* buffer) const override;
/**
* @brief Destroy this plugin instance.
*/
void destroy() override;
/**
* @brief Clone this plugin instance.
*
* @return The cloned plugin.
*/
IPluginV2DynamicExt* clone() const override;
/**
* @brief Set the namespace of this plugin.
*
* @param pluginNamespace The namespace.
*/
void setPluginNamespace(const char* pluginNamespace) override;
/**
* @brief Get the namespace of this plugin.
*
* @return The namespace.
*/
const char* getPluginNamespace() const override;
private:
int mNumEncodingDimension;
int mNumQueryDimension;
int mNumFilters;
int mConvKernelSize;
int mNumAttentionDimension;
std::vector<value_type> mQueryWeightsHost;
std::vector<value_type> mConvWeightsHost;
std::vector<value_type> mLocationWeightsHost;
std::vector<value_type> mEnergyWeightsHost;
std::unique_ptr<Taco2AttentionLayerKernel> mKernel;
std::string mNamespace;
};
} // namespace plugin
} // namespace nvinfer1
#endif

View file

@@ -0,0 +1,201 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "taco2AttentionLayerPluginCreator.h"
#include "taco2AttentionLayerPlugin.h"
#include <stdexcept>
#include <vector>
using namespace nvinfer1;
namespace nvinfer1
{
namespace plugin
{
/******************************************************************************
* CONSTANTS ******************************************************************
*****************************************************************************/
namespace
{
constexpr const char* const INPUT_LENGTH_STR = "InputLength";
constexpr const char* const ENCODING_DIMENSION_STR = "EncodingDimension";
constexpr const char* const QUERY_DIMENSION_STR = "QueryDimension";
constexpr const char* const NUM_FILTERS_STR = "NumFilters";
constexpr const char* const CONV_KERNEL_SIZE_STR = "ConvKernelSize";
constexpr const char* const ATTENTION_DIMENSION_STR = "AttentionDimension";
constexpr const char* const QUERY_WEIGHTS_STR = "QueryWeight";
constexpr const char* const CONV_WEIGHTS_STR = "ConvWeight";
constexpr const char* const LOCATION_WEIGHTS_STR = "LocationWeight";
constexpr const char* const ENERGY_WEIGHTS_STR = "EnergyWeight";
} // namespace
/******************************************************************************
* PUBLIC STATIC METHODS ******************************************************
*****************************************************************************/
PluginFieldCollection* Taco2AttentionLayerPluginCreator::getFields()
{
static PluginFieldCollection* pluginPtr = nullptr;
static const std::vector<PluginField> fields{{INPUT_LENGTH_STR, nullptr, PluginFieldType::kINT32, 0},
{ENCODING_DIMENSION_STR, nullptr, PluginFieldType::kINT32, 0},
{QUERY_DIMENSION_STR, nullptr, PluginFieldType::kINT32, 0},
{NUM_FILTERS_STR, nullptr, PluginFieldType::kINT32, 0},
{CONV_KERNEL_SIZE_STR, nullptr, PluginFieldType::kINT32, 0},
{ATTENTION_DIMENSION_STR, nullptr, PluginFieldType::kINT32, 0},
{QUERY_WEIGHTS_STR, nullptr, PluginFieldType::kFLOAT32, 0},
{CONV_WEIGHTS_STR, nullptr, PluginFieldType::kFLOAT32, 0},
{LOCATION_WEIGHTS_STR, nullptr, PluginFieldType::kFLOAT32, 0},
{ENERGY_WEIGHTS_STR, nullptr, PluginFieldType::kFLOAT32, 0}};
if (!pluginPtr)
{
pluginPtr
= static_cast<PluginFieldCollection*>(malloc(sizeof(*pluginPtr) + fields.size() * sizeof(PluginField)));
pluginPtr->nbFields = static_cast<int>(fields.size());
pluginPtr->fields = fields.data();
}
return pluginPtr;
}
/******************************************************************************
* CONSTRUCTORS / DESTRUCTOR **************************************************
*****************************************************************************/
Taco2AttentionLayerPluginCreator::Taco2AttentionLayerPluginCreator()
: mNamespace()
{
// do nothing
}
/******************************************************************************
* PUBLIC METHODS *************************************************************
*****************************************************************************/
const char* Taco2AttentionLayerPluginCreator::getPluginName() const
{
return Taco2AttentionLayerPlugin::getName();
}
const char* Taco2AttentionLayerPluginCreator::getPluginVersion() const
{
return Taco2AttentionLayerPlugin::getVersion();
}
const PluginFieldCollection* Taco2AttentionLayerPluginCreator::getFieldNames()
{
return getFields();
}
IPluginV2* Taco2AttentionLayerPluginCreator::createPlugin(const char* const /*name*/, const PluginFieldCollection* fc)
{
int encDimension = 0;
int queryDimension = 0;
int numFilters = 0;
int convKernelSize = 0;
int attDimension = 0;
Weights queryWeights{DataType::kFLOAT, nullptr, 0};
Weights locationWeights{DataType::kFLOAT, nullptr, 0};
Weights convWeights{DataType::kFLOAT, nullptr, 0};
Weights energyWeights{DataType::kFLOAT, nullptr, 0};
for (int i = 0; i < fc->nbFields; ++i)
{
const std::string name(fc->fields[i].name);
if (name == ENCODING_DIMENSION_STR)
{
encDimension = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == QUERY_DIMENSION_STR)
{
queryDimension = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == NUM_FILTERS_STR)
{
numFilters = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == CONV_KERNEL_SIZE_STR)
{
convKernelSize = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == ATTENTION_DIMENSION_STR)
{
attDimension = static_cast<const int32_t*>(fc->fields[i].data)[0];
}
else if (name == QUERY_WEIGHTS_STR)
{
queryWeights.values = fc->fields[i].data;
queryWeights.count = fc->fields[i].length;
}
else if (name == CONV_WEIGHTS_STR)
{
convWeights.values = fc->fields[i].data;
convWeights.count = fc->fields[i].length;
}
else if (name == LOCATION_WEIGHTS_STR)
{
locationWeights.values = fc->fields[i].data;
locationWeights.count = fc->fields[i].length;
}
else if (name == ENERGY_WEIGHTS_STR)
{
energyWeights.values = fc->fields[i].data;
energyWeights.count = fc->fields[i].length;
}
else
{
throw std::runtime_error("Unknown plugin field: '" + name + "'");
}
}
return new Taco2AttentionLayerPlugin(encDimension, queryDimension, numFilters, convKernelSize, attDimension,
queryWeights, convWeights, locationWeights, energyWeights);
}
IPluginV2* Taco2AttentionLayerPluginCreator::deserializePlugin(
const char* const /* layerName */, const void* const serialData, size_t const serialLength)
{
return new Taco2AttentionLayerPlugin(Taco2AttentionLayerPlugin::deserialize(serialData, serialLength));
}
void Taco2AttentionLayerPluginCreator::setPluginNamespace(const char* pluginNamespace)
{
mNamespace = pluginNamespace;
}
const char* Taco2AttentionLayerPluginCreator::getPluginNamespace() const
{
return mNamespace.c_str();
}
} // namespace plugin
} // namespace nvinfer1
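The creator above maps a PluginFieldCollection onto the attention plugin's constructor. Below is a minimal, hypothetical sketch (not part of this commit) of how a network-building client could drive createPlugin() with the field names defined in this file: all dimensions are placeholder values rather than the real model sizes, the weight vectors stand in for the trained Tacotron 2 attention weights, and it assumes the plugin copies the weights it is handed (the host-side weight vectors in the plugin class suggest it does).

// Sketch only -- not part of this commit. Field names match the constants
// defined above; the dimensions are placeholders, not the real model sizes.
#include "taco2AttentionLayerPluginCreator.h"
#include "NvInfer.h"

#include <vector>

nvinfer1::IPluginV2* buildAttentionPluginSketch(
    const std::vector<float>& queryW, const std::vector<float>& convW,
    const std::vector<float>& locationW, const std::vector<float>& energyW)
{
    using namespace nvinfer1;

    const int32_t encodingDim = 512;  // placeholder
    const int32_t queryDim = 1024;    // placeholder
    const int32_t numFilters = 32;    // placeholder
    const int32_t kernelSize = 31;    // placeholder
    const int32_t attentionDim = 128; // placeholder

    // "InputLength" is listed by getFields() but not read by createPlugin(),
    // so it is omitted here to avoid the unknown-field exception.
    const std::vector<PluginField> fields{
        {"EncodingDimension", &encodingDim, PluginFieldType::kINT32, 1},
        {"QueryDimension", &queryDim, PluginFieldType::kINT32, 1},
        {"NumFilters", &numFilters, PluginFieldType::kINT32, 1},
        {"ConvKernelSize", &kernelSize, PluginFieldType::kINT32, 1},
        {"AttentionDimension", &attentionDim, PluginFieldType::kINT32, 1},
        {"QueryWeight", queryW.data(), PluginFieldType::kFLOAT32, static_cast<int32_t>(queryW.size())},
        {"ConvWeight", convW.data(), PluginFieldType::kFLOAT32, static_cast<int32_t>(convW.size())},
        {"LocationWeight", locationW.data(), PluginFieldType::kFLOAT32, static_cast<int32_t>(locationW.size())},
        {"EnergyWeight", energyW.data(), PluginFieldType::kFLOAT32, static_cast<int32_t>(energyW.size())}};

    PluginFieldCollection fc;
    fc.nbFields = static_cast<int32_t>(fields.size());
    fc.fields = fields.data();

    plugin::Taco2AttentionLayerPluginCreator creator;
    // The returned plugin is owned by the caller and must eventually be destroyed.
    return creator.createPlugin("taco2_attention_sketch", &fc);
}

In the actual backend the same fields are populated from the exported Tacotron 2 weights when the engine is built; the sketch only illustrates the expected field layout.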

View file

@ -0,0 +1,128 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef TT2I_ATTENTIONLAYERPLUGINCREATOR_H
#define TT2I_ATTENTIONLAYERPLUGINCREATOR_H
#include "NvInfer.h"
#include <string>
#ifdef DEVEL
// The destructor of nvinfer1::IPluginCreator is non-virtual and public, so
// we need to suppress the warning.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
#endif
namespace nvinfer1
{
namespace plugin
{
class Taco2AttentionLayerPluginCreator : public nvinfer1::IPluginCreator
{
public:
/**
* @brief Get the collection of fields for this plugin, with their names only.
*
* @return The collection of fields.
*/
static nvinfer1::PluginFieldCollection* getFields();
/**
* @brief Create a new Taco2AttentionLayerPluginCreator.
*/
Taco2AttentionLayerPluginCreator();
/**
* @brief Get the name of the plugin.
*
* @return The name of the plugin.
*/
const char* getPluginName() const override;
/**
* @brief Get the plugin version.
*
* @return The plugin version.
*/
const char* getPluginVersion() const override;
/**
* @brief Get the collection of fields for this plugin.
*
* @return The collection of fields.
*/
const nvinfer1::PluginFieldCollection* getFieldNames() override;
/**
* @brief Create a new Taco2AttentionLayerPlugin.
*
* @param name The name (unused currently).
* @param fc The collection of fields to initialize with.
*
* @return The created plugin.
*/
nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) override;
/**
* @brief Create a custom layer by name from a data stream.
*
* @param layerName The name of the layer.
* @param serialData The serialized data for the layer.
* @param serialLength The length of the serialized data.
*
* @return The plugin. Clients must destroy the plugin once all consumers of
* it have been destroyed.
*/
nvinfer1::IPluginV2* deserializePlugin(const char* layerName, const void* serialData, size_t serialLength) override;
/**
* @brief Set the namespace for created plugins.
*
* @param pluginNamespace The namespace.
*/
void setPluginNamespace(const char* pluginNamespace) override;
/**
* @brief Get the namespace for created plugins.
*
* @return The namespace.
*/
const char* getPluginNamespace() const override;
private:
std::string mNamespace;
};
} // namespace plugin
} // namespace nvinfer1
#ifdef DEVEL
#pragma GCC diagnostic pop
#endif
#endif
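For engines that serialize this layer, the creator also has to be discoverable at deserialization time. A hedged sketch of registering it with the global TensorRT plugin registry follows (not part of this commit); it assumes the default, empty plugin namespace, and the function name is purely illustrative.

// Sketch only -- not part of this commit. Registers the attention creator so
// that an engine containing the plugin can be deserialized later.
#include "taco2AttentionLayerPluginCreator.h"
#include "NvInfer.h"

void registerTaco2AttentionCreatorSketch()
{
    using namespace nvinfer1;

    // The registry keeps a reference, so the creator must outlive it; a
    // function-local static is the simplest way to guarantee that here.
    static plugin::Taco2AttentionLayerPluginCreator creator;
    creator.setPluginNamespace("");
    getPluginRegistry()->registerCreator(creator, "");

    // At deserialization time TensorRT looks the creator up by the same
    // name/version pair the plugin reports.
    IPluginCreator* found = getPluginRegistry()->getPluginCreator(
        creator.getPluginName(), creator.getPluginVersion(), "");
    (void) found; // found == &creator on success
}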

View file

@ -0,0 +1,18 @@
#
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
file(GLOB SRCS *.cpp *.cu)
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} ${SRCS})
set(PLUGIN_SOURCES ${PLUGIN_SOURCES} PARENT_SCOPE)

Some files were not shown because too many files have changed in this diff.