// Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#define HAVE_CUDA 1  // Loading Kaldi headers with GPU

#include <memory>
#include <string>
#include <vector>

#include "cudadecoder/batched-threaded-nnet3-cuda-online-pipeline.h"
#include "fstext/fstext-lib.h"
#include "lat/kaldi-lattice.h"
#include "lat/lattice-functions.h"
#include "nnet3/am-nnet-simple.h"
#include "nnet3/nnet-utils.h"
#include "util/kaldi-thread.h"

#include "src/core/model_config.h"
#include "src/core/model_config.pb.h"
#include "src/custom/sdk/custom_instance.h"

using kaldi::BaseFloat;

namespace nvidia {
namespace inferenceserver {
namespace custom {
namespace kaldi_cbe {

// Context object. All state must be kept in this object.
class Context {
 public:
  Context(const std::string& instance_name, const ModelConfig& config,
          const int gpu_device);
  virtual ~Context();

  // Initialize the context. Validate that the model configuration,
  // etc. is something that we can handle.
  int Init();

  // Perform custom execution on the payloads.
  int Execute(const uint32_t payload_cnt, CustomPayload* payloads,
              CustomGetNextInputFn_t input_fn, CustomGetOutputFn_t output_fn);

 private:
  // Initialize the Kaldi pipeline.
  int InitializeKaldiPipeline();
  int InputOutputSanityCheck();
  int ReadModelParameters();
  int GetSequenceInput(CustomGetNextInputFn_t& input_fn, void* input_context,
                       CorrelationID* corr_id, int32_t* start, int32_t* ready,
                       int32_t* dim, int32_t* end,
                       const kaldi::BaseFloat** wave_buffer,
                       std::vector<uint8_t>* input_buffer);

  int SetOutputs(kaldi::CompactLattice& clat, CustomGetOutputFn_t output_fn,
                 CustomPayload payload);

  int SetOutputByName(const char* output_name, const std::string& out_bytes,
                      CustomGetOutputFn_t output_fn, CustomPayload payload);

  bool CheckPayloadError(const CustomPayload& payload);
  int FlushBatch();

  // The name of this instance of the backend.
  const std::string instance_name_;

  // The model configuration.
  const ModelConfig model_config_;

  // The GPU device ID to execute on, or CUSTOM_NO_GPU_DEVICE if the model
  // should execute on the CPU.
  const int gpu_device_;

  // Model paths
  std::string nnet3_rxfilename_, fst_rxfilename_;
  std::string word_syms_rxfilename_;

  // Batching parameters
  int max_batch_size_;
  int num_channels_;
  int num_worker_threads_;
  std::vector<CorrelationID> batch_corr_ids_;
  std::vector<kaldi::SubVector<BaseFloat>> batch_wave_samples_;
  std::vector<bool> batch_is_first_chunk_;
  std::vector<bool> batch_is_last_chunk_;

  BaseFloat sample_freq_, seconds_per_chunk_;
  int chunk_num_bytes_, chunk_num_samps_;
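
  // Note: the batch_* vectors above are assumed to hold the chunk batch
  // currently being assembled (one entry per active sequence), which
  // FlushBatch() submits to the CUDA pipeline; this is inferred from the
  // declarations, not stated in the original header.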
  // feature_config includes configuration for the iVector adaptation,
  // as well as the basic features.
  kaldi::cuda_decoder::BatchedThreadedNnet3CudaOnlinePipelineConfig
      batched_decoder_config_;
  std::unique_ptr<kaldi::cuda_decoder::BatchedThreadedNnet3CudaOnlinePipeline>
      cuda_pipeline_;

  // Maintain the state of some shared objects
  kaldi::TransitionModel trans_model_;
  kaldi::nnet3::AmNnetSimple am_nnet_;
  fst::SymbolTable* word_syms_;

  const uint64_t int32_byte_size_;
  const uint64_t int64_byte_size_;
  std::vector<int64_t> output_shape_;

  std::vector<uint8_t> byte_buffer_;
  std::vector<std::vector<uint8_t>> wave_byte_buffers_;
};

}  // namespace kaldi_cbe
}  // namespace custom
}  // namespace inferenceserver
}  // namespace nvidia
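
// ---------------------------------------------------------------------------
// Usage sketch (added for illustration; not part of the original header).
// The names `model_config`, `payload_cnt`, `payloads`, `input_fn`, and
// `output_fn` below are placeholders for values normally supplied by the
// custom-backend SDK glue (see src/custom/sdk/custom_instance.h):
//
//   using nvidia::inferenceserver::custom::kaldi_cbe::Context;
//
//   // One Context per model instance.
//   Context ctx("kaldi_cbe_0", model_config, /* gpu_device = */ 0);
//   if (ctx.Init() != 0) {
//     // Model configuration or model files could not be loaded.
//   }
//
//   // Called for each batch of requests scheduled by the sequence batcher.
//   int err = ctx.Execute(payload_cnt, payloads, input_fn, output_fn);
// ---------------------------------------------------------------------------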