DeepLearningExamples/Kaldi/SpeechRecognition/kaldi-asr-client/asr_client_imp.h
2021-10-12 17:45:31 -07:00

87 lines
2.5 KiB
C++

// Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <grpc_client.h>
#include <time.h>
#include <atomic>
#include <cstdint>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>
#ifndef TRITON_KALDI_ASR_CLIENT_H_
#define TRITON_KALDI_ASR_CLIENT_H_
namespace ni = nvidia::inferenceserver;
namespace nic = nvidia::inferenceserver::client;
// Returns the current CLOCK_MONOTONIC reading, in seconds, as a double.
// The clock's reference point is arbitrary, so only the difference between
// two readings is meaningful.
double inline gettime_monotonic() {
  // Value-initialize the struct: the return code of clock_gettime() is not
  // checked, so without this a failed call would lead to reading an
  // indeterminate timespec.
  struct timespec ts{};
  clock_gettime(CLOCK_MONOTONIC, &ts);
  // Whole seconds plus the nanosecond remainder scaled to seconds.
  return static_cast<double>(ts.tv_sec) +
         static_cast<double>(ts.tv_nsec) / 1e9;
}
// Declares the state and entry points of an ASR client built on the Triton
// gRPC client (nic::InferenceServerGrpcClient).
//
// All member functions are defined outside this header, so the notes below
// describe only what the declarations show; anything about runtime behavior
// is marked as an assumption.
//
// Scalar members carry default member initializers so that none of them is
// ever left indeterminate if the out-of-line constructor does not set it.
// Declaration order is intentionally unchanged (it fixes initialization
// order and object layout).
class TritonASRClient {
  // Owns a single Triton gRPC client instance.
  struct TritonClient {
    std::unique_ptr<nic::InferenceServerGrpcClient> triton_client;
  };

  std::string url_;         // server URL passed to the constructor (presumably)
  std::string model_name_;  // model name passed to the constructor (presumably)

  std::vector<TritonClient> clients_;
  int nclients_ = 0;  // NOTE(review): presumably clients_.size() -- confirm

  std::vector<uint8_t> chunk_buf_;
  std::vector<int64_t> shape_;
  int max_chunk_byte_size_ = 0;

  // Atomic counter; judging by the name it tracks outstanding requests.
  // Explicitly zeroed: before C++20 a default-constructed std::atomic<int>
  // holds an indeterminate value.
  std::atomic<int> n_in_flight_{0};

  double started_at_ = 0.0;   // NOTE(review): likely a gettime_monotonic() stamp
  double total_audio_ = 0.0;  // NOTE(review): unit not visible here -- confirm

  bool print_results_ = false;
  bool print_partial_results_ = false;
  bool ctm_ = false;
  std::mutex stdout_m_;  // by name, serializes writes to stdout -- confirm

  int samps_per_chunk_ = 0;
  float samp_freq_ = 0.0f;

  // Per-sequence output record. Deliberately left without default member
  // initializers so it remains an aggregate under every language standard.
  struct Result {
    std::string raw_lattice;
    double latency;
  };

  // Maps keyed by a 64-bit id, each declared next to a mutex that, by its
  // name, is meant to guard it. The locking itself happens in the
  // implementation file.
  std::unordered_map<uint64_t, double> start_timestamps_;
  std::mutex start_timestamps_m_;

  std::unordered_map<uint64_t, Result> results_;
  std::mutex results_m_;

 public:
  // Constructs a client for `model_name` at `url`.
  // `ncontextes` presumably selects how many client contexts to create;
  // the remaining flags mirror the print_results_ / print_partial_results_ /
  // ctm_ / samp_freq_ members.
  TritonASRClient(const std::string& url, const std::string& model_name,
                  const int ncontextes, bool print_results,
                  bool print_partial_results, bool ctm, float samp_freq);

  void CreateClientContext();

  // Sends one chunk of float samples for the sequence `corr_id`.
  // `chunk_byte_size` is the size of `chunk` in bytes (per the name);
  // `start_of_sequence` / `end_of_sequence` flag the first and last chunk.
  // NOTE(review): meaning of `index` is not visible from this header.
  void SendChunk(uint64_t corr_id, bool start_of_sequence, bool end_of_sequence,
                 float* chunk, int chunk_byte_size, uint64_t index);

  // NOTE(review): presumably blocks until n_in_flight_ drains -- confirm.
  void WaitForCallbacks();

  void PrintStats(bool print_latency_stats, bool print_throughput);

  // Writes lattices to `clat_wspecifier`; `corr_id_and_keys` maps each
  // 64-bit id to the string key used for it.
  void WriteLatticesToFile(
      const std::string& clat_wspecifier,
      const std::unordered_map<uint64_t, std::string>& corr_id_and_keys);
};
#endif // TRITON_KALDI_ASR_CLIENT_H_