#ifndef VAD_SERVER_FSMNVAD_H #define VAD_SERVER_FSMNVAD_H #include "precomp.h" class FsmnVad { public: FsmnVad(); void Test(); void InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len, float vad_speech_noise_thres); std::vector> Infer(const std::vector &waves); void Reset(); private: void ReadModel(const std::string &vad_model); static void GetInputOutputInfo( const std::shared_ptr &session, std::vector *in_names, std::vector *out_names); void FbankKaldi(float sample_rate, std::vector> &vad_feats, const std::vector &waves); std::vector> &LfrCmvn(std::vector> &vad_feats, int lfr_m, int lfr_n); void Forward( const std::vector> &chunk_feats, std::vector> *out_prob); void LoadCmvn(const char *filename); void InitCache(); std::shared_ptr vad_session_ = nullptr; Ort::Env env_; Ort::SessionOptions session_options_; std::vector vad_in_names_; std::vector vad_out_names_; std::vector> in_cache_; knf::FbankOptions fbank_opts; std::vector means_list; std::vector vars_list; int vad_sample_rate_ = 16000; int vad_silence_duration_ = 800; int vad_max_len_ = 15000; double vad_speech_noise_thres_ = 0.9; }; #endif //VAD_SERVER_FSMNVAD_H