| | |
| | | */ |
| | | #pragma once |
| | | |
| | | #ifndef PARAFORMER_MODELIMP_H |
| | | #define PARAFORMER_MODELIMP_H |
| | | |
| | | #include "precomp.h" |
| | | |
| | | namespace paraformer { |
| | | namespace funasr { |
| | | |
| | | class Paraformer : public Model { |
| | | /** |
| | |
| | | * https://arxiv.org/pdf/2206.08317.pdf |
| | | */ |
| | | private: |
| | | //std::unique_ptr<knf::OnlineFbank> fbank_; |
| | | knf::FbankOptions fbank_opts; |
| | | Vocab* vocab = nullptr; |
| | | SegDict* seg_dict = nullptr; |
| | | //const float scale = 22.6274169979695; |
| | | const float scale = 1.0; |
| | | |
| | | Vocab* vocab; |
| | | vector<float> means_list; |
| | | vector<float> vars_list; |
| | | const float scale = 22.6274169979695; |
| | | int32_t lfr_window_size = 7; |
| | | int32_t lfr_window_shift = 6; |
| | | |
| | | void LoadConfigFromYaml(const char* filename); |
| | | void LoadOnlineConfigFromYaml(const char* filename); |
| | | void LoadCmvn(const char *filename); |
| | | vector<float> ApplyLfr(const vector<float> &in); |
| | | void ApplyCmvn(vector<float> *v); |
| | | string GreedySearch( float* in, int n_len, int64_t token_nums); |
| | | |
| | | std::shared_ptr<Ort::Session> m_session; |
| | | Ort::Env env_; |
| | | Ort::SessionOptions session_options; |
| | | |
| | | vector<string> m_strInputNames, m_strOutputNames; |
| | | vector<const char*> m_szInputNames; |
| | | vector<const char*> m_szOutputNames; |
| | | std::shared_ptr<Ort::Session> hw_m_session = nullptr; |
| | | Ort::Env hw_env_; |
| | | Ort::SessionOptions hw_session_options; |
| | | vector<string> hw_m_strInputNames, hw_m_strOutputNames; |
| | | vector<const char*> hw_m_szInputNames; |
| | | vector<const char*> hw_m_szOutputNames; |
| | | bool use_hotword; |
| | | |
| | | public: |
| | | Paraformer(); |
| | | ~Paraformer(); |
| | | void InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num); |
| | | // online |
| | | void InitAsr(const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num); |
| | | // 2pass |
| | | void InitAsr(const std::string &am_model, const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num); |
| | | void InitHwCompiler(const std::string &hw_model, int thread_num); |
| | | void InitSegDict(const std::string &seg_dict_model); |
| | | std::vector<std::vector<float>> CompileHotwordEmbedding(std::string &hotwords); |
| | | void Reset(); |
| | | vector<float> FbankKaldi(float sample_rate, const float* waves, int len); |
| | | string ForwardChunk(float* din, int len, int flag); |
| | | string Forward(float* din, int len, int flag); |
| | | string Forward(float* din, int len, bool input_finished=true, const std::vector<std::vector<float>> &hw_emb={{0.0}}); |
| | | string GreedySearch( float* in, int n_len, int64_t token_nums, bool is_stamp=false, std::vector<float> us_alphas={0}, std::vector<float> us_cif_peak={0}); |
| | | void TimestampOnnx(std::vector<float> &us_alphas, vector<float> us_cif_peak, vector<string>& char_list, std::string &res_str, |
| | | vector<vector<float>> ×tamp_list, float begin_time = 0.0, float total_offset = -1.5); |
| | | string PostProcess(std::vector<string> &raw_char, std::vector<std::vector<float>> ×tamp_list); |
| | | |
| | | string Rescoring(); |
| | | string GetLang(){return language;}; |
| | | |
| | | knf::FbankOptions fbank_opts_; |
| | | vector<float> means_list_; |
| | | vector<float> vars_list_; |
| | | int lfr_m = PARA_LFR_M; |
| | | int lfr_n = PARA_LFR_N; |
| | | |
| | | // paraformer-offline |
| | | std::shared_ptr<Ort::Session> m_session_ = nullptr; |
| | | Ort::Env env_; |
| | | Ort::SessionOptions session_options_; |
| | | |
| | | vector<string> m_strInputNames, m_strOutputNames; |
| | | vector<const char*> m_szInputNames; |
| | | vector<const char*> m_szOutputNames; |
| | | |
| | | std::string language="zh-cn"; |
| | | |
| | | // paraformer-online |
| | | std::shared_ptr<Ort::Session> encoder_session_ = nullptr; |
| | | std::shared_ptr<Ort::Session> decoder_session_ = nullptr; |
| | | vector<string> en_strInputNames, en_strOutputNames; |
| | | vector<const char*> en_szInputNames_; |
| | | vector<const char*> en_szOutputNames_; |
| | | vector<string> de_strInputNames, de_strOutputNames; |
| | | vector<const char*> de_szInputNames_; |
| | | vector<const char*> de_szOutputNames_; |
| | | |
| | | string window_type = "hamming"; |
| | | int frame_length = 25; |
| | | int frame_shift = 10; |
| | | int n_mels = 80; |
| | | int encoder_size = 512; |
| | | int fsmn_layers = 16; |
| | | int fsmn_lorder = 10; |
| | | int fsmn_dims = 512; |
| | | float cif_threshold = 1.0; |
| | | float tail_alphas = 0.45; |
| | | |
| | | |
| | | }; |
| | | |
| | | } // namespace paraformer |
| | | #endif |
| | | } // namespace funasr |