| | |
| | | } |
| | | |
| | | // offline |
| | | void Paraformer::InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){ |
| | | void Paraformer::InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, const std::string &token_file, int thread_num){ |
| | | LoadConfigFromYaml(am_config.c_str()); |
| | | // knf options |
| | | fbank_opts_.frame_opts.dither = 0; |
| | |
| | | m_szInputNames.push_back(item.c_str()); |
| | | for (auto& item : m_strOutputNames) |
| | | m_szOutputNames.push_back(item.c_str()); |
| | | vocab = new Vocab(am_config.c_str()); |
| | | phone_set_ = new PhoneSet(am_config.c_str()); |
| | | vocab = new Vocab(token_file.c_str()); |
| | | phone_set_ = new PhoneSet(token_file.c_str()); |
| | | LoadCmvn(am_cmvn.c_str()); |
| | | } |
| | | |
| | | // online |
| | | void Paraformer::InitAsr(const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){ |
| | | void Paraformer::InitAsr(const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, const std::string &token_file, int thread_num){ |
| | | |
| | | LoadOnlineConfigFromYaml(am_config.c_str()); |
| | | // knf options |
| | |
| | | for (auto& item : de_strOutputNames) |
| | | de_szOutputNames_.push_back(item.c_str()); |
| | | |
| | | vocab = new Vocab(am_config.c_str()); |
| | | phone_set_ = new PhoneSet(am_config.c_str()); |
| | | vocab = new Vocab(token_file.c_str()); |
| | | phone_set_ = new PhoneSet(token_file.c_str()); |
| | | LoadCmvn(am_cmvn.c_str()); |
| | | } |
| | | |
| | | // 2pass |
| | | void Paraformer::InitAsr(const std::string &am_model, const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num){ |
| | | void Paraformer::InitAsr(const std::string &am_model, const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, const std::string &token_file, int thread_num){ |
| | | // online |
| | | InitAsr(en_model, de_model, am_cmvn, am_config, thread_num); |
| | | InitAsr(en_model, de_model, am_cmvn, am_config, token_file, thread_num); |
| | | |
| | | // offline |
| | | try { |
| | |
| | | lm_ = std::shared_ptr<fst::Fst<fst::StdArc>>( |
| | | fst::Fst<fst::StdArc>::Read(lm_file)); |
| | | if (lm_){ |
| | | if (vocab) { delete vocab; } |
| | | vocab = new Vocab(lm_cfg_file.c_str(), lex_file.c_str()); |
| | | lm_vocab = new Vocab(lm_cfg_file.c_str(), lex_file.c_str()); |
| | | LOG(INFO) << "Successfully load lm file " << lm_file; |
| | | }else{ |
| | | LOG(ERROR) << "Failed to load lm file " << lm_file; |
| | |
| | | { |
| | | if(vocab){ |
| | | delete vocab; |
| | | } |
| | | if(lm_vocab){ |
| | | delete lm_vocab; |
| | | } |
| | | if(seg_dict){ |
| | | delete seg_dict; |
| | |
| | | asr_feats = out_feats; |
| | | } |
| | | |
| | | string Paraformer::Forward(float* din, int len, bool input_finished, const std::vector<std::vector<float>> &hw_emb, void* decoder_handle) |
| | | std::vector<std::string> Paraformer::Forward(float** din, int* len, bool input_finished, const std::vector<std::vector<float>> &hw_emb, void* decoder_handle, int batch_in) |
| | | { |
| | | std::vector<std::string> results; |
| | | string result=""; |
| | | WfstDecoder* wfst_decoder = (WfstDecoder*)decoder_handle; |
| | | int32_t in_feat_dim = fbank_opts_.mel_opts.num_bins; |
| | | |
| | | if(batch_in != 1){ |
| | | results.push_back(result); |
| | | return results; |
| | | } |
| | | |
| | | std::vector<std::vector<float>> asr_feats; |
| | | FbankKaldi(asr_sample_rate, din, len, asr_feats); |
| | | FbankKaldi(asr_sample_rate, din[0], len[0], asr_feats); |
| | | if(asr_feats.size() == 0){ |
| | | return ""; |
| | | results.push_back(result); |
| | | return results; |
| | | } |
| | | LfrCmvn(asr_feats); |
| | | int32_t feat_dim = lfr_m*in_feat_dim; |
| | |
| | | if (use_hotword) { |
| | | if(hw_emb.size()<=0){ |
| | | LOG(ERROR) << "hw_emb is null"; |
| | | return ""; |
| | | results.push_back(result); |
| | | return results; |
| | | } |
| | | //PrintMat(hw_emb, "input_clas_emb"); |
| | | const int64_t hotword_shape[3] = {1, static_cast<int64_t>(hw_emb.size()), static_cast<int64_t>(hw_emb[0].size())}; |
| | |
| | | }catch (std::exception const &e) |
| | | { |
| | | LOG(ERROR)<<e.what(); |
| | | return ""; |
| | | results.push_back(result); |
| | | return results; |
| | | } |
| | | |
| | | string result=""; |
| | | try { |
| | | auto outputTensor = m_session_->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), input_onnx.size(), m_szOutputNames.data(), m_szOutputNames.size()); |
| | | std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape(); |
| | |
| | | LOG(ERROR)<<e.what(); |
| | | } |
| | | |
| | | return result; |
| | | results.push_back(result); |
| | | return results; |
| | | } |
| | | |
| | | |
| | |
| | | return vocab; |
| | | } |
| | | |
// Accessor for the language-model vocabulary: returns the raw lm_vocab
// pointer held by this Paraformer instance. The function itself performs no
// allocation, copy, or null check.
// NOTE(review): ownership presumably stays with Paraformer, so callers should
// not delete the returned pointer, and it may be null when no LM was loaded -
// confirm against the constructor and destructor.
| | | Vocab* Paraformer::GetLmVocab() |
| | | { |
| | | return lm_vocab; |
| | | } |
| | | |
| | | PhoneSet* Paraformer::GetPhoneSet() |
| | | { |
| | | return phone_set_; |