| | |
| | | using namespace std; |
| | | using namespace paraformer; |
| | | |
| | | ModelImp::ModelImp(const char* path,int nNumThread, bool quantize) |
| | | { |
| | | ModelImp::ModelImp(const char* path,int nNumThread, bool quantize, bool use_vad) |
| | | :env_(ORT_LOGGING_LEVEL_ERROR, "paraformer"),sessionOptions{}{ |
| | | string model_path; |
| | | string cmvn_path; |
| | | string config_path; |
| | | |
| | | // VAD model |
| | | if(use_vad){ |
| | | string vad_path = pathAppend(path, "vad_model.onnx"); |
| | | string mvn_path = pathAppend(path, "vad.mvn"); |
| | | vadHandle = make_unique<FsmnVad>(); |
| | | vadHandle->init_vad(vad_path, mvn_path, model_sample_rate, 800, 15000, 0.9); |
| | | } |
| | | |
| | | if(quantize) |
| | | { |
| | |
| | | cmvn_path = pathAppend(path, "am.mvn"); |
| | | config_path = pathAppend(path, "config.yaml"); |
| | | |
| | | //fe = new FeatureExtract(3); |
| | | // knf options |
| | | fbank_opts.frame_opts.dither = 0; |
| | | fbank_opts.mel_opts.num_bins = 80; |
| | | fbank_opts.frame_opts.samp_freq = model_sample_rate; |
| | | fbank_opts.frame_opts.window_type = "hamming"; |
| | | fbank_opts.frame_opts.frame_shift_ms = 10; |
| | | fbank_opts.frame_opts.frame_length_ms = 25; |
| | | fbank_opts.energy_floor = 0; |
| | | fbank_opts.mel_opts.debug_mel = false; |
| | | // fbank_ = std::make_unique<knf::OnlineFbank>(fbank_opts); |
| | | |
| | | //sessionOptions.SetInterOpNumThreads(1); |
| | | // sessionOptions.SetInterOpNumThreads(1); |
| | | sessionOptions.SetIntraOpNumThreads(nNumThread); |
| | | sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); |
| | | sessionOptions.SetGraphOptimizationLevel(ORT_ENABLE_ALL); |
| | | // DisableCpuMemArena can improve performance |
| | | sessionOptions.DisableCpuMemArena(); |
| | | |
| | | #ifdef _WIN32 |
| | | wstring wstrPath = strToWstr(model_path); |
| | | m_session = new Ort::Session(env, wstrPath.c_str(), sessionOptions); |
| | | m_session = std::make_unique<Ort::Session>(env_, model_path.c_str(), sessionOptions); |
| | | #else |
| | | m_session = new Ort::Session(env, model_path.c_str(), sessionOptions); |
| | | m_session = std::make_unique<Ort::Session>(env_, model_path.c_str(), sessionOptions); |
| | | #endif |
| | | |
| | | string strName; |
| | | getInputName(m_session, strName); |
| | | getInputName(m_session.get(), strName); |
| | | m_strInputNames.push_back(strName.c_str()); |
| | | getInputName(m_session, strName,1); |
| | | getInputName(m_session.get(), strName,1); |
| | | m_strInputNames.push_back(strName); |
| | | |
| | | getOutputName(m_session, strName); |
| | | getOutputName(m_session.get(), strName); |
| | | m_strOutputNames.push_back(strName); |
| | | getOutputName(m_session, strName,1); |
| | | getOutputName(m_session.get(), strName,1); |
| | | m_strOutputNames.push_back(strName); |
| | | |
| | | for (auto& item : m_strInputNames) |
| | |
| | | |
| | | ModelImp::~ModelImp() |
| | | { |
| | | //if(fe) |
| | | // delete fe; |
| | | if (m_session) |
| | | { |
| | | delete m_session; |
| | | m_session = nullptr; |
| | | } |
| | | if(vocab) |
| | | delete vocab; |
| | | } |
| | | |
| | | void ModelImp::reset() |
| | | { |
| | | //fe->reset(); |
| | | printf("Not Imp!!!!!!\n"); |
| | | } |
| | | |
| | | void ModelImp::apply_lfr(Tensor<float>*& din) |
| | | { |
| | | int mm = din->size[2]; |
| | | int ll = ceil(mm / 6.0); |
| | | Tensor<float>* tmp = new Tensor<float>(ll, 560); |
| | | int out_offset = 0; |
| | | for (int i = 0; i < ll; i++) { |
| | | for (int j = 0; j < 7; j++) { |
| | | int idx = i * 6 + j - 3; |
| | | if (idx < 0) { |
| | | idx = 0; |
| | | } |
| | | if (idx >= mm) { |
| | | idx = mm - 1; |
| | | } |
| | | memcpy(tmp->buff + out_offset, din->buff + idx * 80, |
| | | sizeof(float) * 80); |
| | | out_offset += 80; |
| | | } |
| | | vector<std::vector<int>> ModelImp::vad_seg(std::vector<float>& pcm_data){ |
| | | return vadHandle->infer(pcm_data); |
| | | } |
| | | |
| | | vector<float> ModelImp::FbankKaldi(float sample_rate, const float* waves, int len) { |
| | | knf::OnlineFbank fbank_(fbank_opts); |
| | | fbank_.AcceptWaveform(sample_rate, waves, len); |
| | | //fbank_->InputFinished(); |
| | | int32_t frames = fbank_.NumFramesReady(); |
| | | int32_t feature_dim = fbank_opts.mel_opts.num_bins; |
| | | vector<float> features(frames * feature_dim); |
| | | float *p = features.data(); |
| | | |
| | | for (int32_t i = 0; i != frames; ++i) { |
| | | const float *f = fbank_.GetFrame(i); |
| | | std::copy(f, f + feature_dim, p); |
| | | p += feature_dim; |
| | | } |
| | | delete din; |
| | | din = tmp; |
| | | |
| | | return features; |
| | | } |
| | | |
| | | void ModelImp::load_cmvn(const char *filename) |
| | |
| | | } |
| | | } |
| | | |
| | | void ModelImp::apply_cmvn(Tensor<float>* din) |
| | | { |
| | | const float* var; |
| | | const float* mean; |
| | | var = vars_list.data(); |
| | | mean= means_list.data(); |
| | | |
| | | int m = din->size[2]; |
| | | int n = din->size[3]; |
| | | |
| | | for (int i = 0; i < m; i++) { |
| | | for (int j = 0; j < n; j++) { |
| | | int idx = i * n + j; |
| | | din->buff[idx] = (din->buff[idx] + mean[j]) * var[j]; |
| | | } |
| | | } |
| | | } |
| | | |
| | | string ModelImp::greedy_search(float * in, int nLen ) |
| | | { |
| | | vector<int> hyps; |
| | |
| | | return vocab->vector2stringV2(hyps); |
| | | } |
| | | |
| | | vector<float> ModelImp::ApplyLFR(const std::vector<float> &in) |
| | | { |
| | | int32_t in_feat_dim = fbank_opts.mel_opts.num_bins; |
| | | int32_t in_num_frames = in.size() / in_feat_dim; |
| | | int32_t out_num_frames = |
| | | (in_num_frames - lfr_window_size) / lfr_window_shift + 1; |
| | | int32_t out_feat_dim = in_feat_dim * lfr_window_size; |
| | | |
| | | std::vector<float> out(out_num_frames * out_feat_dim); |
| | | |
| | | const float *p_in = in.data(); |
| | | float *p_out = out.data(); |
| | | |
| | | for (int32_t i = 0; i != out_num_frames; ++i) { |
| | | std::copy(p_in, p_in + out_feat_dim, p_out); |
| | | |
| | | p_out += out_feat_dim; |
| | | p_in += lfr_window_shift * in_feat_dim; |
| | | } |
| | | |
| | | return out; |
| | | } |
| | | |
| | | void ModelImp::ApplyCMVN(std::vector<float> *v) |
| | | { |
| | | int32_t dim = means_list.size(); |
| | | int32_t num_frames = v->size() / dim; |
| | | |
| | | float *p = v->data(); |
| | | |
| | | for (int32_t i = 0; i != num_frames; ++i) { |
| | | for (int32_t k = 0; k != dim; ++k) { |
| | | p[k] = (p[k] + means_list[k]) * vars_list[k]; |
| | | } |
| | | |
| | | p += dim; |
| | | } |
| | | } |
| | | |
| | | string ModelImp::forward(float* din, int len, int flag) |
| | | { |
| | | Tensor<float>* in; |
| | | FeatureExtract* fe = new FeatureExtract(3); |
| | | fe->reset(); |
| | | fe->insert(din, len, flag); |
| | | fe->fetch(in); |
| | | apply_lfr(in); |
| | | apply_cmvn(in); |
| | | Ort::RunOptions run_option; |
| | | |
| | | std::array<int64_t, 3> input_shape_{ in->size[0],in->size[2],in->size[3] }; |
| | | int32_t in_feat_dim = fbank_opts.mel_opts.num_bins; |
| | | std::vector<float> wav_feats = FbankKaldi(model_sample_rate, din, len); |
| | | wav_feats = ApplyLFR(wav_feats); |
| | | ApplyCMVN(&wav_feats); |
| | | |
| | | int32_t feat_dim = lfr_window_size*in_feat_dim; |
| | | int32_t num_frames = wav_feats.size() / feat_dim; |
| | | |
| | | #ifdef _WIN_X86 |
| | | Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU); |
| | | #else |
| | | Ort::MemoryInfo m_memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); |
| | | #endif |
| | | |
| | | const int64_t input_shape_[3] = {1, num_frames, feat_dim}; |
| | | Ort::Value onnx_feats = Ort::Value::CreateTensor<float>(m_memoryInfo, |
| | | in->buff, |
| | | in->buff_size, |
| | | input_shape_.data(), |
| | | input_shape_.size()); |
| | | wav_feats.data(), |
| | | wav_feats.size(), |
| | | input_shape_, |
| | | 3); |
| | | |
| | | std::vector<int32_t> feats_len{ in->size[2] }; |
| | | std::vector<int64_t> feats_len_dim{ 1 }; |
| | | Ort::Value onnx_feats_len = Ort::Value::CreateTensor( |
| | | m_memoryInfo, |
| | | feats_len.data(), |
| | | feats_len.size() * sizeof(int32_t), |
| | | feats_len_dim.data(), |
| | | feats_len_dim.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32); |
| | | const int64_t paraformer_length_shape[1] = {1}; |
| | | std::vector<int32_t> paraformer_length; |
| | | paraformer_length.emplace_back(num_frames); |
| | | Ort::Value onnx_feats_len = Ort::Value::CreateTensor<int32_t>( |
| | | m_memoryInfo, paraformer_length.data(), paraformer_length.size(), paraformer_length_shape, 1); |
| | | |
| | | std::vector<Ort::Value> input_onnx; |
| | | input_onnx.emplace_back(std::move(onnx_feats)); |
| | | input_onnx.emplace_back(std::move(onnx_feats_len)); |
| | | |
| | | string result; |
| | | try { |
| | | |
| | | auto outputTensor = m_session->Run(run_option, m_szInputNames.data(), input_onnx.data(), m_szInputNames.size(), m_szOutputNames.data(), m_szOutputNames.size()); |
| | | auto outputTensor = m_session->Run(Ort::RunOptions{nullptr}, m_szInputNames.data(), input_onnx.data(), input_onnx.size(), m_szOutputNames.data(), m_szOutputNames.size()); |
| | | std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape(); |
| | | |
| | | int64_t outputCount = std::accumulate(outputShape.begin(), outputShape.end(), 1, std::multiplies<int64_t>()); |
| | |
| | | catch (...) |
| | | { |
| | | result = ""; |
| | | } |
| | | |
| | | if(in){ |
| | | delete in; |
| | | in = nullptr; |
| | | } |
| | | if(fe){ |
| | | delete fe; |
| | | fe = nullptr; |
| | | } |
| | | |
| | | return result; |