From fd22b6e7f36e963ef29dbd3eafb0e0d6f2e12fa7 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Wed, 09 Aug 2023 14:27:20 +0800
Subject: [PATCH] Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
---
funasr/runtime/onnxruntime/src/paraformer.h | 91 +++++++++++++++++++++++++++++----------------
1 files changed, 59 insertions(+), 32 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/paraformer.h b/funasr/runtime/onnxruntime/src/paraformer.h
index e29a4a9..16460bf 100644
--- a/funasr/runtime/onnxruntime/src/paraformer.h
+++ b/funasr/runtime/onnxruntime/src/paraformer.h
@@ -1,53 +1,80 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License (https://opensource.org/licenses/MIT)
+*/
#pragma once
-
-
-#ifndef PARAFORMER_MODELIMP_H
-#define PARAFORMER_MODELIMP_H
#include "precomp.h"
-namespace paraformer {
+namespace funasr {
class Paraformer : public Model {
+ /**
+ * Author: Speech Lab of DAMO Academy, Alibaba Group
+ * Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition
+ * https://arxiv.org/pdf/2206.08317.pdf
+ */
private:
- //std::unique_ptr<knf::OnlineFbank> fbank_;
- knf::FbankOptions fbank_opts;
+ Vocab* vocab = nullptr; // NOTE(review): raw pointer; ownership/cleanup is not visible in this header - confirm in the .cpp
+ // Previous value, kept for reference: const float scale = 22.6274169979695;
+ const float scale = 1.0; // changed from 22.6274169979695 by this patch
- std::unique_ptr<FsmnVad> vad_handle;
- std::unique_ptr<CTTransformer> punc_handle;
-
- Vocab* vocab;
- vector<float> means_list;
- vector<float> vars_list;
- const float scale = 22.6274169979695;
- int32_t lfr_window_size = 7;
- int32_t lfr_window_shift = 6;
-
+ void LoadOnlineConfigFromYaml(const char* filename); // NOTE(review): presumably fills the config members declared at the end of this class - confirm
void LoadCmvn(const char *filename);
vector<float> ApplyLfr(const vector<float> &in);
void ApplyCmvn(vector<float> *v);
- string GreedySearch( float* in, int n_len);
+ public:
+ Paraformer(); // no longer takes a model path/options; models are supplied through the InitAsr overloads
+ ~Paraformer();
+ void InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num); // NOTE(review): presumably the offline variant (single am_model) - confirm
+ // online: takes separate en_model and de_model files instead of a single am_model
+ void InitAsr(const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num);
+ // 2pass: takes am_model together with en_model and de_model
+ void InitAsr(const std::string &am_model, const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num);
+ void Reset();
+ vector<float> FbankKaldi(float sample_rate, const float* waves, int len);
+ string Forward(float* din, int len, bool input_finished=true); // third parameter changed from `int flag` to `bool input_finished` (defaults to true)
+ string GreedySearch( float* in, int n_len, int64_t token_nums); // moved from the private section to public and gained the token_nums parameter
+ string Rescoring();
- std::shared_ptr<Ort::Session> m_session;
+ knf::FbankOptions fbank_opts_;
+ vector<float> means_list_;
+ vector<float> vars_list_;
+ int lfr_m = PARA_LFR_M; // PARA_LFR_M is not defined in this header; NOTE(review): presumably supersedes lfr_window_size - confirm
+ int lfr_n = PARA_LFR_N; // PARA_LFR_N is not defined in this header; NOTE(review): presumably supersedes lfr_window_shift - confirm
+
+ // paraformer-offline: one Ort::Session plus its input/output name lists
+ std::shared_ptr<Ort::Session> m_session_ = nullptr;
Ort::Env env_;
- Ort::SessionOptions session_options;
+ Ort::SessionOptions session_options_;
vector<string> m_strInputNames, m_strOutputNames;
vector<const char*> m_szInputNames;
vector<const char*> m_szOutputNames;
- public:
- Paraformer(const char* path, int thread_num=0, bool quantize=false, bool use_vad=false, bool use_punc=false);
- ~Paraformer();
- void Reset();
- vector<float> FbankKaldi(float sample_rate, const float* waves, int len);
- string ForwardChunk(float* din, int len, int flag);
- string Forward(float* din, int len, int flag);
- string Rescoring();
- std::vector<std::vector<int>> VadSeg(std::vector<float>& pcm_data);
- string AddPunc(const char* sz_input);
+ // paraformer-online: separate encoder and decoder sessions, each with its own input/output name lists
+ std::shared_ptr<Ort::Session> encoder_session_ = nullptr;
+ std::shared_ptr<Ort::Session> decoder_session_ = nullptr;
+ vector<string> en_strInputNames, en_strOutputNames;
+ vector<const char*> en_szInputNames_;
+ vector<const char*> en_szOutputNames_;
+ vector<string> de_strInputNames, de_strOutputNames;
+ vector<const char*> de_szInputNames_;
+ vector<const char*> de_szOutputNames_;
+
+ string window_type = "hamming"; // in-class defaults from here on; NOTE(review): presumably overridden by LoadOnlineConfigFromYaml - confirm
+ int frame_length = 25; // NOTE(review): presumably milliseconds - confirm
+ int frame_shift = 10; // NOTE(review): presumably milliseconds - confirm
+ int n_mels = 80;
+ int encoder_size = 512;
+ int fsmn_layers = 16;
+ int fsmn_lorder = 10;
+ int fsmn_dims = 512;
+ float cif_threshold = 1.0;
+ float tail_alphas = 0.45;
+
+
};
-} // namespace paraformer
-#endif
+} // namespace funasr
--
Gitblit v1.9.1