From fbd9fbbde066a483fb903fe9c6c76fb95bc6fc2b Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Thu, 17 Aug 2023 17:13:37 +0800
Subject: [PATCH] update timestamp

---
 funasr/runtime/onnxruntime/src/paraformer.h |   79 ++++++++++++++++++++++++++-------------
 1 file changed, 53 insertions(+), 26 deletions(-)

diff --git a/funasr/runtime/onnxruntime/src/paraformer.h b/funasr/runtime/onnxruntime/src/paraformer.h
index 3aa7057..0dd55b5 100644
--- a/funasr/runtime/onnxruntime/src/paraformer.h
+++ b/funasr/runtime/onnxruntime/src/paraformer.h
@@ -4,12 +4,9 @@
 */
 #pragma once
 
-#ifndef PARAFORMER_MODELIMP_H
-#define PARAFORMER_MODELIMP_H
-
 #include "precomp.h"
 
-namespace paraformer {
+namespace funasr {
 
     class Paraformer : public Model {
     /**
@@ -18,39 +15,69 @@
      * https://arxiv.org/pdf/2206.08317.pdf
     */
     private:
-        //std::unique_ptr<knf::OnlineFbank> fbank_;
-        knf::FbankOptions fbank_opts;
+        Vocab* vocab = nullptr;  // starts null; who allocates/frees it is not visible in this header
+        //const float scale = 22.6274169979695;  (former value, kept for reference)
+        const float scale = 1.0;  // NOTE(review): presumably a feature/input multiplier -- confirm usage in the .cpp
 
-        Vocab* vocab;
-        vector<float> means_list;
-        vector<float> vars_list;
-        const float scale = 22.6274169979695;
-        int32_t lfr_window_size = 7;
-        int32_t lfr_window_shift = 6;
-
+        void LoadOnlineConfigFromYaml(const char* filename);  // NOTE(review): presumably loads the online-model settings from a YAML file -- confirm
         void LoadCmvn(const char *filename);
         vector<float> ApplyLfr(const vector<float> &in);
         void ApplyCmvn(vector<float> *v);
-        string GreedySearch( float* in, int n_len, int64_t token_nums);
-
-        std::shared_ptr<Ort::Session> m_session;
-        Ort::Env env_;
-        Ort::SessionOptions session_options;
-
-        vector<string> m_strInputNames, m_strOutputNames;
-        vector<const char*> m_szInputNames;
-        vector<const char*> m_szOutputNames;
 
     public:
         Paraformer();
         ~Paraformer();
         void InitAsr(const std::string &am_model, const std::string &am_cmvn, const std::string &am_config, int thread_num);
+        // online: overload taking separate en_model and de_model arguments (presumably the encoder/decoder model files -- confirm)
+        void InitAsr(const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num);
+        // 2pass: overload taking am_model in addition to the en_model/de_model pair used by the online overload
+        void InitAsr(const std::string &am_model, const std::string &en_model, const std::string &de_model, const std::string &am_cmvn, const std::string &am_config, int thread_num);
         void Reset();
         vector<float> FbankKaldi(float sample_rate, const float* waves, int len);
-        string ForwardChunk(float* din, int len, int flag);
-        string Forward(float* din, int len, int flag);
+        string Forward(float* din, int len, bool input_finished=true);  // input_finished defaults to true when omitted
+        string GreedySearch( float* in, int n_len, int64_t token_nums, bool is_stamp=false, std::vector<float> us_alphas={0}, std::vector<float> us_cif_peak={0});  // both vectors are taken by value; each default is a one-element vector {0}, not an empty one
+        void TimestampOnnx(std::vector<float> &us_alphas, vector<float> us_cif_peak, vector<string>& char_list, std::string &res_str, 
+                           vector<vector<float>> &timestamp_list, float begin_time = 0.0, float total_offset = -1.5);  // us_cif_peak is copied; the other containers are non-const references. NOTE(review): units of begin_time/total_offset are not visible here
+        string PostProcess(std::vector<string> &raw_char, std::vector<std::vector<float>> &timestamp_list);  // both arguments are non-const references
+        string Rescoring();
+
+        knf::FbankOptions fbank_opts_;
+        vector<float> means_list_;  // NOTE(review): presumably filled by LoadCmvn -- confirm
+        vector<float> vars_list_;   // NOTE(review): presumably filled by LoadCmvn -- confirm
+        int lfr_m = PARA_LFR_M;  // PARA_LFR_M / PARA_LFR_N are defined outside this header
+        int lfr_n = PARA_LFR_N;
+
+        // paraformer-offline: one ONNX Runtime session, null until created
+        std::shared_ptr<Ort::Session> m_session_ = nullptr;
+        Ort::Env env_;
+        Ort::SessionOptions session_options_;
+
+        vector<string> m_strInputNames, m_strOutputNames;
+        vector<const char*> m_szInputNames;   // NOTE(review): presumably point into m_strInputNames -- confirm lifetime in the .cpp
+        vector<const char*> m_szOutputNames;  // NOTE(review): presumably point into m_strOutputNames -- confirm lifetime in the .cpp
+
+        // paraformer-online: separate encoder and decoder sessions, both null until created
+        std::shared_ptr<Ort::Session> encoder_session_ = nullptr;
+        std::shared_ptr<Ort::Session> decoder_session_ = nullptr;
+        vector<string> en_strInputNames, en_strOutputNames;
+        vector<const char*> en_szInputNames_;
+        vector<const char*> en_szOutputNames_;
+        vector<string> de_strInputNames, de_strOutputNames;
+        vector<const char*> de_szInputNames_;
+        vector<const char*> de_szOutputNames_;
+        // NOTE(review): the values below are in-class defaults; presumably overridden by LoadOnlineConfigFromYaml -- confirm
+        string window_type = "hamming";
+        int frame_length = 25;  // NOTE(review): unit not stated here (presumably milliseconds) -- confirm
+        int frame_shift = 10;   // NOTE(review): unit not stated here (presumably milliseconds) -- confirm
+        int n_mels = 80;
+        int encoder_size = 512;
+        int fsmn_layers = 16;
+        int fsmn_lorder = 10;
+        int fsmn_dims = 512;
+        float cif_threshold = 1.0;
+        float tail_alphas = 0.45;
+
+
     };
 
-} // namespace paraformer
-#endif
+} // namespace funasr

--
Gitblit v1.9.1