From b454a1054fadbff0ee963944ff42f66b98317582 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: 星期二, 08 八月 2023 11:17:43 +0800
Subject: [PATCH] update online runtime, including vad-online, paraformer-online, punc-online,2pass (#815)

---
 funasr/runtime/onnxruntime/include/audio.h |   34 ++++++++++++++++++++++++++++++----
 1 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/funasr/runtime/onnxruntime/include/audio.h b/funasr/runtime/onnxruntime/include/audio.h
index a1b6312..c8ca876 100644
--- a/funasr/runtime/onnxruntime/include/audio.h
+++ b/funasr/runtime/onnxruntime/include/audio.h
@@ -5,6 +5,7 @@
 #include <stdint.h>
 #include "vad-model.h"
 #include "offline-stream.h"
+#include "com-define.h"
 
 #ifndef WAV_HEADER_SIZE
 #define WAV_HEADER_SIZE 44
@@ -17,11 +18,13 @@
   private:
     int start;
     int end;
-    int len;
+
 
   public:
     AudioFrame();
     AudioFrame(int len);
+    AudioFrame(const AudioFrame &other);
+    AudioFrame(int start, int end, bool is_final);
 
     ~AudioFrame();
     int SetStart(int val);
@@ -29,6 +32,10 @@
     int GetStart();
     int GetLen();
     int Disp();
+    // 2pass
+    bool is_final = false;
+    float* data = nullptr;
+    int len;
 };
 
 class Audio {
@@ -38,10 +45,11 @@
     char* speech_char=nullptr;
     int speech_len;
     int speech_align_len;
-    int offset;
     float align_size;
     int data_type;
     queue<AudioFrame *> frame_queue;
+    queue<AudioFrame *> asr_online_queue;
+    queue<AudioFrame *> asr_offline_queue;
 
   public:
     Audio(int data_type);
@@ -56,17 +64,35 @@
     bool LoadPcmwav(const char* filename, int32_t* sampling_rate);
     bool LoadPcmwav2Char(const char* filename, int32_t* sampling_rate);
     bool LoadOthers2Char(const char* filename);
-    bool FfmpegLoad(const char *filename);
+    bool FfmpegLoad(const char *filename, bool copy2char=false);
     bool FfmpegLoad(const char* buf, int n_file_len);
-    int FetchChunck(float *&dout, int len);
+    int FetchChunck(AudioFrame *&frame);
+    int FetchTpass(AudioFrame *&frame);
     int Fetch(float *&dout, int &len, int &flag);
     void Padding();
     void Split(OfflineStream* offline_streamj);
     void Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, bool input_finished=true);
+    void Split(VadModel* vad_obj, int chunk_len, bool input_finished=true, ASR_TYPE asr_mode=ASR_TWO_PASS);
     float GetTimeLen();
     int GetQueueSize() { return (int)frame_queue.size(); }
     char* GetSpeechChar(){return speech_char;}
     int GetSpeechLen(){return speech_len;}
+
+    // 2pass
+    vector<float> all_samples;
+    int offset = 0;
+    int speech_start=-1, speech_end=0;
+    int speech_offline_start=-1;
+
+    int seg_sample = MODEL_SAMPLE_RATE/1000;
+    bool LoadPcmwavOnline(const char* buf, int n_file_len, int32_t* sampling_rate);
+    void ResetIndex(){
+      speech_start=-1;
+      speech_end=0;
+      speech_offline_start=-1;
+      offset = 0;
+      all_samples.clear();
+    }
 };
 
 } // namespace funasr

--
Gitblit v1.9.1