From c2dee5e3c29eba79e591d9e9caebaef15ea4e56b Mon Sep 17 00:00:00 2001
From: hnluo <haoneng.lhn@alibaba-inc.com>
Date: Thu, 29 Jun 2023 11:09:28 +0800
Subject: [PATCH] Merge pull request #687 from alibaba-damo-academy/dev_lhn

---
 funasr/runtime/onnxruntime/include/audio.h |   51 +++++++++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 22 deletions(-)

diff --git a/funasr/runtime/onnxruntime/include/audio.h b/funasr/runtime/onnxruntime/include/audio.h
index 9e25aac..d2100a4 100644
--- a/funasr/runtime/onnxruntime/include/audio.h
+++ b/funasr/runtime/onnxruntime/include/audio.h
@@ -1,16 +1,17 @@
-
 #ifndef AUDIO_H
 #define AUDIO_H
 
 #include <queue>
 #include <stdint.h>
-#include "model.h"
+#include "vad-model.h"
+#include "offline-stream.h"
 
 #ifndef WAV_HEADER_SIZE
 #define WAV_HEADER_SIZE 44
 #endif
 
 using namespace std;
+namespace funasr {
 
 class AudioFrame {
   private:
@@ -23,17 +24,18 @@
     AudioFrame(int len);
 
     ~AudioFrame();
-    int set_start(int val);
-    int set_end(int val);
-    int get_start();
-    int get_len();
-    int disp();
+    int SetStart(int val);
+    int SetEnd(int val);
+    int GetStart();
+    int GetLen();
+    int Disp();
 };
 
 class Audio {
   private:
-    float *speech_data;
-    int16_t *speech_buff;
+    float *speech_data=nullptr;
+    int16_t *speech_buff=nullptr;
+    char* speech_char=nullptr;
     int speech_len;
     int speech_align_len;
     int offset;
@@ -45,19 +47,24 @@
     Audio(int data_type);
     Audio(int data_type, int size);
     ~Audio();
-    void disp();
-    bool loadwav(const char* filename, int32_t* sampling_rate);
-    void wavResample(int32_t sampling_rate, const float *waveform, int32_t n);
-    bool loadwav(const char* buf, int nLen, int32_t* sampling_rate);
-    bool loadpcmwav(const char* buf, int nFileLen, int32_t* sampling_rate);
-    bool loadpcmwav(const char* filename, int32_t* sampling_rate);
-    int fetch_chunck(float *&dout, int len);
-    int fetch(float *&dout, int &len, int &flag);
-    void padding();
-    void split(Model* pRecogObj);
-    float get_time_len();
-
-    int get_queue_size() { return (int)frame_queue.size(); }
+    void Disp();
+    void WavResample(int32_t sampling_rate, const float *waveform, int32_t n);
+    bool LoadWav(const char* buf, int n_len, int32_t* sampling_rate);
+    bool LoadWav(const char* filename, int32_t* sampling_rate);
+    bool LoadWav2Char(const char* filename, int32_t* sampling_rate);
+    bool LoadPcmwav(const char* buf, int n_file_len, int32_t* sampling_rate);
+    bool LoadPcmwav(const char* filename, int32_t* sampling_rate);
+    bool LoadPcmwav2Char(const char* filename, int32_t* sampling_rate);
+    int FetchChunck(float *&dout, int len);
+    int Fetch(float *&dout, int &len, int &flag);
+    void Padding();
+    void Split(OfflineStream* offline_streamj);
+    void Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, bool input_finished=true);
+    float GetTimeLen();
+    int GetQueueSize() { return (int)frame_queue.size(); }
+    char* GetSpeechChar(){return speech_char;}
+    int GetSpeechLen(){return speech_len;}
 };
 
+} // namespace funasr
 #endif

--
Gitblit v1.9.1