From b454a1054fadbff0ee963944ff42f66b98317582 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: Tue, 08 Aug 2023 11:17:43 +0800
Subject: [PATCH] update online runtime, including vad-online, paraformer-online, punc-online,2pass (#815)
---
funasr/runtime/onnxruntime/include/audio.h | 34 ++++++++++++++++++++++++++++++----
1 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/funasr/runtime/onnxruntime/include/audio.h b/funasr/runtime/onnxruntime/include/audio.h
index a1b6312..c8ca876 100644
--- a/funasr/runtime/onnxruntime/include/audio.h
+++ b/funasr/runtime/onnxruntime/include/audio.h
@@ -5,6 +5,7 @@
#include <stdint.h>
#include "vad-model.h"
#include "offline-stream.h"
+#include "com-define.h"
#ifndef WAV_HEADER_SIZE
#define WAV_HEADER_SIZE 44
@@ -17,11 +18,13 @@
private:
int start;
int end;
- int len;
+
public:
AudioFrame();
AudioFrame(int len);
+ AudioFrame(const AudioFrame &other);
+ AudioFrame(int start, int end, bool is_final);
~AudioFrame();
int SetStart(int val);
@@ -29,6 +32,10 @@
int GetStart();
int GetLen();
int Disp();
+ // 2pass: public fields added for the two-pass (ASR_TWO_PASS) mode
+ bool is_final = false; // defaults to false; presumably flags the last frame of a segment — confirm in audio.cpp
+ float* data = nullptr; // sample buffer pointer, null by default; NOTE(review): ownership/freeing is not visible in this header — confirm
+ int len; // no in-class initializer; this patch moves it out of the private section above
};
class Audio {
@@ -38,10 +45,11 @@
char* speech_char=nullptr;
int speech_len;
int speech_align_len;
- int offset;
float align_size;
int data_type;
queue<AudioFrame *> frame_queue;
+ queue<AudioFrame *> asr_online_queue;
+ queue<AudioFrame *> asr_offline_queue;
public:
Audio(int data_type);
@@ -56,17 +64,35 @@
bool LoadPcmwav(const char* filename, int32_t* sampling_rate);
bool LoadPcmwav2Char(const char* filename, int32_t* sampling_rate);
bool LoadOthers2Char(const char* filename);
- bool FfmpegLoad(const char *filename);
+ bool FfmpegLoad(const char *filename, bool copy2char=false);
bool FfmpegLoad(const char* buf, int n_file_len);
- int FetchChunck(float *&dout, int len);
+ int FetchChunck(AudioFrame *&frame);
+ int FetchTpass(AudioFrame *&frame);
int Fetch(float *&dout, int &len, int &flag);
void Padding();
void Split(OfflineStream* offline_streamj);
void Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, bool input_finished=true);
+ void Split(VadModel* vad_obj, int chunk_len, bool input_finished=true, ASR_TYPE asr_mode=ASR_TWO_PASS);
float GetTimeLen();
int GetQueueSize() { return (int)frame_queue.size(); }
char* GetSpeechChar(){return speech_char;}
int GetSpeechLen(){return speech_len;}
+
+ // 2pass: public members added for the two-pass (ASR_TWO_PASS) mode; ResetIndex() restores all of them except seg_sample to the defaults shown here
+ vector<float> all_samples; // presumably accumulates the samples received so far — confirm in audio.cpp
+ int offset = 0; // previously declared earlier in the class without an initializer; now starts at 0
+ int speech_start=-1, speech_end=0; // -1 presumably means "no speech start detected yet" — confirm
+ int speech_offline_start=-1; // same -1 default as speech_start; presumably the start used for the offline pass — confirm
+
+ int seg_sample = MODEL_SAMPLE_RATE/1000; // presumably samples per millisecond, assuming MODEL_SAMPLE_RATE is in Hz — confirm in com-define.h
+ bool LoadPcmwavOnline(const char* buf, int n_file_len, int32_t* sampling_rate); // takes an in-memory buffer of n_file_len bytes; defined outside this header — whether sampling_rate is in/out is not visible here
+ void ResetIndex(){ // restores the 2pass bookkeeping fields to their in-class default values
+ speech_start=-1;
+ speech_end=0;
+ speech_offline_start=-1;
+ offset = 0;
+ all_samples.clear(); // empties the sample vector; frame_queue and the asr_*_queue members are not touched here
+ }
};
} // namespace funasr
--
Gitblit v1.9.1