From 3372b13d24aceef7002cfa0fc8222b3085c15110 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Fri, 02 Jun 2023 22:02:31 +0800
Subject: [PATCH] add fsmn-vad-online
---
funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp | 75 +++++++++++++++++++++++++++++--------
1 files changed, 58 insertions(+), 17 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/funasr-onnx-offline-vad.cpp b/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
similarity index 67%
copy from funasr/runtime/onnxruntime/src/funasr-onnx-offline-vad.cpp
copy to funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
index 0f606c6..d9944a0 100644
--- a/funasr/runtime/onnxruntime/src/funasr-onnx-offline-vad.cpp
+++ b/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
@@ -18,6 +18,7 @@
#include "funasrruntime.h"
#include "tclap/CmdLine.h"
#include "com-define.h"
+#include "audio.h"
using namespace std;
@@ -39,9 +40,15 @@
}
void print_segs(vector<vector<int>>* vec) {
+ if((*vec).size() == 0){
+ return;
+ }
string seg_out="[";
for (int i = 0; i < vec->size(); i++) {
vector<int> inner_vec = (*vec)[i];
+ if(inner_vec.size() == 0){
+ continue;
+ }
seg_out += "[";
for (int j = 0; j < inner_vec.size(); j++) {
seg_out += to_string(inner_vec[j]);
@@ -120,32 +127,66 @@
LOG(ERROR)<<"Please check the wav extension!";
exit(-1);
}
-
+ // init online features
+ FUNASR_HANDLE online_hanlde=FsmnVadOnlineInit(vad_hanlde);
float snippet_time = 0.0f;
long taking_micros = 0;
for(auto& wav_file : wav_list){
- gettimeofday(&start, NULL);
- FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), FSMN_VAD_OFFLINE, NULL, 16000);
- gettimeofday(&end, NULL);
- seconds = (end.tv_sec - start.tv_sec);
- taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
- if (result)
- {
- vector<std::vector<int>>* vad_segments = FsmnVadGetResult(result, 0);
- print_segs(vad_segments);
- snippet_time += FsmnVadGetRetSnippetTime(result);
- FsmnVadFreeResult(result);
- }
- else
- {
- LOG(ERROR) << ("No return data!\n");
+ int32_t sampling_rate_ = -1;
+ funasr::Audio audio(1);
+ if(is_target_file(wav_file.c_str(), "wav")){
+ int32_t sampling_rate_ = -1;
+ if(!audio.LoadWav2Char(wav_file.c_str(), &sampling_rate_)){
+ LOG(ERROR)<<"Failed to load "<< wav_file;
+ exit(-1);
+ }
+ }else if(is_target_file(wav_file.c_str(), "pcm")){
+ if (!audio.LoadPcmwav2Char(wav_file.c_str(), &sampling_rate_)){
+ LOG(ERROR)<<"Failed to load "<< wav_file;
+ exit(-1);
+ }
+ }else{
+ LOG(ERROR)<<"Wrong wav extension";
+ exit(-1);
+ }
+ char* speech_buff = audio.GetSpeechChar();
+ int buff_len = audio.GetSpeechLen()*2;
+
+ int step = 3200;
+ bool is_final = false;
+
+ for (int sample_offset = 0; sample_offset < buff_len; sample_offset += std::min(step, buff_len - sample_offset)) {
+ if (sample_offset + step >= buff_len - 1) {
+ step = buff_len - sample_offset;
+ is_final = true;
+ } else {
+ is_final = false;
+ }
+ gettimeofday(&start, NULL);
+ FUNASR_RESULT result = FsmnVadInferBuffer(online_hanlde, speech_buff+sample_offset, step, NULL, is_final, 16000);
+ gettimeofday(&end, NULL);
+ seconds = (end.tv_sec - start.tv_sec);
+ taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
+
+ if (result)
+ {
+ vector<std::vector<int>>* vad_segments = FsmnVadGetResult(result, 0);
+ print_segs(vad_segments);
+ snippet_time += FsmnVadGetRetSnippetTime(result);
+ FsmnVadFreeResult(result);
+ }
+ else
+ {
+ LOG(ERROR) << ("No return data!\n");
+ }
}
}
-
+
LOG(INFO) << "Audio length: " << (double)snippet_time << " s";
LOG(INFO) << "Model inference takes: " << (double)taking_micros / 1000000 <<" s";
LOG(INFO) << "Model inference RTF: " << (double)taking_micros/ (snippet_time*1000000);
+ FsmnVadUninit(online_hanlde);
FsmnVadUninit(vad_hanlde);
return 0;
}
--
Gitblit v1.9.1