From fd22b6e7f36e963ef29dbd3eafb0e0d6f2e12fa7 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Wed, 09 Aug 2023 14:27:20 +0800
Subject: [PATCH] Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main
---
funasr/runtime/onnxruntime/src/fsmn-vad-online.cpp | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/fsmn-vad-online.cpp b/funasr/runtime/onnxruntime/src/fsmn-vad-online.cpp
index 0346916..e16a1fc 100644
--- a/funasr/runtime/onnxruntime/src/fsmn-vad-online.cpp
+++ b/funasr/runtime/onnxruntime/src/fsmn-vad-online.cpp
@@ -55,7 +55,7 @@
int frame_from_waves = (waves.size() - frame_sample_length_) / frame_shift_sample_length_ + 1;
int minus_frame = reserve_waveforms_.empty() ? (lfr_m - 1) / 2 : 0;
int lfr_splice_frame_idxs = OnlineLfrCmvn(vad_feats, input_finished);
- int reserve_frame_idx = lfr_splice_frame_idxs - minus_frame;
+ int reserve_frame_idx = std::abs(lfr_splice_frame_idxs - minus_frame);
reserve_waveforms_.clear();
reserve_waveforms_.insert(reserve_waveforms_.begin(),
waves.begin() + reserve_frame_idx * frame_shift_sample_length_,
@@ -86,7 +86,7 @@
int FsmnVadOnline::OnlineLfrCmvn(vector<vector<float>> &vad_feats, bool input_finished) {
vector<vector<float>> out_feats;
int T = vad_feats.size();
- int T_lrf = ceil((T - (lfr_m - 1) / 2) / lfr_n);
+ int T_lrf = ceil((T - (lfr_m - 1) / 2) / (float)lfr_n);
int lfr_splice_frame_idxs = T_lrf;
vector<float> p;
for (int i = 0; i < T_lrf; i++) {
@@ -175,6 +175,9 @@
vad_silence_duration_ = vad_silence_duration;
vad_max_len_ = vad_max_len;
vad_speech_noise_thres_ = vad_speech_noise_thres;
+
+ // 2pass
+ audio_handle = make_unique<Audio>(1);
}
FsmnVadOnline::~FsmnVadOnline() {
--
Gitblit v1.9.1