From 654af12d8d73b4ff1504451e072be1297bbae0ca Mon Sep 17 00:00:00 2001
From: lyblsgo <wucong.lyb@alibaba-inc.com>
Date: Wed, 11 Oct 2023 19:48:17 +0800
Subject: [PATCH] Merge remote-tracking branch 'origin/main'
---
funasr/runtime/onnxruntime/src/audio.cpp | 44 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 38 insertions(+), 6 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/audio.cpp b/funasr/runtime/onnxruntime/src/audio.cpp
index a882078..6bc6015 100644
--- a/funasr/runtime/onnxruntime/src/audio.cpp
+++ b/funasr/runtime/onnxruntime/src/audio.cpp
@@ -9,6 +9,14 @@
#include "audio.h"
#include "precomp.h"
+#ifdef _MSC_VER
+#pragma warning(disable:4996)
+#endif
+
+#if defined(__APPLE__)
+#include <string.h>
+#else
+
extern "C" {
#include <libavutil/opt.h>
#include <libavcodec/avcodec.h>
@@ -17,6 +25,10 @@
#include <libavutil/samplefmt.h>
#include <libswresample/swresample.h>
}
+
+#endif
+
+
using namespace std;
@@ -245,6 +257,9 @@
}
bool Audio::FfmpegLoad(const char *filename, bool copy2char){
+#if defined(__APPLE__)
+ return false;
+#else
// from file
AVFormatContext* formatContext = avformat_alloc_context();
if (avformat_open_input(&formatContext, filename, NULL, NULL) != 0) {
@@ -403,12 +418,15 @@
}
else
return false;
-
+#endif
}
bool Audio::FfmpegLoad(const char* buf, int n_file_len){
+#if defined(__APPLE__)
+ return false;
+#else
// from buf
- char* buf_copy = (char *)malloc(n_file_len);
+ void* buf_copy = av_malloc(n_file_len);
memcpy(buf_copy, buf, n_file_len);
AVIOContext* avio_ctx = avio_alloc_context(
@@ -577,7 +595,7 @@
}
else
return false;
-
+#endif
}
@@ -1097,6 +1115,8 @@
if(asr_mode != ASR_OFFLINE){
if(buff_len >= step){
frame = new AudioFrame(step);
+ frame->global_start = speech_start;
+ frame->global_end = speech_start + step/seg_sample;
frame->data = (float*)malloc(sizeof(float) * step);
memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
asr_online_queue.push(frame);
@@ -1123,6 +1143,8 @@
if(asr_mode != ASR_OFFLINE){
frame = new AudioFrame(end-start);
frame->is_final = true;
+ frame->global_start = speech_start_i;
+ frame->global_end = speech_end_i;
frame->data = (float*)malloc(sizeof(float) * (end-start));
memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
asr_online_queue.push(frame);
@@ -1132,6 +1154,8 @@
if(asr_mode != ASR_ONLINE){
frame = new AudioFrame(end-start);
frame->is_final = true;
+ frame->global_start = speech_start_i;
+ frame->global_end = speech_end_i;
frame->data = (float*)malloc(sizeof(float) * (end-start));
memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
asr_offline_queue.push(frame);
@@ -1153,6 +1177,8 @@
if(asr_mode != ASR_OFFLINE){
if(buff_len >= step){
frame = new AudioFrame(step);
+ frame->global_start = speech_start;
+ frame->global_end = speech_start + step/seg_sample;
frame->data = (float*)malloc(sizeof(float) * step);
memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
asr_online_queue.push(frame);
@@ -1163,8 +1189,8 @@
}else if(speech_end_i != -1){ // [-1,100]
if(speech_start == -1 or speech_offline_start == -1){
- LOG(ERROR) <<"Vad start is null while vad end is available." ;
- exit(-1);
+ LOG(ERROR) <<"Vad start is null while vad end is available. Set vad start 0" ;
+ speech_start = 0;
}
int start = speech_start*seg_sample;
@@ -1176,6 +1202,8 @@
if(asr_mode != ASR_ONLINE){
frame = new AudioFrame(end-offline_start);
frame->is_final = true;
+ frame->global_start = speech_offline_start;
+ frame->global_end = speech_end_i;
frame->data = (float*)malloc(sizeof(float) * (end-offline_start));
memcpy(frame->data, all_samples.data()+offline_start-offset, (end-offline_start)*sizeof(float));
asr_offline_queue.push(frame);
@@ -1192,6 +1220,8 @@
}
frame = new AudioFrame(step);
frame->is_final = is_final;
+ frame->global_start = (int)((start+sample_offset)/seg_sample);
+ frame->global_end = frame->global_start + step/seg_sample;
frame->data = (float*)malloc(sizeof(float) * step);
memcpy(frame->data, all_samples.data()+start-offset+sample_offset, step*sizeof(float));
asr_online_queue.push(frame);
@@ -1200,6 +1230,8 @@
}else{
frame = new AudioFrame(0);
frame->is_final = true;
+ frame->global_start = speech_start; // in this case start >= end
+ frame->global_end = speech_end_i;
asr_online_queue.push(frame);
frame = NULL;
}
@@ -1229,4 +1261,4 @@
}
-} // namespace funasr
\ No newline at end of file
+} // namespace funasr
--
Gitblit v1.9.1