From b9bcf1f093c3053fdc4e2cf4a1d38e27bbf429fb Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 19 十月 2023 14:03:48 +0800
Subject: [PATCH] docs
---
funasr/runtime/onnxruntime/src/audio.cpp | 56 +++++++++++++++++++++++++++++++++++++++-----------------
1 files changed, 39 insertions(+), 17 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/audio.cpp b/funasr/runtime/onnxruntime/src/audio.cpp
index 94fcefc..76a01f9 100644
--- a/funasr/runtime/onnxruntime/src/audio.cpp
+++ b/funasr/runtime/onnxruntime/src/audio.cpp
@@ -9,6 +9,9 @@
#include "audio.h"
#include "precomp.h"
+#ifdef _MSC_VER
+#pragma warning(disable:4996)
+#endif
#if defined(__APPLE__)
#include <string.h>
@@ -260,14 +263,14 @@
// from file
AVFormatContext* formatContext = avformat_alloc_context();
if (avformat_open_input(&formatContext, filename, NULL, NULL) != 0) {
- printf("Error: Could not open input file.");
+ LOG(ERROR) << "Error: Could not open input file.";
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
return false;
}
if (avformat_find_stream_info(formatContext, NULL) < 0) {
- printf("Error: Could not find stream information.");
+ LOG(ERROR) << "Error: Could not find stream information.";
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
return false;
@@ -277,23 +280,28 @@
int audioStreamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0);
if (audioStreamIndex >= 0) {
codecParameters = formatContext->streams[audioStreamIndex]->codecpar;
+ }else {
+ LOG(ERROR) << "Error: Could not find an audio stream in the input file.";
+ avformat_close_input(&formatContext);
+ avformat_free_context(formatContext);
+ return false;
}
AVCodecContext* codecContext = avcodec_alloc_context3(codec);
if (!codecContext) {
- fprintf(stderr, "Failed to allocate codec context\n");
+ LOG(ERROR) << "Failed to allocate codec context";
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
return false;
}
if (avcodec_parameters_to_context(codecContext, codecParameters) != 0) {
- printf("Error: Could not copy codec parameters to codec context.");
+ LOG(ERROR) << "Error: Could not copy codec parameters to codec context.";
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
return false;
}
if (avcodec_open2(codecContext, codec, NULL) < 0) {
- printf("Error: Could not open audio decoder.");
+ LOG(ERROR) << "Error: Could not open audio decoder.";
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
@@ -311,14 +319,14 @@
nullptr // parent context
);
if (swr_ctx == nullptr) {
- std::cerr << "Could not initialize resampler" << std::endl;
+ LOG(ERROR) << "Could not initialize resampler";
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
return false;
}
if (swr_init(swr_ctx) != 0) {
- std::cerr << "Could not initialize resampler" << std::endl;
+ LOG(ERROR) << "Could not initialize resampler";
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
@@ -356,7 +364,7 @@
in_samples // input buffer size
);
if (ret < 0) {
- std::cerr << "Error resampling audio" << std::endl;
+ LOG(ERROR) << "Error resampling audio";
break;
}
std::copy(resampled_buffer.begin(), resampled_buffer.end(), std::back_inserter(resampled_buffers));
@@ -423,7 +431,7 @@
return false;
#else
// from buf
- char* buf_copy = (char *)malloc(n_file_len);
+ void* buf_copy = av_malloc(n_file_len);
memcpy(buf_copy, buf, n_file_len);
AVIOContext* avio_ctx = avio_alloc_context(
@@ -438,7 +446,7 @@
AVFormatContext* formatContext = avformat_alloc_context();
formatContext->pb = avio_ctx;
if (avformat_open_input(&formatContext, "", NULL, NULL) != 0) {
- printf("Error: Could not open input file.");
+ LOG(ERROR) << "Error: Could not open input file.";
avio_context_free(&avio_ctx);
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
@@ -446,7 +454,7 @@
}
if (avformat_find_stream_info(formatContext, NULL) < 0) {
- printf("Error: Could not find stream information.");
+ LOG(ERROR) << "Error: Could not find stream information.";
avio_context_free(&avio_ctx);
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
@@ -460,14 +468,14 @@
}
AVCodecContext* codecContext = avcodec_alloc_context3(codec);
if (!codecContext) {
- fprintf(stderr, "Failed to allocate codec context\n");
+ LOG(ERROR) << "Failed to allocate codec context";
avio_context_free(&avio_ctx);
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
return false;
}
if (avcodec_parameters_to_context(codecContext, codecParameters) != 0) {
- printf("Error: Could not copy codec parameters to codec context.");
+ LOG(ERROR) << "Error: Could not copy codec parameters to codec context.";
avio_context_free(&avio_ctx);
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
@@ -475,7 +483,7 @@
return false;
}
if (avcodec_open2(codecContext, codec, NULL) < 0) {
- printf("Error: Could not open audio decoder.");
+ LOG(ERROR) << "Error: Could not open audio decoder.";
avio_context_free(&avio_ctx);
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
@@ -494,7 +502,7 @@
nullptr // parent context
);
if (swr_ctx == nullptr) {
- std::cerr << "Could not initialize resampler" << std::endl;
+ LOG(ERROR) << "Could not initialize resampler";
avio_context_free(&avio_ctx);
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
@@ -502,7 +510,7 @@
return false;
}
if (swr_init(swr_ctx) != 0) {
- std::cerr << "Could not initialize resampler" << std::endl;
+ LOG(ERROR) << "Could not initialize resampler";
avio_context_free(&avio_ctx);
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
@@ -541,7 +549,7 @@
in_samples // input buffer size
);
if (ret < 0) {
- std::cerr << "Error resampling audio" << std::endl;
+ LOG(ERROR) << "Error resampling audio";
break;
}
std::copy(resampled_buffer.begin(), resampled_buffer.end(), std::back_inserter(resampled_buffers));
@@ -1112,6 +1120,8 @@
if(asr_mode != ASR_OFFLINE){
if(buff_len >= step){
frame = new AudioFrame(step);
+ frame->global_start = speech_start;
+ frame->global_end = speech_start + step/seg_sample;
frame->data = (float*)malloc(sizeof(float) * step);
memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
asr_online_queue.push(frame);
@@ -1138,6 +1148,8 @@
if(asr_mode != ASR_OFFLINE){
frame = new AudioFrame(end-start);
frame->is_final = true;
+ frame->global_start = speech_start_i;
+ frame->global_end = speech_end_i;
frame->data = (float*)malloc(sizeof(float) * (end-start));
memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
asr_online_queue.push(frame);
@@ -1147,6 +1159,8 @@
if(asr_mode != ASR_ONLINE){
frame = new AudioFrame(end-start);
frame->is_final = true;
+ frame->global_start = speech_start_i;
+ frame->global_end = speech_end_i;
frame->data = (float*)malloc(sizeof(float) * (end-start));
memcpy(frame->data, all_samples.data()+start-offset, (end-start)*sizeof(float));
asr_offline_queue.push(frame);
@@ -1168,6 +1182,8 @@
if(asr_mode != ASR_OFFLINE){
if(buff_len >= step){
frame = new AudioFrame(step);
+ frame->global_start = speech_start;
+ frame->global_end = speech_start + step/seg_sample;
frame->data = (float*)malloc(sizeof(float) * step);
memcpy(frame->data, all_samples.data()+start-offset, step*sizeof(float));
asr_online_queue.push(frame);
@@ -1191,6 +1207,8 @@
if(asr_mode != ASR_ONLINE){
frame = new AudioFrame(end-offline_start);
frame->is_final = true;
+ frame->global_start = speech_offline_start;
+ frame->global_end = speech_end_i;
frame->data = (float*)malloc(sizeof(float) * (end-offline_start));
memcpy(frame->data, all_samples.data()+offline_start-offset, (end-offline_start)*sizeof(float));
asr_offline_queue.push(frame);
@@ -1207,6 +1225,8 @@
}
frame = new AudioFrame(step);
frame->is_final = is_final;
+ frame->global_start = (int)((start+sample_offset)/seg_sample);
+ frame->global_end = frame->global_start + step/seg_sample;
frame->data = (float*)malloc(sizeof(float) * step);
memcpy(frame->data, all_samples.data()+start-offset+sample_offset, step*sizeof(float));
asr_online_queue.push(frame);
@@ -1215,6 +1235,8 @@
}else{
frame = new AudioFrame(0);
frame->is_final = true;
+ frame->global_start = speech_start; // in this case start >= end
+ frame->global_end = speech_end_i;
asr_online_queue.push(frame);
frame = NULL;
}
--
Gitblit v1.9.1