From 3313eb681e34d34292019dc20f6a1aff48a6dcfc Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Fri, 15 Dec 2023 10:27:59 +0800
Subject: [PATCH] fix bug of FfmpegLoad
---
runtime/onnxruntime/src/audio.cpp | 36 ++++++++++++++++--------------------
runtime/onnxruntime/include/audio.h | 4 ++--
2 files changed, 18 insertions(+), 22 deletions(-)
diff --git a/runtime/onnxruntime/include/audio.h b/runtime/onnxruntime/include/audio.h
index ce9e16b..ded8366 100644
--- a/runtime/onnxruntime/include/audio.h
+++ b/runtime/onnxruntime/include/audio.h
@@ -61,10 +61,10 @@
void Disp();
void WavResample(int32_t sampling_rate, const float *waveform, int32_t n);
bool LoadWav(const char* buf, int n_len, int32_t* sampling_rate);
- bool LoadWav(const char* filename, int32_t* sampling_rate);
+ bool LoadWav(const char* filename, int32_t* sampling_rate, bool resample=true);
bool LoadWav2Char(const char* filename, int32_t* sampling_rate);
bool LoadPcmwav(const char* buf, int n_file_len, int32_t* sampling_rate);
- bool LoadPcmwav(const char* filename, int32_t* sampling_rate);
+ bool LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resample=true);
bool LoadPcmwav2Char(const char* filename, int32_t* sampling_rate);
bool LoadOthers2Char(const char* filename);
bool FfmpegLoad(const char *filename, bool copy2char=false);
diff --git a/runtime/onnxruntime/src/audio.cpp b/runtime/onnxruntime/src/audio.cpp
index 22d7f3c..ffec2c9 100644
--- a/runtime/onnxruntime/src/audio.cpp
+++ b/runtime/onnxruntime/src/audio.cpp
@@ -354,9 +354,7 @@
while (avcodec_receive_frame(codecContext, frame) >= 0) {
// Resample audio if necessary
std::vector<uint8_t> resampled_buffer;
- int in_samples = frame->nb_samples;
- uint8_t **in_data = frame->extended_data;
- int out_samples = av_rescale_rnd(in_samples,
+ int out_samples = av_rescale_rnd(swr_get_delay(swr_ctx, codecContext->sample_rate) + frame->nb_samples,
dest_sample_rate,
codecContext->sample_rate,
AV_ROUND_DOWN);
@@ -364,20 +362,20 @@
int resampled_size = out_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
if (resampled_buffer.size() < resampled_size) {
resampled_buffer.resize(resampled_size);
- }
+ }
uint8_t *resampled_data = resampled_buffer.data();
int ret = swr_convert(
swr_ctx,
&resampled_data, // output buffer
- resampled_size, // output buffer size
- (const uint8_t **)(frame->data), //(const uint8_t **)(frame->extended_data)
- in_samples // input buffer size
+ out_samples, // output buffer size
+ (const uint8_t **)(frame->data), // choose channel
+ frame->nb_samples // input buffer size
);
if (ret < 0) {
LOG(ERROR) << "Error resampling audio";
break;
}
- std::copy(resampled_buffer.begin(), resampled_buffer.end(), std::back_inserter(resampled_buffers));
+ resampled_buffers.insert(resampled_buffers.end(), resampled_buffer.begin(), resampled_buffer.begin() + resampled_size);
}
}
}
@@ -539,9 +537,7 @@
while (avcodec_receive_frame(codecContext, frame) >= 0) {
// Resample audio if necessary
std::vector<uint8_t> resampled_buffer;
- int in_samples = frame->nb_samples;
- uint8_t **in_data = frame->extended_data;
- int out_samples = av_rescale_rnd(in_samples,
+ int out_samples = av_rescale_rnd(swr_get_delay(swr_ctx, codecContext->sample_rate) + frame->nb_samples,
dest_sample_rate,
codecContext->sample_rate,
AV_ROUND_DOWN);
@@ -549,20 +545,20 @@
int resampled_size = out_samples * av_get_bytes_per_sample(AV_SAMPLE_FMT_S16);
if (resampled_buffer.size() < resampled_size) {
resampled_buffer.resize(resampled_size);
- }
+ }
uint8_t *resampled_data = resampled_buffer.data();
int ret = swr_convert(
swr_ctx,
&resampled_data, // output buffer
- resampled_size, // output buffer size
- (const uint8_t **)(frame->data), //(const uint8_t **)(frame->extended_data)
- in_samples // input buffer size
+ out_samples, // output buffer size
+ (const uint8_t **)(frame->data), // choose channel: channel_data
+ frame->nb_samples // input buffer size
);
if (ret < 0) {
LOG(ERROR) << "Error resampling audio";
break;
}
- std::copy(resampled_buffer.begin(), resampled_buffer.end(), std::back_inserter(resampled_buffers));
+ resampled_buffers.insert(resampled_buffers.end(), resampled_buffer.begin(), resampled_buffer.begin() + resampled_size);
}
}
}
@@ -614,7 +610,7 @@
}
-bool Audio::LoadWav(const char *filename, int32_t* sampling_rate)
+bool Audio::LoadWav(const char *filename, int32_t* sampling_rate, bool resample)
{
WaveHeader header;
if (speech_data != NULL) {
@@ -676,7 +672,7 @@
}
//resample
- if(*sampling_rate != dest_sample_rate){
+ if(resample && *sampling_rate != dest_sample_rate){
WavResample(*sampling_rate, speech_data, speech_len);
}
@@ -867,7 +863,7 @@
return false;
}
-bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate)
+bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resample)
{
if (speech_data != NULL) {
free(speech_data);
@@ -908,7 +904,7 @@
}
//resample
- if(*sampling_rate != dest_sample_rate){
+ if(resample && *sampling_rate != dest_sample_rate){
WavResample(*sampling_rate, speech_data, speech_len);
}
--
Gitblit v1.9.1