From 13993a1d8c31a5db61abc6021b74bd11e0806da1 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: 星期一, 15 一月 2024 16:25:52 +0800
Subject: [PATCH] rm speech_buf for other funcs
---
runtime/onnxruntime/src/audio.cpp | 145 ++++++++++++++++++++----------------------------
1 files changed, 61 insertions(+), 84 deletions(-)
diff --git a/runtime/onnxruntime/src/audio.cpp b/runtime/onnxruntime/src/audio.cpp
index ffec2c9..a2c19dc 100644
--- a/runtime/onnxruntime/src/audio.cpp
+++ b/runtime/onnxruntime/src/audio.cpp
@@ -228,6 +228,17 @@
if (speech_char != NULL) {
free(speech_char);
}
+ ClearQueue(frame_queue);
+ ClearQueue(asr_online_queue);
+ ClearQueue(asr_offline_queue);
+}
+
+void Audio::ClearQueue(std::queue<AudioFrame*>& q) { // Deletes every AudioFrame still referenced by q and leaves q empty.
+ while (!q.empty()) {
+ AudioFrame* frame = q.front();
+ delete frame; // q holds raw AudioFrame* pointers; popping alone would not free the pointed-to frame
+ q.pop(); // remove the pointer that was just deleted so it cannot be reused
+ }
}
void Audio::Disp()
@@ -243,9 +254,9 @@
void Audio::WavResample(int32_t sampling_rate, const float *waveform,
int32_t n)
{
- LOG(INFO) << "Creating a resampler:\n"
- << " in_sample_rate: "<< sampling_rate << "\n"
- << " output_sample_rate: " << static_cast<int32_t>(dest_sample_rate);
+ LOG(INFO) << "Creating a resampler: "
+ << " in_sample_rate: "<< sampling_rate
+ << " output_sample_rate: " << static_cast<int32_t>(dest_sample_rate);
float min_freq =
std::min<int32_t>(sampling_rate, dest_sample_rate);
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
@@ -392,9 +403,6 @@
if (speech_data != NULL) {
free(speech_data);
}
- if (speech_buff != NULL) {
- free(speech_buff);
- }
if (speech_char != NULL) {
free(speech_char);
}
@@ -407,30 +415,25 @@
}
speech_len = (resampled_buffers.size()) / 2;
- speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
- if (speech_buff)
- {
- memset(speech_buff, 0, sizeof(int16_t) * speech_len);
- memcpy((void*)speech_buff, (const void*)resampled_buffers.data(), speech_len * sizeof(int16_t));
-
- speech_data = (float*)malloc(sizeof(float) * speech_len);
+ speech_data = (float*)malloc(sizeof(float) * speech_len);
+ if(speech_data){
memset(speech_data, 0, sizeof(float) * speech_len);
-
float scale = 1;
if (data_type == 1) {
- scale = 32768;
+ scale = 32768.0f;
}
- for (int32_t i = 0; i != speech_len; ++i) {
- speech_data[i] = (float)speech_buff[i] / scale;
+ for (int32_t i = 0; i < speech_len; ++i) {
+ int16_t val = (int16_t)((resampled_buffers[2 * i + 1] << 8) | resampled_buffers[2 * i]);
+ speech_data[i] = (float)val / scale;
}
-
AudioFrame* frame = new AudioFrame(speech_len);
frame_queue.push(frame);
return true;
- }
- else
+ }else{
return false;
+ }
+
#endif
}
@@ -451,6 +454,10 @@
nullptr, // write callback (not used here)
nullptr // seek callback (not used here)
);
+ if (!avio_ctx) {
+ av_free(buf_copy);
+ return false;
+ }
AVFormatContext* formatContext = avformat_alloc_context();
formatContext->pb = avio_ctx;
if (avformat_open_input(&formatContext, "", NULL, NULL) != 0) {
@@ -565,7 +572,9 @@
av_packet_unref(packet);
}
- avio_context_free(&avio_ctx);
+ //avio_context_free(&avio_ctx);
+ av_freep(&avio_ctx ->buffer);
+ av_freep(&avio_ctx);
avformat_close_input(&formatContext);
avformat_free_context(formatContext);
avcodec_free_context(&codecContext);
@@ -576,36 +585,27 @@
if (speech_data != NULL) {
free(speech_data);
}
- if (speech_buff != NULL) {
- free(speech_buff);
- }
- offset = 0;
speech_len = (resampled_buffers.size()) / 2;
- speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
- if (speech_buff)
- {
- memset(speech_buff, 0, sizeof(int16_t) * speech_len);
- memcpy((void*)speech_buff, (const void*)resampled_buffers.data(), speech_len * sizeof(int16_t));
-
- speech_data = (float*)malloc(sizeof(float) * speech_len);
+ speech_data = (float*)malloc(sizeof(float) * speech_len);
+ if(speech_data){
memset(speech_data, 0, sizeof(float) * speech_len);
-
float scale = 1;
if (data_type == 1) {
- scale = 32768;
+ scale = 32768.0f;
}
- for (int32_t i = 0; i != speech_len; ++i) {
- speech_data[i] = (float)speech_buff[i] / scale;
+ for (int32_t i = 0; i < speech_len; ++i) {
+ int16_t val = (int16_t)((resampled_buffers[2 * i + 1] << 8) | resampled_buffers[2 * i]);
+ speech_data[i] = (float)val / scale;
}
-
AudioFrame* frame = new AudioFrame(speech_len);
frame_queue.push(frame);
return true;
- }
- else
+ }else{
return false;
+ }
+
#endif
}
@@ -733,7 +733,6 @@
if (speech_buff != NULL) {
free(speech_buff);
}
- offset = 0;
std::memcpy(&header, buf, sizeof(header));
@@ -776,30 +775,20 @@
if (speech_data != NULL) {
free(speech_data);
}
- if (speech_buff != NULL) {
- free(speech_buff);
- }
- offset = 0;
speech_len = n_buf_len / 2;
- speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
- if (speech_buff)
- {
- memset(speech_buff, 0, sizeof(int16_t) * speech_len);
- memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
-
- speech_data = (float*)malloc(sizeof(float) * speech_len);
- memset(speech_data, 0, sizeof(float) * speech_len);
-
+ speech_data = (float*)malloc(sizeof(float) * speech_len);
+ if(speech_data){
float scale = 1;
if (data_type == 1) {
- scale = 32768;
+ scale = 32768.0f;
+ }
+ const uint8_t* byte_buf = reinterpret_cast<const uint8_t*>(buf);
+ for (int32_t i = 0; i < speech_len; ++i) {
+ int16_t val = (int16_t)((byte_buf[2 * i + 1] << 8) | byte_buf[2 * i]);
+ speech_data[i] = (float)val / scale;
}
- for (int32_t i = 0; i != speech_len; ++i) {
- speech_data[i] = (float)speech_buff[i] / scale;
- }
-
//resample
if(*sampling_rate != dest_sample_rate){
WavResample(*sampling_rate, speech_data, speech_len);
@@ -807,11 +796,11 @@
AudioFrame* frame = new AudioFrame(speech_len);
frame_queue.push(frame);
+
return true;
-
- }
- else
+ }else{
return false;
+ }
}
bool Audio::LoadPcmwavOnline(const char* buf, int n_buf_len, int32_t* sampling_rate)
@@ -819,32 +808,20 @@
if (speech_data != NULL) {
free(speech_data);
}
- if (speech_buff != NULL) {
- free(speech_buff);
- }
- if (speech_char != NULL) {
- free(speech_char);
- }
speech_len = n_buf_len / 2;
- speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_len);
- if (speech_buff)
- {
- memset(speech_buff, 0, sizeof(int16_t) * speech_len);
- memcpy((void*)speech_buff, (const void*)buf, speech_len * sizeof(int16_t));
-
- speech_data = (float*)malloc(sizeof(float) * speech_len);
- memset(speech_data, 0, sizeof(float) * speech_len);
-
+ speech_data = (float*)malloc(sizeof(float) * speech_len);
+ if(speech_data){
float scale = 1;
if (data_type == 1) {
- scale = 32768;
+ scale = 32768.0f;
+ }
+ const uint8_t* byte_buf = reinterpret_cast<const uint8_t*>(buf);
+ for (int32_t i = 0; i < speech_len; ++i) {
+ int16_t val = (int16_t)((byte_buf[2 * i + 1] << 8) | byte_buf[2 * i]);
+ speech_data[i] = (float)val / scale;
}
- for (int32_t i = 0; i != speech_len; ++i) {
- speech_data[i] = (float)speech_buff[i] / scale;
- }
-
//resample
if(*sampling_rate != dest_sample_rate){
WavResample(*sampling_rate, speech_data, speech_len);
@@ -856,11 +833,11 @@
AudioFrame* frame = new AudioFrame(speech_len);
frame_queue.push(frame);
+
return true;
-
- }
- else
+ }else{
return false;
+ }
}
bool Audio::LoadPcmwav(const char* filename, int32_t* sampling_rate, bool resample)
--
Gitblit v1.9.1