From 94de39dde2e616a01683c518023d0fab72b4e103 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期一, 19 二月 2024 22:21:50 +0800
Subject: [PATCH] aishell example
---
runtime/onnxruntime/src/audio.cpp | 75 +++++++++++++++++++++++++++++++++++++
1 files changed, 75 insertions(+), 0 deletions(-)
diff --git a/runtime/onnxruntime/src/audio.cpp b/runtime/onnxruntime/src/audio.cpp
index 40ea871..9b93dc8 100644
--- a/runtime/onnxruntime/src/audio.cpp
+++ b/runtime/onnxruntime/src/audio.cpp
@@ -133,6 +133,7 @@
};
~AudioWindow(){
free(window);
+ window = nullptr;
};
int put(int val)
{
@@ -162,6 +163,7 @@
AudioFrame::~AudioFrame(){
if(data != nullptr){
free(data);
+ data = nullptr;
}
}
int AudioFrame::SetStart(int val)
@@ -221,12 +223,15 @@
{
if (speech_buff != nullptr) {
free(speech_buff);
+ speech_buff = nullptr;
}
if (speech_data != nullptr) {
free(speech_data);
+ speech_data = nullptr;
}
if (speech_char != nullptr) {
free(speech_char);
+ speech_char = nullptr;
}
ClearQueue(frame_queue);
ClearQueue(asr_online_queue);
@@ -271,6 +276,7 @@
speech_len = samples.size();
if (speech_data != nullptr) {
free(speech_data);
+ speech_data = nullptr;
}
speech_data = (float*)malloc(sizeof(float) * speech_len);
memset(speech_data, 0, sizeof(float) * speech_len);
@@ -402,9 +408,11 @@
if (speech_data != nullptr) {
free(speech_data);
+ speech_data = nullptr;
}
if (speech_char != nullptr) {
free(speech_char);
+ speech_char = nullptr;
}
offset = 0;
@@ -584,6 +592,7 @@
if (speech_data != nullptr) {
free(speech_data);
+ speech_data = nullptr;
}
speech_len = (resampled_buffers.size()) / 2;
@@ -615,9 +624,11 @@
WaveHeader header;
if (speech_data != nullptr) {
free(speech_data);
+ speech_data = nullptr;
}
if (speech_buff != nullptr) {
free(speech_buff);
+ speech_buff = nullptr;
}
offset = 0;
@@ -690,6 +701,7 @@
WaveHeader header;
if (speech_char != nullptr) {
free(speech_char);
+ speech_char = nullptr;
}
offset = 0;
std::ifstream is(filename, std::ifstream::binary);
@@ -729,9 +741,11 @@
WaveHeader header;
if (speech_data != nullptr) {
free(speech_data);
+ speech_data = nullptr;
}
if (speech_buff != nullptr) {
free(speech_buff);
+ speech_buff = nullptr;
}
std::memcpy(&header, buf, sizeof(header));
@@ -774,6 +788,7 @@
{
if (speech_data != nullptr) {
free(speech_data);
+ speech_data = nullptr;
}
speech_len = n_buf_len / 2;
@@ -807,6 +822,7 @@
{
if (speech_data != nullptr) {
free(speech_data);
+ speech_data = nullptr;
}
speech_len = n_buf_len / 2;
@@ -844,9 +860,11 @@
{
if (speech_data != nullptr) {
free(speech_data);
+ speech_data = nullptr;
}
if (speech_buff != nullptr) {
free(speech_buff);
+ speech_buff = nullptr;
}
offset = 0;
@@ -899,6 +917,7 @@
{
if (speech_char != nullptr) {
free(speech_char);
+ speech_char = nullptr;
}
offset = 0;
@@ -926,6 +945,7 @@
{
if (speech_char != nullptr) {
free(speech_char);
+ speech_char = nullptr;
}
FILE* fp;
@@ -1030,6 +1050,7 @@
new_data[tmp_off + i] = speech_data[ii];
}
free(speech_data);
+ speech_data = nullptr;
speech_data = new_data;
speech_len = num_new_samples;
@@ -1064,6 +1085,60 @@
}
}
+/**
+ * Re-segment the loaded audio with a temporary online FSMN VAD.
+ *
+ * Pops and discards the frame at the head of frame_queue, runs the VAD over
+ * speech_data in chunks of at most dest_sample_rate*10 samples (the last
+ * chunk is flagged as final), and pushes one new AudioFrame per detected
+ * [start, end] pair to the back of frame_queue. Segment boundaries returned
+ * by the VAD are multiplied by seg_sample before being stored in the frame.
+ *
+ * If frame_queue is empty or the chunk size is not positive, an error is
+ * logged and the call returns without modifying frame_queue.
+ *
+ * @param offline_stream  Stream whose vad_handle backs the temporary online
+ *                        VAD instance created for this call.
+ */
+void Audio::CutSplit(OfflineStream* offline_stream)
+{
+    // Validate before touching any state so a rejected call leaves frame_queue intact.
+    if (frame_queue.empty()) {
+        // front()/pop() on an empty std::queue is undefined behaviour.
+        LOG(ERROR) << "frame_queue is empty, nothing to split.";
+        return;
+    }
+    const int max_chunk = dest_sample_rate*10;
+    if (max_chunk <= 0) {
+        // A zero chunk would never advance the loop below; a negative one
+        // would build an invalid sample range.
+        LOG(ERROR) << "Invalid dest_sample_rate: " << dest_sample_rate;
+        return;
+    }
+
+    std::unique_ptr<VadModel> vad_online_handle = make_unique<FsmnVadOnline>((FsmnVad*)(offline_stream->vad_handle).get());
+
+    // Drop the frame currently at the head of the queue; the per-segment
+    // frames created below are pushed to the back of the same queue.
+    // NOTE(review): the previous code also read head->GetLen() into an unused
+    // local; that read is dropped on the assumption GetLen() is a plain
+    // accessor -- confirm.
+    AudioFrame *head = frame_queue.front();
+    frame_queue.pop();
+    delete head;
+    head = nullptr;
+
+    // Feed the VAD chunk by chunk and collect every segment it reports.
+    vector<std::vector<int>> vad_segments;
+    int sample_offset = 0;
+    while (sample_offset < speech_len) {
+        int chunk = max_chunk;
+        bool is_final = false;
+        // When at most one sample would remain after a full chunk, take
+        // everything that is left and tell the VAD this is the last call.
+        if (sample_offset + chunk >= speech_len - 1) {
+            chunk = speech_len - sample_offset;
+            is_final = true;
+        }
+        std::vector<float> pcm_data(speech_data+sample_offset, speech_data+sample_offset+chunk);
+        vector<std::vector<int>> cut_segments = vad_online_handle->Infer(pcm_data, is_final);
+        vad_segments.insert(vad_segments.end(), cut_segments.begin(), cut_segments.end());
+        sample_offset += chunk;
+    }
+
+    // Pair up boundaries. A value of -1 leaves the previously seen boundary
+    // untouched, so a start and its matching end may arrive in separate entries.
+    int speech_start_i = -1, speech_end_i = -1;
+    for (const auto& vad_segment : vad_segments) {
+        if (vad_segment.size() != 2) {
+            LOG(ERROR) << "Size of vad_segment is not 2.";
+            break;
+        }
+        if (vad_segment[0] != -1) {
+            speech_start_i = vad_segment[0];
+        }
+        if (vad_segment[1] != -1) {
+            speech_end_i = vad_segment[1];
+        }
+        if (speech_start_i == -1 || speech_end_i == -1) {
+            continue;  // segment not complete yet
+        }
+
+        // frame_queue stores raw AudioFrame pointers; whoever pops the frame deletes it.
+        AudioFrame *segment_frame = new AudioFrame();
+        int start = speech_start_i*seg_sample;
+        int end = speech_end_i*seg_sample;
+        segment_frame->SetStart(start);
+        segment_frame->SetEnd(end);
+        frame_queue.push(segment_frame);
+        speech_start_i = -1;
+        speech_end_i = -1;
+    }
+}
+
void Audio::Split(VadModel* vad_obj, vector<std::vector<int>>& vad_segments, bool input_finished)
{
AudioFrame *frame;
--
Gitblit v1.9.1