From 6427c834dfd97b1f05c6659cdc7ccf010bf82fe1 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: 星期一, 24 四月 2023 19:50:07 +0800
Subject: [PATCH] update

---
 funasr/runtime/onnxruntime/src/Audio.cpp |  227 ++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 202 insertions(+), 25 deletions(-)

diff --git a/funasr/runtime/onnxruntime/src/Audio.cpp b/funasr/runtime/onnxruntime/src/Audio.cpp
index f515a6d..bce3a90 100644
--- a/funasr/runtime/onnxruntime/src/Audio.cpp
+++ b/funasr/runtime/onnxruntime/src/Audio.cpp
@@ -3,7 +3,6 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <webrtc_vad.h>
 
 #include "Audio.h"
 
@@ -25,8 +24,7 @@
         out_idx = 1;
         sum = 0;
     };
-    ~AudioWindow()
-    {
+    ~AudioWindow(){
         free(window);
     };
     int put(int val)
@@ -102,6 +100,11 @@
 {
     if (speech_buff != NULL) {
         free(speech_buff);
+        
+    }
+
+    if (speech_data != NULL) {
+        
         free(speech_data);
     }
 }
@@ -112,12 +115,20 @@
            speech_len);
 }
 
+// Duration of the loaded clip in seconds: sample count divided by 16000.
+float Audio::get_time_len()
+{
+    return static_cast<float>(speech_len) / 16000;
+}
+
 bool Audio::loadwav(const char *filename)
 {
 
+    if (speech_data != NULL) {
+        free(speech_data);
+    }
     if (speech_buff != NULL) {
         free(speech_buff);
-        free(speech_data);
     }
 
     offset = 0;
@@ -126,34 +137,198 @@
     fp = fopen(filename, "rb");
     if (fp == nullptr)
         return false;
-    fseek(fp, 0, SEEK_END);
-    uint32_t nFileLen = ftell(fp);
-    fseek(fp, 44, SEEK_SET);
+    fseek(fp, 0, SEEK_END);  /* seek to the end of the file */
+    uint32_t nFileLen = ftell(fp);  /* get the file size */
+    fseek(fp, 44, SEEK_SET);  /* skip the WAV file header */
 
     speech_len = (nFileLen - 44) / 2;
     speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
     speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
-    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
-    int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
-    fclose(fp);
 
-    speech_data = (float *)malloc(sizeof(float) * speech_align_len);
-    memset(speech_data, 0, sizeof(float) * speech_align_len);
-    int i;
-    float scale = 1;
+    if (speech_buff)
+    {
+        memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+        int ret = fread(speech_buff, sizeof(int16_t), speech_len, fp);
+        fclose(fp);
 
-    if (data_type == 1) {
-        scale = 32768;
+        speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+        memset(speech_data, 0, sizeof(float) * speech_align_len);
+        int i;
+        float scale = 1;
+
+        if (data_type == 1) {
+            scale = 32768;
+        }
+
+        for (i = 0; i < speech_len; i++) {
+            speech_data[i] = (float)speech_buff[i] / scale;
+        }
+
+        AudioFrame* frame = new AudioFrame(speech_len);
+        frame_queue.push(frame);
+
+
+        return true;
     }
-
-    for (i = 0; i < speech_len; i++) {
-        speech_data[i] = (float)speech_buff[i] / scale;
-    }
-
-    AudioFrame *frame = new AudioFrame(speech_len);
-    frame_queue.push(frame);
-    return true;
+    else
+        return false;
 }
+
+
+// Loads a complete in-memory WAV image: skips the 44-byte header, copies the
+// 16-bit PCM samples into speech_buff and their scaled float form into
+// speech_data. Returns false on bad input or allocation failure.
+bool Audio::loadwav(const char* buf, int nFileLen)
+{
+    const int kWavHeaderSize = 44;
+
+    // Release the previous clip and null the pointers so that an early return
+    // below cannot lead to a double free in the destructor.
+    if (speech_data != NULL) {
+        free(speech_data);
+        speech_data = NULL;
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    offset = 0;
+
+    // A buffer shorter than the header would give a negative sample count.
+    if (buf == NULL || nFileLen < kWavHeaderSize)
+        return false;
+
+    speech_len = (nFileLen - kWavHeaderSize) / 2;
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
+    if (speech_buff == NULL)
+        return false;
+    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+    memcpy(speech_buff, buf + kWavHeaderSize, speech_len * sizeof(int16_t));
+
+    speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+    if (speech_data == NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+        return false;
+    }
+    memset(speech_data, 0, sizeof(float) * speech_align_len);
+
+    const float scale = (data_type == 1) ? 32768.0f : 1.0f;
+    for (int i = 0; i < speech_len; i++)
+        speech_data[i] = (float)speech_buff[i] / scale;
+
+    // Queue a frame like the other loaders so fetch_chunck() has one to pop.
+    frame_queue.push(new AudioFrame(speech_len));
+    return true;
+}
+
+
+// Loads headerless 16-bit PCM from memory: copies the samples into
+// speech_buff, stores their scaled float form in speech_data, and queues one
+// AudioFrame covering the clip. Returns false on bad input or allocation
+// failure.
+bool Audio::loadpcmwav(const char* buf, int nBufLen)
+{
+    // Release the previous clip and null the pointers so that an early return
+    // below cannot lead to a double free in the destructor.
+    if (speech_data != NULL) {
+        free(speech_data);
+        speech_data = NULL;
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    offset = 0;
+
+    // Reject a missing buffer or a negative length before sizing anything.
+    if (buf == NULL || nBufLen < 0)
+        return false;
+
+    speech_len = nBufLen / 2;
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
+    if (speech_buff == NULL)
+        return false;
+    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+    memcpy(speech_buff, buf, speech_len * sizeof(int16_t));
+
+    speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+    if (speech_data == NULL) {
+        // Do not keep a half-loaded clip around.
+        free(speech_buff);
+        speech_buff = NULL;
+        return false;
+    }
+    memset(speech_data, 0, sizeof(float) * speech_align_len);
+
+    // data_type == 1 divides each sample by 32768; otherwise by 1.
+    const float scale = (data_type == 1) ? 32768.0f : 1.0f;
+    for (int i = 0; i < speech_len; i++)
+        speech_data[i] = (float)speech_buff[i] / scale;
+
+    // One frame spanning the whole clip.
+    frame_queue.push(new AudioFrame(speech_len));
+    return true;
+}
+
+// Loads a headerless 16-bit PCM file into speech_buff, stores the scaled float
+// form in speech_data, and queues one AudioFrame covering the clip. Returns
+// false if the file cannot be opened or sized, or if an allocation fails.
+bool Audio::loadpcmwav(const char* filename)
+{
+    // Release the previous clip and null the pointers so that an early return
+    // below cannot lead to a double free in the destructor.
+    if (speech_data != NULL) {
+        free(speech_data);
+        speech_data = NULL;
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    offset = 0;
+
+    FILE* fp = fopen(filename, "rb");
+    if (fp == nullptr)
+        return false;
+    // ftell() reports failure as -1; check before using it as a size.
+    fseek(fp, 0, SEEK_END);
+    const long nFileLen = ftell(fp);
+    fseek(fp, 0, SEEK_SET);
+    if (nFileLen < 0) {
+        fclose(fp);
+        return false;
+    }
+
+    speech_len = (int)(nFileLen / 2);
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+    speech_buff = (int16_t*)malloc(sizeof(int16_t) * speech_align_len);
+    speech_data = (float*)malloc(sizeof(float) * speech_align_len);
+    if (speech_buff == NULL || speech_data == NULL) {
+        // Close the file on this path too, and drop any half-made buffers.
+        fclose(fp);
+        free(speech_buff);
+        free(speech_data);
+        speech_buff = NULL;
+        speech_data = NULL;
+        return false;
+    }
+    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+    memset(speech_data, 0, sizeof(float) * speech_align_len);
+    // A short read is tolerated: the unread tail of speech_buff stays zero.
+    const size_t nRead = fread(speech_buff, sizeof(int16_t), speech_len, fp);
+    (void)nRead;
+    fclose(fp);
+
+    const float scale = (data_type == 1) ? 32768.0f : 1.0f;
+    for (int i = 0; i < speech_len; i++)
+        speech_data[i] = (float)speech_buff[i] / scale;
+    frame_queue.push(new AudioFrame(speech_len));
+    return true;
+}
+
 
 int Audio::fetch_chunck(float *&dout, int len)
 {
@@ -163,7 +338,7 @@
     } else if (offset == speech_align_len - len) {
         dout = speech_data + offset;
         offset = speech_align_len;
-        // 涓存椂瑙e喅
+        // temporary workaround
         AudioFrame *frame = frame_queue.front();
         frame_queue.pop();
         delete frame;
@@ -238,6 +413,7 @@
 #define SPEECH_LEN_20S (16000 * 20)
 #define SPEECH_LEN_30S (16000 * 30)
 
+/*
 void Audio::split()
 {
     VadInst *handle = WebRtcVad_Create();
@@ -296,3 +472,4 @@
     }
     WebRtcVad_Free(handle);
 }
+*/
\ No newline at end of file

--
Gitblit v1.9.1