雾聪
2023-06-02 3372b13d24aceef7002cfa0fc8222b3085c15110
funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
copy from funasr/runtime/onnxruntime/src/funasr-onnx-offline-vad.cpp copy to funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
File was copied from funasr/runtime/onnxruntime/src/funasr-onnx-offline-vad.cpp
@@ -18,6 +18,7 @@
#include "funasrruntime.h"
#include "tclap/CmdLine.h"
#include "com-define.h"
#include "audio.h"
using namespace std;
@@ -39,9 +40,15 @@
}
void print_segs(vector<vector<int>>* vec) {
    if((*vec).size() == 0){
        return;
    }
    string seg_out="[";
    for (int i = 0; i < vec->size(); i++) {
        vector<int> inner_vec = (*vec)[i];
        if(inner_vec.size() == 0){
            continue;
        }
        seg_out += "[";
        for (int j = 0; j < inner_vec.size(); j++) {
            seg_out += to_string(inner_vec[j]);
@@ -120,32 +127,66 @@
        LOG(ERROR)<<"Please check the wav extension!";
        exit(-1);
    }
    // init online features
    FUNASR_HANDLE online_hanlde=FsmnVadOnlineInit(vad_hanlde);
    float snippet_time = 0.0f;
    long taking_micros = 0;
    for(auto& wav_file : wav_list){
        gettimeofday(&start, NULL);
        FUNASR_RESULT result=FsmnVadInfer(vad_hanlde, wav_file.c_str(), FSMN_VAD_OFFLINE, NULL, 16000);
        gettimeofday(&end, NULL);
        seconds = (end.tv_sec - start.tv_sec);
        taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
        if (result)
        {
            vector<std::vector<int>>* vad_segments = FsmnVadGetResult(result, 0);
            print_segs(vad_segments);
            snippet_time += FsmnVadGetRetSnippetTime(result);
            FsmnVadFreeResult(result);
        }
        else
        {
            LOG(ERROR) << ("No return data!\n");
        int32_t sampling_rate_ = -1;
        funasr::Audio audio(1);
      if(is_target_file(wav_file.c_str(), "wav")){
         int32_t sampling_rate_ = -1;
         if(!audio.LoadWav2Char(wav_file.c_str(), &sampling_rate_)){
            LOG(ERROR)<<"Failed to load "<< wav_file;
                exit(-1);
            }
      }else if(is_target_file(wav_file.c_str(), "pcm")){
         if (!audio.LoadPcmwav2Char(wav_file.c_str(), &sampling_rate_)){
            LOG(ERROR)<<"Failed to load "<< wav_file;
                exit(-1);
            }
      }else{
         LOG(ERROR)<<"Wrong wav extension";
         exit(-1);
      }
        char* speech_buff = audio.GetSpeechChar();
        int buff_len = audio.GetSpeechLen()*2;
        int step = 3200;
        bool is_final = false;
        for (int sample_offset = 0; sample_offset < buff_len; sample_offset += std::min(step, buff_len - sample_offset)) {
            if (sample_offset + step >= buff_len - 1) {
                    step = buff_len - sample_offset;
                    is_final = true;
                } else {
                    is_final = false;
            }
            gettimeofday(&start, NULL);
            FUNASR_RESULT result = FsmnVadInferBuffer(online_hanlde, speech_buff+sample_offset, step, NULL, is_final, 16000);
            gettimeofday(&end, NULL);
            seconds = (end.tv_sec - start.tv_sec);
            taking_micros += ((seconds * 1000000) + end.tv_usec) - (start.tv_usec);
            if (result)
            {
                vector<std::vector<int>>* vad_segments = FsmnVadGetResult(result, 0);
                print_segs(vad_segments);
                snippet_time += FsmnVadGetRetSnippetTime(result);
                FsmnVadFreeResult(result);
            }
            else
            {
                LOG(ERROR) << ("No return data!\n");
            }
        }
    }
    LOG(INFO) << "Audio length: " << (double)snippet_time << " s";
    LOG(INFO) << "Model inference takes: " << (double)taking_micros / 1000000 <<" s";
    LOG(INFO) << "Model inference RTF: " << (double)taking_micros/ (snippet_time*1000000);
    FsmnVadUninit(online_hanlde);
    FsmnVadUninit(vad_hanlde);
    return 0;
}