From 33d3d2084403fd34b79c835d2f2fe04f6cd8f738 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 13 Sep 2023 09:33:54 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add

---
 funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp |   20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp b/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
index d9944a0..cd79726 100644
--- a/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
+++ b/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
@@ -39,11 +39,11 @@
     }
 }
 
-void print_segs(vector<vector<int>>* vec) {
+void print_segs(vector<vector<int>>* vec, string &wav_id) {
     if((*vec).size() == 0){
         return;
     }    
-    string seg_out="[";
+    string seg_out=wav_id + ": [";
     for (int i = 0; i < vec->size(); i++) {
         vector<int> inner_vec = (*vec)[i];
         if(inner_vec.size() == 0){
@@ -72,7 +72,7 @@
 
     TCLAP::CmdLine cmd("funasr-onnx-offline-vad", ' ', "1.0");
     TCLAP::ValueArg<std::string>    model_dir("", MODEL_DIR, "the vad model path, which contains model.onnx, vad.yaml, vad.mvn", true, "", "string");
-    TCLAP::ValueArg<std::string>    quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string");
+    TCLAP::ValueArg<std::string>    quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "true", "string");
 
     TCLAP::ValueArg<std::string>    wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
 
@@ -104,9 +104,12 @@
 
     // read wav_path
     vector<string> wav_list;
+    vector<string> wav_ids;
+    string default_id = "wav_default_id";
     string wav_path_ = model_path.at(WAV_PATH);
     if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){
         wav_list.emplace_back(wav_path_);
+        wav_ids.emplace_back(default_id);
     }
     else if(is_target_file(wav_path_, "scp")){
         ifstream in(wav_path_);
@@ -120,7 +123,8 @@
             istringstream iss(line);
             string column1, column2;
             iss >> column1 >> column2;
-            wav_list.emplace_back(column2); 
+            wav_list.emplace_back(column2);
+            wav_ids.emplace_back(column1);
         }
         in.close();
     }else{
@@ -131,7 +135,9 @@
     FUNASR_HANDLE online_hanlde=FsmnVadOnlineInit(vad_hanlde);
     float snippet_time = 0.0f;
     long taking_micros = 0;
-    for(auto& wav_file : wav_list){
+    for (int i = 0; i < wav_list.size(); i++) {
+        auto& wav_file = wav_list[i];
+        auto& wav_id = wav_ids[i];
 
         int32_t sampling_rate_ = -1;
         funasr::Audio audio(1);
@@ -153,7 +159,7 @@
         char* speech_buff = audio.GetSpeechChar();
         int buff_len = audio.GetSpeechLen()*2;
 
-        int step = 3200;
+        int step = 800*2;
         bool is_final = false;
 
         for (int sample_offset = 0; sample_offset < buff_len; sample_offset += std::min(step, buff_len - sample_offset)) {
@@ -172,7 +178,7 @@
             if (result)
             {
                 vector<std::vector<int>>* vad_segments = FsmnVadGetResult(result, 0);
-                print_segs(vad_segments);
+                print_segs(vad_segments, wav_id);
                 snippet_time += FsmnVadGetRetSnippetTime(result);
                 FsmnVadFreeResult(result);
             }

--
Gitblit v1.9.1