From 33d3d2084403fd34b79c835d2f2fe04f6cd8f738 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 13 Sep 2023 09:33:54 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR add
---
funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp b/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
index d9944a0..cd79726 100644
--- a/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
+++ b/funasr/runtime/onnxruntime/bin/funasr-onnx-online-vad.cpp
@@ -39,11 +39,11 @@
}
}
-void print_segs(vector<vector<int>>* vec) {
+void print_segs(vector<vector<int>>* vec, string &wav_id) {
if((*vec).size() == 0){
return;
}
- string seg_out="[";
+ string seg_out=wav_id + ": [";
for (int i = 0; i < vec->size(); i++) {
vector<int> inner_vec = (*vec)[i];
if(inner_vec.size() == 0){
@@ -72,7 +72,7 @@
TCLAP::CmdLine cmd("funasr-onnx-offline-vad", ' ', "1.0");
TCLAP::ValueArg<std::string> model_dir("", MODEL_DIR, "the vad model path, which contains model.onnx, vad.yaml, vad.mvn", true, "", "string");
- TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "false (Default), load the model of model.onnx in model_dir. If set true, load the model of model_quant.onnx in model_dir", false, "false", "string");
+ TCLAP::ValueArg<std::string> quantize("", QUANTIZE, "true (Default), load the model of model_quant.onnx in model_dir. If set false, load the model of model.onnx in model_dir", false, "true", "string");
TCLAP::ValueArg<std::string> wav_path("", WAV_PATH, "the input could be: wav_path, e.g.: asr_example.wav; pcm_path, e.g.: asr_example.pcm; wav.scp, kaldi style wav list (wav_id \t wav_path)", true, "", "string");
@@ -104,9 +104,12 @@
// read wav_path
vector<string> wav_list;
+ vector<string> wav_ids;
+ string default_id = "wav_default_id";
string wav_path_ = model_path.at(WAV_PATH);
if(is_target_file(wav_path_, "wav") || is_target_file(wav_path_, "pcm")){
wav_list.emplace_back(wav_path_);
+ wav_ids.emplace_back(default_id);
}
else if(is_target_file(wav_path_, "scp")){
ifstream in(wav_path_);
@@ -120,7 +123,8 @@
istringstream iss(line);
string column1, column2;
iss >> column1 >> column2;
- wav_list.emplace_back(column2);
+ wav_list.emplace_back(column2);
+ wav_ids.emplace_back(column1);
}
in.close();
}else{
@@ -131,7 +135,9 @@
FUNASR_HANDLE online_hanlde=FsmnVadOnlineInit(vad_hanlde);
float snippet_time = 0.0f;
long taking_micros = 0;
- for(auto& wav_file : wav_list){
+ for (int i = 0; i < wav_list.size(); i++) {
+ auto& wav_file = wav_list[i];
+ auto& wav_id = wav_ids[i];
int32_t sampling_rate_ = -1;
funasr::Audio audio(1);
@@ -153,7 +159,7 @@
char* speech_buff = audio.GetSpeechChar();
int buff_len = audio.GetSpeechLen()*2;
- int step = 3200;
+ int step = 800*2;
bool is_final = false;
for (int sample_offset = 0; sample_offset < buff_len; sample_offset += std::min(step, buff_len - sample_offset)) {
@@ -172,7 +178,7 @@
if (result)
{
vector<std::vector<int>>* vad_segments = FsmnVadGetResult(result, 0);
- print_segs(vad_segments);
+ print_segs(vad_segments, wav_id);
snippet_time += FsmnVadGetRetSnippetTime(result);
FsmnVadFreeResult(result);
}
--
Gitblit v1.9.1