From b78d47f1efb3d0662fce1b8d45a9eb11b3caef02 Mon Sep 17 00:00:00 2001
From: Lizerui9926 <110582652+Lizerui9926@users.noreply.github.com>
Date: Wed, 26 Apr 2023 17:17:52 +0800
Subject: [PATCH] Merge pull request #427 from alibaba-damo-academy/dev_gflags
---
funasr/runtime/onnxruntime/src/fsmn-vad.h | 23 +++++++++++++++--------
1 file changed, 15 insertions(+), 8 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/fsmn-vad.h b/funasr/runtime/onnxruntime/src/fsmn-vad.h
index e8569f9..1d5f68c 100644
--- a/funasr/runtime/onnxruntime/src/fsmn-vad.h
+++ b/funasr/runtime/onnxruntime/src/fsmn-vad.h
@@ -1,3 +1,7 @@
+/**
+ * Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
+ * MIT License (https://opensource.org/licenses/MIT)
+*/
#ifndef VAD_SERVER_FSMNVAD_H
#define VAD_SERVER_FSMNVAD_H
@@ -14,15 +18,15 @@
public:
FsmnVad();
void Test();
- void InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
- float vad_speech_noise_thres);
+ void InitVad(const std::string &vad_model, const std::string &vad_cmvn, const std::string &vad_config);
std::vector<std::vector<int>> Infer(const std::vector<float> &waves);
void Reset();
private:
- void ReadModel(const std::string &vad_model);
+ void ReadModel(const char* vad_model);
+ void LoadConfigFromYaml(const char* filename);
static void GetInputOutputInfo(
const std::shared_ptr<Ort::Session> &session,
@@ -31,7 +35,7 @@
void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
const std::vector<float> &waves);
- std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats, int lfr_m, int lfr_n);
+ std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats);
void Forward(
const std::vector<std::vector<float>> &chunk_feats,
@@ -50,10 +54,13 @@
knf::FbankOptions fbank_opts;
std::vector<float> means_list;
std::vector<float> vars_list;
- int vad_sample_rate_ = 16000;
- int vad_silence_duration_ = 800;
- int vad_max_len_ = 15000;
- double vad_speech_noise_thres_ = 0.9;
+
+ int vad_sample_rate_ = MODEL_SAMPLE_RATE;
+ int vad_silence_duration_ = VAD_SILENCE_DURATION;
+ int vad_max_len_ = VAD_MAX_LEN;
+ double vad_speech_noise_thres_ = VAD_SPEECH_NOISE_THRES;
+ int lfr_m = VAD_LFR_M;
+ int lfr_n = VAD_LFR_N;
};
--
Gitblit v1.9.1