From 7ab2e5cf22bbb31808bcacf84c054c710e4e6a93 Mon Sep 17 00:00:00 2001
From: Yabin Li <wucong.lyb@alibaba-inc.com>
Date: Mon, 24 Apr 2023 16:19:17 +0800
Subject: [PATCH] Merge pull request #400 from alibaba-damo-academy/dev_knf

---
 funasr/runtime/onnxruntime/src/fsmn-vad.h |   60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 60 insertions(+), 0 deletions(-)

diff --git a/funasr/runtime/onnxruntime/src/fsmn-vad.h b/funasr/runtime/onnxruntime/src/fsmn-vad.h
new file mode 100644
index 0000000..e8569f9
--- /dev/null
+++ b/funasr/runtime/onnxruntime/src/fsmn-vad.h
@@ -0,0 +1,60 @@
+
+#ifndef VAD_SERVER_FSMNVAD_H
+#define VAD_SERVER_FSMNVAD_H
+
+#include "precomp.h"
+
+class FsmnVad {
+/** FSMN VAD wrapper that holds an ONNX Runtime session (Ort::Session); this header carries declarations only.
+ * Author: Speech Lab of DAMO Academy, Alibaba Group
+ * Deep-FSMN for Large Vocabulary Continuous Speech Recognition
+ * https://arxiv.org/abs/1803.05030
+*/
+    // NOTE(review): behavior notes below are inferred from names and signatures only; confirm against the implementation file.
+public:
+    FsmnVad();
+    void Test();  // purpose is not visible from this header
+    void InitVad(const std::string &vad_model, const std::string &vad_cmvn, int vad_sample_rate, int vad_silence_duration, int vad_max_len,
+                  float vad_speech_noise_thres);  // NOTE(review): threshold arrives as float, but vad_speech_noise_thres_ is a double
+    // Takes a buffer of float samples and returns a list of int lists (presumably one [start, end] pair per segment; TODO confirm).
+    std::vector<std::vector<int>> Infer(const std::vector<float> &waves);
+    void Reset();  // presumably clears per-stream state such as in_cache_; TODO confirm
+    // Everything below is private to the class.
+private:
+    // Presumably builds vad_session_ from the model file named by `vad_model`; TODO confirm.
+    void ReadModel(const std::string &vad_model);
+    // Static, so it needs no FsmnVad instance; presumably fills both caller-provided name vectors from `session`.
+    static void GetInputOutputInfo(
+            const std::shared_ptr<Ort::Session> &session,
+            std::vector<const char *> *in_names, std::vector<const char *> *out_names);
+    // `vad_feats` is a non-const reference (output); NOTE(review): sample_rate is a float here while vad_sample_rate_ is an int.
+    void FbankKaldi(float sample_rate, std::vector<std::vector<float>> &vad_feats,
+                    const std::vector<float> &waves);
+    // NOTE(review): returns a non-const reference, presumably to `vad_feats` itself; verify it never refers to a local.
+    std::vector<std::vector<float>> &LfrCmvn(std::vector<std::vector<float>> &vad_feats, int lfr_m, int lfr_n);
+    // `chunk_feats` is read-only input; results are written through the `out_prob` pointer.
+    void Forward(
+            const std::vector<std::vector<float>> &chunk_feats,
+            std::vector<std::vector<float>> *out_prob);
+    // NOTE(review): LoadCmvn takes a C string, whereas InitVad receives the CMVN path as std::string.
+    void LoadCmvn(const char *filename);
+    void InitCache();  // presumably (re)initializes in_cache_; TODO confirm
+    // ONNX Runtime handles; the session pointer stays null until something assigns it.
+    std::shared_ptr<Ort::Session> vad_session_ = nullptr;
+    Ort::Env env_;
+    Ort::SessionOptions session_options_;
+    std::vector<const char *> vad_in_names_;   // NOTE(review): raw pointers; confirm the pointed-to strings outlive this vector
+    std::vector<const char *> vad_out_names_;  // NOTE(review): same lifetime question as vad_in_names_
+    std::vector<std::vector<float>> in_cache_;
+    // NOTE(review): the next three members lack the trailing underscore used by the other data members.
+    knf::FbankOptions fbank_opts;
+    std::vector<float> means_list;
+    std::vector<float> vars_list;
+    int vad_sample_rate_ = 16000;          // default 16000 (presumably Hz)
+    int vad_silence_duration_ = 800;       // default 800 (presumably milliseconds; TODO confirm)
+    int vad_max_len_ = 15000;              // default 15000 (presumably milliseconds; TODO confirm)
+    double vad_speech_noise_thres_ = 0.9;  // default 0.9
+};
+
+
+#endif //VAD_SERVER_FSMNVAD_H

--
Gitblit v1.9.1