From 3c83d64c84602de055f503af7d4e2761c829ec2e Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Tue, 12 Dec 2023 11:11:02 +0800
Subject: [PATCH] fst: support eng hotword

---
 runtime/onnxruntime/src/vocab.h |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/runtime/onnxruntime/src/vocab.h b/runtime/onnxruntime/src/vocab.h
index 8834b97..19e3648 100644
--- a/runtime/onnxruntime/src/vocab.h
+++ b/runtime/onnxruntime/src/vocab.h
@@ -13,11 +13,14 @@
   private:
     vector<string> vocab;
     std::map<string, int> token_id;
+    std::map<string, string> lex_map;
     bool IsEnglish(string ch);
     void LoadVocabFromYaml(const char* filename);
+    void LoadLex(const char* filename);
 
   public:
     Vocab(const char *filename);
+    Vocab(const char *filename, const char *lex_file);
     ~Vocab();
     int Size() const;
     bool IsChinese(string ch);
@@ -26,7 +29,8 @@
     string Vector2StringV2(vector<int> in, std::string language="");
     string Id2String(int id) const;
     string WordFormat(std::string word);
-    int GetIdByToken(const std::string &token);
+    int GetIdByToken(const std::string &token) const;
+    string Word2Lex(const std::string &word) const;
 };
 
 } // namespace funasr

--
Gitblit v1.9.1