From 1596f6f414f6f41da66506debb1dff19fffeb3ec Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Mon, 24 Jun 2024 11:55:17 +0800
Subject: [PATCH] Fix hotwords bug

---
 runtime/onnxruntime/src/vocab.h |    8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/runtime/onnxruntime/src/vocab.h b/runtime/onnxruntime/src/vocab.h
index 8834b97..36fabf4 100644
--- a/runtime/onnxruntime/src/vocab.h
+++ b/runtime/onnxruntime/src/vocab.h
@@ -6,6 +6,7 @@
 #include <string>
 #include <vector>
 #include <map>
+#include "nlohmann/json.hpp"
 using namespace std;
 
 namespace funasr {
@@ -13,11 +14,15 @@
   private:
     vector<string> vocab;
     std::map<string, int> token_id;
+    std::map<string, string> lex_map;
     bool IsEnglish(string ch);
     void LoadVocabFromYaml(const char* filename);
+    void LoadVocabFromJson(const char* filename);
+    void LoadLex(const char* filename);
 
   public:
     Vocab(const char *filename);
+    Vocab(const char *filename, const char *lex_file);
     ~Vocab();
     int Size() const;
     bool IsChinese(string ch);
@@ -26,7 +31,8 @@
     string Vector2StringV2(vector<int> in, std::string language="");
     string Id2String(int id) const;
     string WordFormat(std::string word);
-    int GetIdByToken(const std::string &token);
+    int GetIdByToken(const std::string &token) const;
+    string Word2Lex(const std::string &word) const;
 };
 
 } // namespace funasr

--
Gitblit v1.9.1