From 580b11b57ac4b62f7e2acda73813a4e10e8e4cd3 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 10 Oct 2023 17:17:29 +0800
Subject: [PATCH] v0.8.0

---
 funasr/runtime/onnxruntime/src/vocab.h |   18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/funasr/runtime/onnxruntime/src/vocab.h b/funasr/runtime/onnxruntime/src/vocab.h
index 023671a..23b4bd6 100644
--- a/funasr/runtime/onnxruntime/src/vocab.h
+++ b/funasr/runtime/onnxruntime/src/vocab.h
@@ -5,21 +5,27 @@
 #include <stdint.h>
 #include <string>
 #include <vector>
+#include <map>
 using namespace std;
 
+namespace funasr {
 class Vocab {
   private:
     vector<string> vocab;
-    bool isChinese(string ch);
-    bool isEnglish(string ch);
-    void loadVocabFromYaml(const char* filename);
+    std::map<string, int> token_id;  // token string -> integer id; presumably the table behind GetIdByToken (confirm in vocab.cpp)
+    bool IsEnglish(string ch);  // private predicate on ch; exact character test is defined in the implementation
+    void LoadVocabFromYaml(const char* filename);  // presumably populates the members above from the YAML file at filename (confirm in vocab.cpp)
 
   public:
     Vocab(const char *filename);
     ~Vocab();
-    int size();
-    string vector2string(vector<int> in);
-    string vector2stringV2(vector<int> in);
+    int Size();  // presumably the number of entries in vocab (confirm in vocab.cpp)
+    bool IsChinese(string ch);  // declared in the public section, so callable from outside Vocab
+    void Vector2String(vector<int> in, std::vector<std::string> &preds);  // returns nothing; results are delivered through the preds out-parameter
+    string Vector2StringV2(vector<int> in, std::string language="");  // language defaults to ""; how it affects the output is defined in the implementation
+    string WordFormat(std::string word);  // takes word by value and returns a string; formatting rules are defined in the implementation
+    int GetIdByToken(const std::string &token);  // NOTE(review): result for a token that is not present is not visible here; check vocab.cpp
 };
 
+} // namespace funasr
 #endif

--
Gitblit v1.9.1