From 580b11b57ac4b62f7e2acda73813a4e10e8e4cd3 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 10 Oct 2023 17:17:29 +0800
Subject: [PATCH] v0.8.0
---
funasr/runtime/onnxruntime/src/vocab.h | 18 ++++++++++++------
1 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/vocab.h b/funasr/runtime/onnxruntime/src/vocab.h
index 023671a..23b4bd6 100644
--- a/funasr/runtime/onnxruntime/src/vocab.h
+++ b/funasr/runtime/onnxruntime/src/vocab.h
@@ -5,21 +5,27 @@
#include <stdint.h>
#include <string>
#include <vector>
+#include <map>
using namespace std;
+namespace funasr {
class Vocab {
private:
vector<string> vocab;
- bool isChinese(string ch);
- bool isEnglish(string ch);
- void loadVocabFromYaml(const char* filename);
+ std::map<string, int> token_id;
+ bool IsEnglish(string ch);
+ void LoadVocabFromYaml(const char* filename);
public:
Vocab(const char *filename);
~Vocab();
- int size();
- string vector2string(vector<int> in);
- string vector2stringV2(vector<int> in);
+ int Size();
+ bool IsChinese(string ch);
+ void Vector2String(vector<int> in, std::vector<std::string> &preds);
+ string Vector2StringV2(vector<int> in, std::string language="");
+ string WordFormat(std::string word);
+ int GetIdByToken(const std::string &token);
};
+} // namespace funasr
#endif
--
Gitblit v1.9.1