From 580b11b57ac4b62f7e2acda73813a4e10e8e4cd3 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 10 Oct 2023 17:17:29 +0800
Subject: [PATCH] v0.8.0
---
funasr/runtime/onnxruntime/src/vocab.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/funasr/runtime/onnxruntime/src/vocab.h b/funasr/runtime/onnxruntime/src/vocab.h
index 9b462b7..23b4bd6 100644
--- a/funasr/runtime/onnxruntime/src/vocab.h
+++ b/funasr/runtime/onnxruntime/src/vocab.h
@@ -5,12 +5,14 @@
#include <stdint.h>
#include <string>
#include <vector>
+#include <map>
using namespace std;
namespace funasr {
class Vocab {
private:
vector<string> vocab;
+ std::map<string, int> token_id;
bool IsEnglish(string ch);
void LoadVocabFromYaml(const char* filename);
@@ -20,7 +22,9 @@
int Size();
bool IsChinese(string ch);
void Vector2String(vector<int> in, std::vector<std::string> &preds);
- string Vector2StringV2(vector<int> in);
+ string Vector2StringV2(vector<int> in, std::string language="");
+ string WordFormat(std::string word);
+ int GetIdByToken(const std::string &token);
};
} // namespace funasr
--
Gitblit v1.9.1