From 2a66366be4c2715870e4859fd5a5db6e8a9dc00a Mon Sep 17 00:00:00 2001
From: chenmengzheAAA <123789350+chenmengzheAAA@users.noreply.github.com>
Date: Thu, 14 Sep 2023 19:00:17 +0800
Subject: [PATCH] Merge pull request #956 from alibaba-damo-academy/chenmengzheAAA-patch-4
---
funasr/runtime/onnxruntime/src/vocab.cpp | 33 ++++++++++++++++++++-------------
1 file changed, 20 insertions(+), 13 deletions(-)
diff --git a/funasr/runtime/onnxruntime/src/vocab.cpp b/funasr/runtime/onnxruntime/src/vocab.cpp
index 65af8b6..c29156f 100644
--- a/funasr/runtime/onnxruntime/src/vocab.cpp
+++ b/funasr/runtime/onnxruntime/src/vocab.cpp
@@ -29,19 +29,27 @@
exit(-1);
}
YAML::Node myList = config["token_list"];
+ int i = 0;
for (YAML::const_iterator it = myList.begin(); it != myList.end(); ++it) {
vocab.push_back(it->as<string>());
+ token_id[it->as<string>()] = i;
+ i ++;
}
}
-string Vocab::Vector2String(vector<int> in)
-{
- int i;
- stringstream ss;
- for (auto it = in.begin(); it != in.end(); it++) {
- ss << vocab[*it];
+int Vocab::GetIdByToken(const std::string &token) {
+ if (token_id.count(token)) {
+ return token_id[token];
}
- return ss.str();
+ return 0;
+}
+
+void Vocab::Vector2String(vector<int> in, std::vector<std::string> &preds)
+{
+ for (auto it = in.begin(); it != in.end(); it++) {
+ string word = vocab[*it];
+ preds.emplace_back(word);
+ }
}
int Str2Int(string str)
@@ -110,17 +118,16 @@
else {
// pre word is chinese
if (!is_pre_english) {
- word[0] = word[0] - 32;
+ // word[0] = word[0] - 32;
words.push_back(word);
pre_english_len = word.size();
-
}
// pre word is english word
else {
// single letter turn to upper case
- if (word.size() == 1) {
- word[0] = word[0] - 32;
- }
+ // if (word.size() == 1) {
+ // word[0] = word[0] - 32;
+ // }
if (pre_english_len > 1) {
words.push_back(" ");
@@ -153,4 +160,4 @@
return vocab.size();
}
-} // namespace funasr
\ No newline at end of file
+} // namespace funasr
--
Gitblit v1.9.1