From 55c09aeaa25b4bb88a50e09ba68fa6ff00a6d676 Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: Mon, 15 Jan 2024 20:10:39 +0800
Subject: [PATCH] update readme, fix seaco bug
---
runtime/onnxruntime/src/bias-lm.h | 33 +++++++++++++++++++++++----------
1 file changed, 23 insertions(+), 10 deletions(-)
diff --git a/runtime/onnxruntime/src/bias-lm.h b/runtime/onnxruntime/src/bias-lm.h
index 7254d49..957197a 100644
--- a/runtime/onnxruntime/src/bias-lm.h
+++ b/runtime/onnxruntime/src/bias-lm.h
@@ -7,6 +7,9 @@
#include "vocab.h"
#include "util/text-utils.h"
#include <yaml-cpp/yaml.h>
+#ifdef _WIN32
+#include "win_func.h"
+#endif
// node type
#define ROOT_NODE 0
#define VALUE_ZERO 0.0f
@@ -62,12 +65,17 @@
if (text.size() > 1) {
score = std::stof(text[1]);
}
- Utf8ToCharset(text[0], split_str);
+ SplitChiEngCharacters(text[0], split_str);
for (auto &str : split_str) {
- split_id.push_back(phn_set_.String2Id(str));
- if (!phn_set_.Find(str)) {
- is_oov = true;
- break;
+ std::vector<string> lex_vec;
+ std::string lex_str = vocab_.Word2Lex(str);
+ SplitStringToVector(lex_str, " ", true, &lex_vec);
+ for (auto &token : lex_vec) {
+ split_id.push_back(phn_set_.String2Id(token));
+ if (!phn_set_.Find(token)) {
+ is_oov = true;
+ break;
+ }
}
}
if (!is_oov) {
@@ -100,12 +108,17 @@
std::vector<std::string> split_str;
std::vector<int> split_id;
score = kv.second;
- Utf8ToCharset(kv.first, split_str);
+ SplitChiEngCharacters(kv.first, split_str);
for (auto &str : split_str) {
- split_id.push_back(phn_set_.String2Id(str));
- if (!phn_set_.Find(str)) {
- is_oov = true;
- break;
+ std::vector<string> lex_vec;
+ std::string lex_str = vocab_.Word2Lex(str);
+ SplitStringToVector(lex_str, " ", true, &lex_vec);
+ for (auto &token : lex_vec) {
+ split_id.push_back(phn_set_.String2Id(token));
+ if (!phn_set_.Find(token)) {
+ is_oov = true;
+ break;
+ }
}
}
if (!is_oov) {
--
Gitblit v1.9.1