From 3c83d64c84602de055f503af7d4e2761c829ec2e Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Tue, 12 Dec 2023 11:11:02 +0800
Subject: [PATCH] fst: support English hotwords

---
 runtime/onnxruntime/src/bias-lm.h |   30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

diff --git a/runtime/onnxruntime/src/bias-lm.h b/runtime/onnxruntime/src/bias-lm.h
index e2d28a2..957197a 100644
--- a/runtime/onnxruntime/src/bias-lm.h
+++ b/runtime/onnxruntime/src/bias-lm.h
@@ -65,12 +65,17 @@
       if (text.size() > 1) {
         score = std::stof(text[1]);
       }
-      Utf8ToCharset(text[0], split_str);
+      SplitChiEngCharacters(text[0], split_str);
       for (auto &str : split_str) {
-        split_id.push_back(phn_set_.String2Id(str));
-        if (!phn_set_.Find(str)) {
-          is_oov = true;
-          break;
+        std::vector<string> lex_vec;
+        std::string lex_str = vocab_.Word2Lex(str);
+        SplitStringToVector(lex_str, " ", true, &lex_vec);
+        for (auto &token : lex_vec) {
+          split_id.push_back(phn_set_.String2Id(token));
+          if (!phn_set_.Find(token)) {
+            is_oov = true;
+            break;
+          }
         }
       }
       if (!is_oov) {
@@ -103,12 +108,17 @@
       std::vector<std::string> split_str;
       std::vector<int> split_id;
       score = kv.second;
-      Utf8ToCharset(kv.first, split_str);
+      SplitChiEngCharacters(kv.first, split_str);
       for (auto &str : split_str) {
-        split_id.push_back(phn_set_.String2Id(str));
-        if (!phn_set_.Find(str)) {
-          is_oov = true;
-          break;
+        std::vector<string> lex_vec;
+        std::string lex_str = vocab_.Word2Lex(str);
+        SplitStringToVector(lex_str, " ", true, &lex_vec);
+        for (auto &token : lex_vec) {
+          split_id.push_back(phn_set_.String2Id(token));
+          if (!phn_set_.Find(token)) {
+            is_oov = true;
+            break;
+          }
         }
       }
       if (!is_oov) {

--
Gitblit v1.9.1