From 28ccfbfc51068a663a80764e14074df5edf2b5ba Mon Sep 17 00:00:00 2001
From: kongdeqiang <kongdeqiang960204@163.com>
Date: Fri, 13 Mar 2026 17:41:41 +0800
Subject: [PATCH] 提交

---
 runtime/onnxruntime/src/ct-transformer-online.cpp |   31 +++++++++++--------------------
 1 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/runtime/onnxruntime/src/ct-transformer-online.cpp b/runtime/onnxruntime/src/ct-transformer-online.cpp
index 2198667..3be99b8 100644
--- a/runtime/onnxruntime/src/ct-transformer-online.cpp
+++ b/runtime/onnxruntime/src/ct-transformer-online.cpp
@@ -11,7 +11,7 @@
 {
 }
 
-void CTTransformerOnline::InitPunc(const std::string &punc_model, const std::string &punc_config, int thread_num){
+void CTTransformerOnline::InitPunc(const std::string &punc_model, const std::string &punc_config, const std::string &token_file, int thread_num){
     session_options.SetIntraOpNumThreads(thread_num);
     session_options.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
     session_options.DisableCpuMemArena();
@@ -25,25 +25,11 @@
         exit(-1);
     }
     // read inputnames outputnames
-    string strName;
-    GetInputName(m_session.get(), strName);
-    m_strInputNames.push_back(strName.c_str());
-    GetInputName(m_session.get(), strName, 1);
-    m_strInputNames.push_back(strName);
-    GetInputName(m_session.get(), strName, 2);
-    m_strInputNames.push_back(strName);
-    GetInputName(m_session.get(), strName, 3);
-    m_strInputNames.push_back(strName);
-    
-    GetOutputName(m_session.get(), strName);
-    m_strOutputNames.push_back(strName);
+    GetInputNames(m_session.get(), m_strInputNames, m_szInputNames);
+    GetOutputNames(m_session.get(), m_strOutputNames, m_szOutputNames);
 
-    for (auto& item : m_strInputNames)
-        m_szInputNames.push_back(item.c_str());
-    for (auto& item : m_strOutputNames)
-        m_szOutputNames.push_back(item.c_str());
-
-	m_tokenizer.OpenYaml(punc_config.c_str());
+	m_tokenizer.OpenYaml(punc_config.c_str(), token_file.c_str());
+	m_tokenizer.JiebaInit(punc_config);
 }
 
 CTTransformerOnline::~CTTransformerOnline()
@@ -57,6 +43,11 @@
     vector<int> InputData;
     string strText; //full_text
     strText = accumulate(arr_cache.begin(), arr_cache.end(), strText);
+
+    // If the cached text ends with an English (ASCII) letter and the new input also starts with one, insert a space between them
+    if ((strText.size() > 0 and !(strText[strText.size()-1] & 0x80)) && (strlen(sz_input) > 0 && !(sz_input[0] & 0x80)))
+        strText += " ";
+
     strText += sz_input;  // full_text = precache + text  
     m_tokenizer.Tokenize(strText.c_str(), strOut, InputData);
 
@@ -120,7 +111,7 @@
     vector<string> WordWithPunc;
     for (int i = 0; i < sentence_words_list.size(); i++) // for i in range(0, len(sentence_words_list)):
     {
-        if (i > 0 && !(sentence_words_list[i][0] & 0x80) && (i + 1) < sentence_words_list.size() && !(sentence_words_list[i + 1][0] & 0x80))
+        if (!(sentence_words_list[i][0] & 0x80) && (i + 1) < sentence_words_list.size() && !(sentence_words_list[i + 1][0] & 0x80))
         {
             sentence_words_list[i] = sentence_words_list[i] + " ";
         }

--
Gitblit v1.9.1