From 9c0d7bee934e6ab46aa2970c7c5f34bd6031b803 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 17 十二月 2024 11:16:16 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR merge

---
 model_zoo/readme_zh.md                            |    3 +--
 runtime/onnxruntime/src/ct-transformer-online.cpp |    7 ++++++-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/model_zoo/readme_zh.md b/model_zoo/readme_zh.md
index 657b4f1..ca56526 100644
--- a/model_zoo/readme_zh.md
+++ b/model_zoo/readme_zh.md
@@ -23,6 +23,5 @@
 |              paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary)  [🤗]() )              |  分角色语音识别，带时间戳输出，非实时   |  60000小时，中文  | 220M |
 |    paraformer-zh-streaming <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() )    |        语音识别，实时        |  60000小时，中文  | 220M |
 | paraformer-zh-streaming-small <br> ( [⭐](https://www.modelscope.cn/models/iic/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() ) |        语音识别，实时        |  60000小时，中文  | 220M |
-
-|       paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() )       |       语音识别，非实时        |  50000小时，英文  | 220M |
+| paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() )       |       语音识别，非实时        |  50000小时，英文  | 220M |
 
diff --git a/runtime/onnxruntime/src/ct-transformer-online.cpp b/runtime/onnxruntime/src/ct-transformer-online.cpp
index 769bb65..83c0327 100644
--- a/runtime/onnxruntime/src/ct-transformer-online.cpp
+++ b/runtime/onnxruntime/src/ct-transformer-online.cpp
@@ -42,6 +42,11 @@
     vector<int> InputData;
     string strText; //full_text
     strText = accumulate(arr_cache.begin(), arr_cache.end(), strText);
+
+    // 如果上一句的结尾是英语字母，并且这一句的开始也是英语字母，应该添加空格
+    if ((strText.size() > 0 and !(strText[strText.size()-1] & 0x80)) && (strlen(sz_input) > 0 && !(sz_input[0] & 0x80)))
+        strText += " ";
+
     strText += sz_input;  // full_text = precache + text  
     m_tokenizer.Tokenize(strText.c_str(), strOut, InputData);
 
@@ -107,7 +112,7 @@
     {
         if (!(sentence_words_list[i][0] & 0x80) && (i + 1) < sentence_words_list.size() && !(sentence_words_list[i + 1][0] & 0x80))
         {
-            sentence_words_list[i] = " " + sentence_words_list[i];
+            sentence_words_list[i] = sentence_words_list[i] + " ";
         }
         if (nSkipNum < arr_cache.size())  //    if skip_num < len(cache):
             nSkipNum++;

--
Gitblit v1.9.1