From 9c0d7bee934e6ab46aa2970c7c5f34bd6031b803 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期二, 17 十二月 2024 11:16:16 +0800
Subject: [PATCH] Merge branch 'main' of github.com:alibaba-damo-academy/FunASR merge
---
model_zoo/readme_zh.md | 3 +--
runtime/onnxruntime/src/ct-transformer-online.cpp | 7 ++++++-
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/model_zoo/readme_zh.md b/model_zoo/readme_zh.md
index 657b4f1..ca56526 100644
--- a/model_zoo/readme_zh.md
+++ b/model_zoo/readme_zh.md
@@ -23,6 +23,5 @@
| paraformer-zh-spk <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/summary) [🤗]() ) | 分角色语音识别，带时间戳输出，非实时 | 60000小时，中文 | 220M |
| paraformer-zh-streaming <br> ( [⭐](https://modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() ) | 语音识别，实时 | 60000小时，中文 | 220M |
| paraformer-zh-streaming-small <br> ( [⭐](https://www.modelscope.cn/models/iic/speech_paraformer_asr_nat-zh-cn-16k-common-vocab8404-online/summary) [🤗]() ) | 语音识别，实时 | 60000小时，中文 | 220M |
-
-| paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() ) | 语音识别，非实时 | 50000小时，英文 | 220M |
+| paraformer-en <br> ( [⭐](https://www.modelscope.cn/models/damo/speech_paraformer-large-vad-punc_asr_nat-en-16k-common-vocab10020/summary) [🤗]() ) | 语音识别，非实时 | 50000小时，英文 | 220M |
diff --git a/runtime/onnxruntime/src/ct-transformer-online.cpp b/runtime/onnxruntime/src/ct-transformer-online.cpp
index 769bb65..83c0327 100644
--- a/runtime/onnxruntime/src/ct-transformer-online.cpp
+++ b/runtime/onnxruntime/src/ct-transformer-online.cpp
@@ -42,6 +42,11 @@
vector<int> InputData;
string strText; //full_text
strText = accumulate(arr_cache.begin(), arr_cache.end(), strText);
+
+ // 如果上一句的结尾是英语字母，并且这一句的开始也是英语字母，应该添加空格 (if the cached text ends with an ASCII letter and the new input starts with one, insert a space between them)
+ if ((strText.size() > 0 and !(strText[strText.size()-1] & 0x80)) && (strlen(sz_input) > 0 && !(sz_input[0] & 0x80)))
+ strText += " ";
+
strText += sz_input; // full_text = precache + text
m_tokenizer.Tokenize(strText.c_str(), strOut, InputData);
@@ -107,7 +112,7 @@
{
if (!(sentence_words_list[i][0] & 0x80) && (i + 1) < sentence_words_list.size() && !(sentence_words_list[i + 1][0] & 0x80))
{
- sentence_words_list[i] = " " + sentence_words_list[i];
+ sentence_words_list[i] = sentence_words_list[i] + " ";
}
if (nSkipNum < arr_cache.size()) // if skip_num < len(cache):
nSkipNum++;
--
Gitblit v1.9.1