From 0fd9640ced9c8ae9af43e5300068a8837d8ce26e Mon Sep 17 00:00:00 2001
From: 九耳 <mengzhe.cmz@alibaba-inc.com>
Date: 星期四, 30 三月 2023 16:48:55 +0800
Subject: [PATCH] fix
---
funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py | 19 +++++++++++++++++++
1 files changed, 19 insertions(+), 0 deletions(-)
diff --git a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py
index 63bc0e4..0df954e 100644
--- a/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py
+++ b/funasr/runtime/python/onnxruntime/funasr_onnx/utils/utils.py
@@ -228,6 +228,25 @@
sentences.append(words[sentence_len * word_limit:])
return sentences
+def code_mix_split_words(text: str):
+ words = []
+ segs = text.split()
+ for seg in segs:
+ # There is no space in seg.
+ current_word = ""
+ for c in seg:
+ if len(c.encode()) == 1:
+ # This is an ASCII char.
+ current_word += c
+ else:
+ # This is a Chinese char.
+ if len(current_word) > 0:
+ words.append(current_word)
+ current_word = ""
+ words.append(c)
+ if len(current_word) > 0:
+ words.append(current_word)
+ return words
def read_yaml(yaml_path: Union[str, Path]) -> Dict:
if not Path(yaml_path).exists():
--
Gitblit v1.9.1