From e30a17cf4e715b3d139fa1e0ba01cda1bcf0f884 Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: 星期三, 10 一月 2024 11:23:41 +0800
Subject: [PATCH] update funasr-onnx

---
 funasr/models/ct_transformer/utils.py |   22 ++++++++++++++++++++++
 1 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/funasr/models/ct_transformer/utils.py b/funasr/models/ct_transformer/utils.py
index 0291dbc..a4a00e0 100644
--- a/funasr/models/ct_transformer/utils.py
+++ b/funasr/models/ct_transformer/utils.py
@@ -12,3 +12,25 @@
     if length % word_limit > 0:
         sentences.append(words[sentence_len * word_limit:])
     return sentences
+
+
+def split_words(text: str):
+    words = []
+    segs = text.split()
+    for seg in segs:
+        # There is no space in seg.
+        current_word = ""
+        for c in seg:
+            if len(c.encode()) == 1:
+                # This is an ASCII char.
+                current_word += c
+            else:
+                # This is a Chinese char.
+                if len(current_word) > 0:
+                    words.append(current_word)
+                    current_word = ""
+                words.append(c)
+        if len(current_word) > 0:
+            words.append(current_word)
+    
+    return words

--
Gitblit v1.9.1