From d783b24ba7d8a03dabfa2139fcbf40c216e0ea3d Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 16 三月 2023 19:34:52 +0800
Subject: [PATCH] Merge pull request #199 from alibaba-damo-academy/dev_xw
---
funasr/punctuation/text_preprocessor.py | 21 ---------------------
1 files changed, 0 insertions(+), 21 deletions(-)
diff --git a/funasr/punctuation/text_preprocessor.py b/funasr/punctuation/text_preprocessor.py
index 3d2c19e..c9c4bac 100644
--- a/funasr/punctuation/text_preprocessor.py
+++ b/funasr/punctuation/text_preprocessor.py
@@ -1,24 +1,3 @@
-def split_words(text: str):
- words = []
- segs = text.split()
- for seg in segs:
- # There is no space in seg.
- current_word = ""
- for c in seg:
- if len(c.encode()) == 1:
- # This is an ASCII char.
- current_word += c
- else:
- # This is a Chinese char.
- if len(current_word) > 0:
- words.append(current_word)
- current_word = ""
- words.append(c)
- if len(current_word) > 0:
- words.append(current_word)
- return words
-
-
def split_to_mini_sentence(words: list, word_limit: int = 20):
assert word_limit > 1
if len(words) <= word_limit:
--
Gitblit v1.9.1