From ccac6ceea98a1bcc7c06e4c6e010159f850f32cc Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Sat, 13 Jan 2024 22:35:20 +0800
Subject: [PATCH] fix punc model
---
funasr/datasets/preprocessor.py | 20 --------------------
1 file changed, 0 insertions(+), 20 deletions(-)
diff --git a/funasr/datasets/preprocessor.py b/funasr/datasets/preprocessor.py
index 966cc94..f3b7d43 100644
--- a/funasr/datasets/preprocessor.py
+++ b/funasr/datasets/preprocessor.py
@@ -664,26 +664,6 @@
if self.seg_jieba:
jieba.load_userdict(seg_dict_file)
- @classmethod
- def split_words(cls, text: str):
- words = []
- segs = text.split()
- for seg in segs:
- # There is no space in seg.
- current_word = ""
- for c in seg:
- if len(c.encode()) == 1:
- # This is an ASCII char.
- current_word += c
- else:
- # This is a Chinese char.
- if len(current_word) > 0:
- words.append(current_word)
- current_word = ""
- words.append(c)
- if len(current_word) > 0:
- words.append(current_word)
- return words
@classmethod
def isEnglish(cls, text:str):
--
Gitblit v1.9.1