游雁
2024-01-13 ccac6ceea98a1bcc7c06e4c6e010159f850f32cc
funasr/datasets/preprocessor.py
@@ -664,26 +664,6 @@
        if self.seg_jieba:
            jieba.load_userdict(seg_dict_file)
    @classmethod
    def split_words(cls, text: str):
        words = []
        segs = text.split()
        for seg in segs:
            # There is no space in seg.
            current_word = ""
            for c in seg:
                if len(c.encode()) == 1:
                    # This is an ASCII char.
                    current_word += c
                else:
                    # This is a Chinese char.
                    if len(current_word) > 0:
                        words.append(current_word)
                        current_word = ""
                    words.append(c)
            if len(current_word) > 0:
                words.append(current_word)
        return words
    @classmethod
    def isEnglish(cls, text:str):