python/FunASR-XL.git - Gitblit

python / FunASR-XL

FUNASR训练

blame | 历史 | 补丁 | 提交 | 提交对比 | ignore whitespace

游雁

2023-12-27 f6b611de44c3a535befa96da552d07b0ed1b073c

 funasr/models/ct_transformer/utils.py

@@ -12,3 +12,25 @@
    if length % word_limit > 0:
        sentences.append(words[sentence_len * word_limit:])
    return sentences


def split_words(text: str):
    """Tokenize text, keeping ASCII runs whole and isolating other characters.

    The input is first split on whitespace. Inside each whitespace-free
    segment, consecutive characters whose UTF-8 encoding is exactly one byte
    (i.e. ASCII) are grouped into a single token, while every character that
    encodes to more than one byte (for example a Chinese character) becomes
    a token of its own.

    Args:
        text: Input string, possibly mixing ASCII words and multi-byte
            characters.

    Returns:
        A list of token strings in their original order; empty if the input
        is empty or contains only whitespace.
    """
    tokens = []
    for chunk in text.split():
        # chunk is guaranteed to contain no whitespace.
        ascii_run = []
        for ch in chunk:
            if len(ch.encode()) > 1:
                # Multi-byte character: flush the pending ASCII run first so
                # token order is preserved, then emit the character alone.
                if ascii_run:
                    tokens.append("".join(ascii_run))
                    ascii_run = []
                tokens.append(ch)
            else:
                # Single-byte (ASCII) character: extend the current run.
                ascii_run.append(ch)
        # Emit whatever ASCII run is still pending at the end of the chunk.
        if ascii_run:
            tokens.append("".join(ascii_run))

    return tokens