北念
2024-07-17 a98550fdf51a4ab5ec82506c359f9260104484bf
fix sense_voice_datasets
1个文件已修改
2 ■■■ 已修改文件
funasr/datasets/sense_voice_datasets/datasets.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/datasets/sense_voice_datasets/datasets.py
@@ -328,7 +328,7 @@
            emo_target = item["emo_target"]
            event_target = item["event_target"]
            text_language = item.get("text_language", "<|zh|>")
-            punc_itn_bottom = item.get("with_or_wo_itn", "<|SPECIAL_TOKEN_13|>")
+            punc_itn_bottom = item.get("with_or_wo_itn", "<|woitn|>")
            target_ids = self.tokenizer.encode(asr_target, allowed_special="all")
            target_ids_len = len(target_ids)  # [text]