funasr/datasets/sense_voice_datasets/datasets.py
@@ -325,8 +325,8 @@ asr_target = item["target"] if self.preprocessor_text: asr_target = self.preprocessor_text(asr_target) emo_target = item["emo_target"] event_target = item["event_target"] emo_target = item.get("emo_target", "<|NEUTRAL|>") event_target = item.get("event_target", "<|Speech|>") text_language = item.get("text_language", "<|zh|>") punc_itn_bottom = item.get("with_or_wo_itn", "<|woitn|>")