From 57e023e5cfa522007e24d87c8e3d82bf7c4a19cd Mon Sep 17 00:00:00 2001
From: Shi Xian <40013335+R1ckShi@users.noreply.github.com>
Date: Tue, 18 Feb 2025 15:15:40 +0800
Subject: [PATCH] Merge pull request #2382 from msgk239/dev_clean

---
 funasr/datasets/sense_voice_datasets/datasets.py |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/funasr/datasets/sense_voice_datasets/datasets.py b/funasr/datasets/sense_voice_datasets/datasets.py
index 6b57a9f..2e0266e 100644
--- a/funasr/datasets/sense_voice_datasets/datasets.py
+++ b/funasr/datasets/sense_voice_datasets/datasets.py
@@ -325,10 +325,10 @@
             asr_target = item["target"]
             if self.preprocessor_text:
                 asr_target = self.preprocessor_text(asr_target)
-            emo_target = item["emo_target"]
-            event_target = item["event_target"]
+            emo_target = item.get("emo_target", "<|NEUTRAL|>")
+            event_target = item.get("event_target", "<|Speech|>")
             text_language = item.get("text_language", "<|zh|>")
-            punc_itn_bottom = item.get("with_or_wo_itn", "<|SPECIAL_TOKEN_13|>")
+            punc_itn_bottom = item.get("with_or_wo_itn", "<|woitn|>")
 
             target_ids = self.tokenizer.encode(asr_target, allowed_special="all")
             target_ids_len = len(target_ids)  # [text]

--
Gitblit v1.9.1