From d80ac2fd2df4e7fb8a28acfa512bb11472b5cc99 Mon Sep 17 00:00:00 2001
From: liugz18 <57401541+liugz18@users.noreply.github.com>
Date: Thu, 18 Jul 2024 21:34:55 +0800
Subject: [PATCH] Rename 'res' in line 514 to avoid a naming conflict with line 365
---
funasr/datasets/sense_voice_datasets/datasets.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/funasr/datasets/sense_voice_datasets/datasets.py b/funasr/datasets/sense_voice_datasets/datasets.py
index 6b57a9f..2e0266e 100644
--- a/funasr/datasets/sense_voice_datasets/datasets.py
+++ b/funasr/datasets/sense_voice_datasets/datasets.py
@@ -325,10 +325,10 @@
asr_target = item["target"]
if self.preprocessor_text:
asr_target = self.preprocessor_text(asr_target)
- emo_target = item["emo_target"]
- event_target = item["event_target"]
+ emo_target = item.get("emo_target", "<|NEUTRAL|>")
+ event_target = item.get("event_target", "<|Speech|>")
text_language = item.get("text_language", "<|zh|>")
- punc_itn_bottom = item.get("with_or_wo_itn", "<|SPECIAL_TOKEN_13|>")
+ punc_itn_bottom = item.get("with_or_wo_itn", "<|woitn|>")
target_ids = self.tokenizer.encode(asr_target, allowed_special="all")
target_ids_len = len(target_ids) # [text]
--
Gitblit v1.9.1