From a98550fdf51a4ab5ec82506c359f9260104484bf Mon Sep 17 00:00:00 2001
From: 北念 <lzr265946@alibaba-inc.com>
Date: Wed, 17 Jul 2024 16:05:58 +0800
Subject: [PATCH] fix sense_voice_datasets
---
funasr/datasets/sense_voice_datasets/datasets.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/funasr/datasets/sense_voice_datasets/datasets.py b/funasr/datasets/sense_voice_datasets/datasets.py
index 6b57a9f..20232cc 100644
--- a/funasr/datasets/sense_voice_datasets/datasets.py
+++ b/funasr/datasets/sense_voice_datasets/datasets.py
@@ -328,7 +328,7 @@
emo_target = item["emo_target"]
event_target = item["event_target"]
text_language = item.get("text_language", "<|zh|>")
- punc_itn_bottom = item.get("with_or_wo_itn", "<|SPECIAL_TOKEN_13|>")
+ punc_itn_bottom = item.get("with_or_wo_itn", "<|woitn|>")
target_ids = self.tokenizer.encode(asr_target, allowed_special="all")
target_ids_len = len(target_ids) # [text]
--
Gitblit v1.9.1