From fb45c9a6ef4c5f94d8b36abafca072f62aff9b5f Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 15 May 2024 17:32:07 +0800
Subject: [PATCH] hf hub

---
 funasr/datasets/sense_voice_datasets/datasets.py |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/funasr/datasets/sense_voice_datasets/datasets.py b/funasr/datasets/sense_voice_datasets/datasets.py
index 5d80956..ee2f13d 100644
--- a/funasr/datasets/sense_voice_datasets/datasets.py
+++ b/funasr/datasets/sense_voice_datasets/datasets.py
@@ -112,7 +112,7 @@
 
             eos = self.tokenizer.encode(self.eos, allowed_special="all")  # [eos]
 
-            ids = prompt_ids + target_ids + eos
+            ids = prompt_ids + target_ids + eos  # [sos, task, lid, text, eos]
             ids_lengths = len(ids)
 
             text = torch.tensor(ids, dtype=torch.int64)

--
Gitblit v1.9.1