From ae04f1e6c06d2829f218954470abe5c0348f75a6 Mon Sep 17 00:00:00 2001
From: root <wucong.lyb@alibaba-inc.com>
Date: Mon, 13 May 2024 19:41:39 +0800
Subject: [PATCH] update runtime_sdk_download_tool.py
---
funasr/datasets/sense_voice_datasets/datasets.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/funasr/datasets/sense_voice_datasets/datasets.py b/funasr/datasets/sense_voice_datasets/datasets.py
index 5d80956..ee2f13d 100644
--- a/funasr/datasets/sense_voice_datasets/datasets.py
+++ b/funasr/datasets/sense_voice_datasets/datasets.py
@@ -112,7 +112,7 @@
eos = self.tokenizer.encode(self.eos, allowed_special="all") # [eos]
- ids = prompt_ids + target_ids + eos
+ ids = prompt_ids + target_ids + eos # [sos, task, lid, text, eos]
ids_lengths = len(ids)
text = torch.tensor(ids, dtype=torch.int64)
--
Gitblit v1.9.1