From cfe577f16fef9fb5b0a48f07d4f9e232799cc9d4 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Wed, 08 May 2024 00:03:52 +0800
Subject: [PATCH] decoding key
---
funasr/models/sense_voice/model.py | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/funasr/models/sense_voice/model.py b/funasr/models/sense_voice/model.py
index dcf18fd..0230638 100644
--- a/funasr/models/sense_voice/model.py
+++ b/funasr/models/sense_voice/model.py
@@ -803,7 +803,10 @@
tokenizer=tokenizer,
)
- if len(kwargs.get("data_type", [])) > 1:
+ if (
+ isinstance(kwargs.get("data_type", None), (list, tuple))
+ and len(kwargs.get("data_type", [])) > 1
+ ):
audio_sample_list, text_token_int_list = audio_sample_list
text_token_int = text_token_int_list[0]
else:
@@ -857,7 +860,7 @@
ys_pad = torch.tensor(sos_int + text_token_int, dtype=torch.int64).to(kwargs["device"])[
None, :
]
- ys_pad_lens = torch.tensor([len(text_token_int)], dtype=torch.int64).to(
+ ys_pad_lens = torch.tensor([len(sos_int + text_token_int)], dtype=torch.int64).to(
kwargs["device"]
)[None, :]
decoder_out = self.model.decoder(
--
Gitblit v1.9.1