From a3bb4013c39faa1d006dcb4d6d87ec9a6bb3770c Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Tue, 27 Feb 2024 10:06:22 +0800
Subject: [PATCH] vad

---
 funasr/models/llm_asr/model.py |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/funasr/models/llm_asr/model.py b/funasr/models/llm_asr/model.py
index e3cf551..2b6db96 100644
--- a/funasr/models/llm_asr/model.py
+++ b/funasr/models/llm_asr/model.py
@@ -216,8 +216,8 @@
         self, speech: torch.Tensor, speech_lengths: torch.Tensor, **kwargs,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
     
-        audio_mask = kwargs.get("audio_mask")
-        audio_token_lengths = audio_mask.sum(-1)
+        audio_mask = kwargs.get("audio_mask", None)
+        audio_token_lengths = audio_mask.sum(-1) if audio_mask is not None else None
 
         batch = {"speech": speech, "speech_lengths": speech_lengths}
         enc, enc_lens = self.audio_encoder.encode(**batch)
@@ -279,7 +279,7 @@
         
     
         prompt_pre = "USER: \nINSTRUCTION: {}\nINPUT: ".format(prompt)
-        prompt_ids = self.tokenizer.encode(prompt_pre)
+        prompt_ids = tokenizer.encode(prompt_pre)
         prompt_length = len(prompt_ids)
         prompt_ids = torch.tensor(prompt_ids, dtype=torch.int64).to(kwargs["device"])
 

--
Gitblit v1.9.1