From 48693b45c021a842ea964c9dc99479b61eac062f Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Tue, 2 Apr 2024 10:33:27 +0800
Subject: [PATCH] Dev gzf new (#1574)

---
 funasr/models/sense_voice/model.py |   24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/funasr/models/sense_voice/model.py b/funasr/models/sense_voice/model.py
index 2822fc7..d6552a6 100644
--- a/funasr/models/sense_voice/model.py
+++ b/funasr/models/sense_voice/model.py
@@ -73,18 +73,24 @@
 
         speech = speech.to(device=kwargs["device"])[0, :, :]
         speech_lengths = speech_lengths.to(device=kwargs["device"])
-
+        
+        task = kwargs.get("task", "ASR")
+        if isinstance(task, str):
+            task = [task]
+        task = "".join([f"<|{x}|>" for x in task])
+        initial_prompt = kwargs.get("initial_prompt", f"<|startoftranscript|>{task}")
         language = kwargs.get("language", None)
-        initial_prompt = kwargs.get("initial_prompt", "<|startoftranscript|><|ASR|>")
-        # # detect the spoken language
-        # _, probs = self.model.detect_language(speech, initial_prompt=initial_prompt)
-        # print(f"Detected language: {max(probs, key=probs.get)}")
-        # language = max(probs, key=probs.get)
-        # language = language if kwargs.get("language", None) is None else kwargs.get("language")
+        language = None if language == "auto" else language
+        # if language is None:
+        #     # detect the spoken language
+        #     _, probs = self.model.detect_language(speech, initial_prompt=initial_prompt)
+        #     print(f"Detected language: {max(probs, key=probs.get)}")
+        #     language = max(probs, key=probs.get)
+        #     language = language if kwargs.get("language", None) is None else kwargs.get("language")
         
         # decode the audio
-        prompt = ""
-        initial_prompt = kwargs.get("initial_prompt", "<|startoftranscript|><|ASR|>")
+        
+        # initial_prompt = kwargs.get("initial_prompt", "<|startoftranscript|><|ASR|>")
         options = whisper.DecodingOptions(language=language, fp16=False, without_timestamps=True, initial_prompt=initial_prompt)
         result = whisper.decode(self.model, speech, options)
 

--
Gitblit v1.9.1