From a65016e23e6c829d61c63a68e27b24abf86e926a Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Tue, 16 Apr 2024 10:09:03 +0800
Subject: [PATCH] Dev gzf exp (#1618)

---
 funasr/models/sense_voice/model.py |   35 +++++++++++++++++++----------------
 1 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/funasr/models/sense_voice/model.py b/funasr/models/sense_voice/model.py
index d6552a6..4ee2fa5 100644
--- a/funasr/models/sense_voice/model.py
+++ b/funasr/models/sense_voice/model.py
@@ -73,29 +73,32 @@
 
         speech = speech.to(device=kwargs["device"])[0, :, :]
         speech_lengths = speech_lengths.to(device=kwargs["device"])
-        
-        task = kwargs.get("task", "ASR")
+
+        DecodingOptions = kwargs.get("DecodingOptions", {})
+        task = DecodingOptions.get("task", "ASR")
         if isinstance(task, str):
             task = [task]
         task = "".join([f"<|{x}|>" for x in task])
         initial_prompt = kwargs.get("initial_prompt", f"<|startoftranscript|>{task}")
-        language = kwargs.get("language", None)
+        DecodingOptions["initial_prompt"] = initial_prompt
+        
+        language = DecodingOptions.get("language", None)
         language = None if language == "auto" else language
-        # if language is None:
-        #     # detect the spoken language
-        #     _, probs = self.model.detect_language(speech, initial_prompt=initial_prompt)
-        #     print(f"Detected language: {max(probs, key=probs.get)}")
-        #     language = max(probs, key=probs.get)
-        #     language = language if kwargs.get("language", None) is None else kwargs.get("language")
-        
-        # decode the audio
-        
-        # initial_prompt = kwargs.get("initial_prompt", "<|startoftranscript|><|ASR|>")
-        options = whisper.DecodingOptions(language=language, fp16=False, without_timestamps=True, initial_prompt=initial_prompt)
-        result = whisper.decode(self.model, speech, options)
+        DecodingOptions["language"] = language
 
+        DecodingOptions["vocab_path"] = kwargs.get("vocab_path", None)
+        
+        
+        if "without_timestamps" not in DecodingOptions:
+            DecodingOptions["without_timestamps"] = True
+
+    
+        options = whisper.DecodingOptions(**DecodingOptions)
+        
+        result = whisper.decode(self.model, speech, options)
+        text = f"{result.text}"
         results = []
-        result_i = {"key": key[0], "text": result.text}
+        result_i = {"key": key[0], "text": text}
 
         results.append(result_i)
     

--
Gitblit v1.9.1