From 70027f0eadaa89f1641d86199cbe5d8664f10bdf Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Thu, 11 Apr 2024 19:46:33 +0800
Subject: [PATCH] add
---
funasr/models/whisper/model.py | 8 ++++----
examples/industrial_data_pretraining/whisper/demo.py | 11 +++++++++--
2 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/examples/industrial_data_pretraining/whisper/demo.py b/examples/industrial_data_pretraining/whisper/demo.py
index 3d317f8..c94678f 100644
--- a/examples/industrial_data_pretraining/whisper/demo.py
+++ b/examples/industrial_data_pretraining/whisper/demo.py
@@ -12,9 +12,16 @@
vad_kwargs={"max_single_segment_time": 30000},
)
+DecodingOptions = {
+ "task": "transcribe",
+ "language": None,
+ "beam_size": None,
+ "fp16": True,
+ "without_timestamps": False,
+ "prompt": None,
+ }
res = model.generate(
- language=None,
- task="transcribe",
+ DecodingOptions=DecodingOptions,
batch_size_s=0,
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
diff --git a/funasr/models/whisper/model.py b/funasr/models/whisper/model.py
index 35de1c9..51fde81 100644
--- a/funasr/models/whisper/model.py
+++ b/funasr/models/whisper/model.py
@@ -90,12 +90,12 @@
speech = speech.to(device=kwargs["device"])[0, :, :]
speech_lengths = speech_lengths.to(device=kwargs["device"])
- # detect the spoken language
- _, probs = self.model.detect_language(speech)
- print(f"Detected language: {max(probs, key=probs.get)}")
+ # # detect the spoken language
+ # _, probs = self.model.detect_language(speech)
+ # print(f"Detected language: {max(probs, key=probs.get)}")
# decode the audio
- options = whisper.DecodingOptions(language=kwargs.get("language", None), fp16=False)
+ options = whisper.DecodingOptions(**kwargs.get("DecodingOptions", {}))
result = whisper.decode(self.model, speech, options)
results = []
--
Gitblit v1.9.1