From b9bcf1f093c3053fdc4e2cf4a1d38e27bbf429fb Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Thu, 19 Oct 2023 14:03:48 +0800
Subject: [PATCH] docs
---
funasr/bin/asr_infer.py | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/funasr/bin/asr_infer.py b/funasr/bin/asr_infer.py
index 43da8bf..4648fb3 100644
--- a/funasr/bin/asr_infer.py
+++ b/funasr/bin/asr_infer.py
@@ -38,9 +38,7 @@
from funasr.text.token_id_converter import TokenIDConverter
from funasr.torch_utils.device_funcs import to_device
from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard
-from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode
-from funasr.utils.whisper_utils.transcribe import transcribe
-from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
+
class Speech2Text:
"""Speech2Text class
@@ -1918,12 +1916,15 @@
nbest: int = 1,
streaming: bool = False,
frontend_conf: dict = None,
+ language: str = None,
+ task: str = "transcribe",
**kwargs,
):
+ from funasr.tasks.whisper import ASRTask
+
# 1. Build ASR model
scorers = {}
- from funasr.tasks.whisper import ASRTask
asr_model, asr_train_args = ASRTask.build_model_from_file(
asr_train_config, asr_model_file, cmvn_file, device
)
@@ -1960,6 +1961,8 @@
self.device = device
self.dtype = dtype
self.frontend = frontend
+ self.language = language
+ self.task = task
@torch.no_grad()
def __call__(
@@ -1981,15 +1984,19 @@
"""
+ from funasr.utils.whisper_utils.transcribe import transcribe
+ from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
+ from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode
+
speech = speech[0]
speech = pad_or_trim(speech)
mel = log_mel_spectrogram(speech).to(self.device)
if self.asr_model.is_multilingual:
- options = DecodingOptions(fp16=False)
+ options = DecodingOptions(fp16=False, language=self.language, task=self.task)
asr_res = decode(self.asr_model, mel, options)
text = asr_res.text
- language = asr_res.language
+ language = self.language if self.language else asr_res.language
else:
asr_res = transcribe(self.asr_model, speech, fp16=False)
text = asr_res["text"]
--
Gitblit v1.9.1