From c9f1b4e8a2e903f74de20d019e70307c26e93c3e Mon Sep 17 00:00:00 2001
From: shixian.shi <shixian.shi@alibaba-inc.com>
Date: Thu, 23 Nov 2023 20:39:52 +0800
Subject: [PATCH] update: defer whisper_utils imports to function scope in asr_infer.py
---
funasr/bin/asr_infer.py | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/funasr/bin/asr_infer.py b/funasr/bin/asr_infer.py
index 8073213..7015eb8 100644
--- a/funasr/bin/asr_infer.py
+++ b/funasr/bin/asr_infer.py
@@ -38,9 +38,7 @@
from funasr.text.token_id_converter import TokenIDConverter
from funasr.torch_utils.device_funcs import to_device
from funasr.utils.timestamp_tools import ts_prediction_lfr6_standard
-from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode
-from funasr.utils.whisper_utils.transcribe import transcribe
-from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
+
class Speech2Text:
"""Speech2Text class
@@ -1607,7 +1605,6 @@
feats_lengths = to_device(feats_lengths, device=self.device)
enc_out, _, _ = self.asr_model.encoder(feats, feats_lengths)
-
nbest_hyps = self.beam_search(enc_out[0])
return nbest_hyps
@@ -1923,9 +1920,10 @@
**kwargs,
):
+ from funasr.tasks.whisper import ASRTask
+
# 1. Build ASR model
scorers = {}
- from funasr.tasks.whisper import ASRTask
asr_model, asr_train_args = ASRTask.build_model_from_file(
asr_train_config, asr_model_file, cmvn_file, device
)
@@ -1985,6 +1983,10 @@
"""
+ from funasr.utils.whisper_utils.transcribe import transcribe
+ from funasr.utils.whisper_utils.audio import pad_or_trim, log_mel_spectrogram
+ from funasr.utils.whisper_utils.decoding import DecodingOptions, detect_language, decode
+
speech = speech[0]
speech = pad_or_trim(speech)
mel = log_mel_spectrogram(speech).to(self.device)
--
Gitblit v1.9.1