From 4ace5a95b052d338947fc88809a440ccd55cf6b4 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Thu, 16 Nov 2023 16:39:52 +0800
Subject: [PATCH] funasr pages
---
funasr/bin/asr_inference_launch.py | 57 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 24 deletions(-)
diff --git a/funasr/bin/asr_inference_launch.py b/funasr/bin/asr_inference_launch.py
index e3de05b..e93d740 100644
--- a/funasr/bin/asr_inference_launch.py
+++ b/funasr/bin/asr_inference_launch.py
@@ -462,18 +462,18 @@
def inference_paraformer_vad_punc(
-    maxlenratio: float,
-    minlenratio: float,
-    batch_size: int,
-    beam_size: int,
-    ngpu: int,
-    ctc_weight: float,
-    lm_weight: float,
-    penalty: float,
-    log_level: Union[int, str],
+    maxlenratio: float = 0.0,
+    minlenratio: float = 0.0,
+    batch_size: int = 1,
+    beam_size: int = 1,
+    ngpu: int = 1,
+    ctc_weight: float = 0.0,
+    lm_weight: float = 0.0,
+    penalty: float = 0.0,
+    log_level: Union[int, str] = logging.ERROR,
     # data_path_and_name_and_type,
-    asr_train_config: Optional[str],
-    asr_model_file: Optional[str],
+    asr_train_config: Optional[str] = None,
+    asr_model_file: Optional[str] = None,
cmvn_file: Optional[str] = None,
lm_train_config: Optional[str] = None,
lm_file: Optional[str] = None,
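Both this hunk and the matching one for inference_paraformer_online below turn the required decoding arguments into keyword defaults. A minimal sketch of the slimmer call site this enables, using placeholder paths; it assumes the remaining VAD/punctuation arguments keep their existing None defaults:

from funasr.bin.asr_inference_launch import inference_paraformer_vad_punc

# Placeholder paths; every decoding knob now falls back to its default value.
pipeline = inference_paraformer_vad_punc(
    asr_train_config="exp/paraformer/config.yaml",
    asr_model_file="exp/paraformer/model.pb",
)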
@@ -487,7 +487,7 @@
seed: int = 0,
ngram_weight: float = 0.9,
nbest: int = 1,
- num_workers: int = 1,
+ num_workers: int = 0,
vad_infer_config: Optional[str] = None,
vad_model_file: Optional[str] = None,
vad_cmvn_file: Optional[str] = None,
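One default changes value rather than gaining one: num_workers drops from 1 to 0. In PyTorch, num_workers=0 means batches are loaded in the calling process instead of in forked worker subprocesses, which is the safer choice when this launcher runs inside a larger service. A standalone illustration of the semantics:

import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.arange(8).float())
loader = DataLoader(dataset, batch_size=2, num_workers=0)  # in-process loading
for (batch,) in loader:
    print(batch)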
@@ -815,8 +815,8 @@
format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
)
     if sv_model_file is None:
-        sv_model_file = "{}/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/campplus_cn_common.bin".format(get_cache_dir(None))
+        sv_model_file = asr_model_file.replace("model.pb", "campplus_cn_common.bin")
if param_dict is not None:
hotword_list_or_file = param_dict.get('hotword')
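The speaker-verification weights are now resolved next to the ASR model rather than from a hard-coded cache path; the derivation is a plain filename rewrite. A sketch with a hypothetical model directory:

# Hypothetical path; the rewrite assumes the ASR model file is named model.pb.
asr_model_file = "/cache/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/model.pb"
sv_model_file = asr_model_file.replace("model.pb", "campplus_cn_common.bin")
print(sv_model_file)
# -> /cache/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/campplus_cn_common.bin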
@@ -1099,18 +1098,18 @@
def inference_paraformer_online(
-    maxlenratio: float,
-    minlenratio: float,
-    batch_size: int,
-    beam_size: int,
-    ngpu: int,
-    ctc_weight: float,
-    lm_weight: float,
-    penalty: float,
-    log_level: Union[int, str],
+    maxlenratio: float = 0.0,
+    minlenratio: float = 0.0,
+    batch_size: int = 1,
+    beam_size: int = 1,
+    ngpu: int = 1,
+    ctc_weight: float = 0.0,
+    lm_weight: float = 0.0,
+    penalty: float = 0.0,
+    log_level: Union[int, str] = logging.ERROR,
     # data_path_and_name_and_type,
-    asr_train_config: Optional[str],
-    asr_model_file: Optional[str],
+    asr_train_config: Optional[str] = None,
+    asr_model_file: Optional[str] = None,
cmvn_file: Optional[str] = None,
lm_train_config: Optional[str] = None,
lm_file: Optional[str] = None,
@@ -2056,6 +2056,12 @@
ncpu = kwargs.get("ncpu", 1)
torch.set_num_threads(ncpu)
+ if param_dict:
+ language = param_dict.get("language", None)
+ task = param_dict.get("task", "transcribe")
+ else:
+ language = None
+ task = "transcribe"
if batch_size > 1:
raise NotImplementedError("batch decoding is not implemented")
if word_lm_train_config is not None:
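The added block guards against param_dict being None before reaching for dict.get, falling back to language=None and task="transcribe". The same lookup as a self-contained sketch (the helper name is hypothetical):

def resolve_language_task(param_dict=None):
    # param_dict may be None, so test it before calling .get()
    if param_dict:
        return param_dict.get("language", None), param_dict.get("task", "transcribe")
    return None, "transcribe"

print(resolve_language_task())                    # (None, 'transcribe')
print(resolve_language_task({"language": "zh"}))  # ('zh', 'transcribe')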
@@ -2099,6 +2105,8 @@
penalty=penalty,
nbest=nbest,
streaming=streaming,
+ language=language,
+ task=task,
)
logging.info("speech2text_kwargs: {}".format(speech2text_kwargs))
speech2text = Speech2TextWhisper(**speech2text_kwargs)
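The resolved values are then threaded into the kwargs dict that is splatted into Speech2TextWhisper; in Whisper's vocabulary, task selects between "transcribe" and "translate". A sketch of the extended dict, with hypothetical values for the pre-existing keys:

speech2text_kwargs = dict(
    penalty=0.0,
    nbest=1,
    streaming=False,
    language=None,       # new: forwarded from param_dict
    task="transcribe",   # new: Whisper-style task selector
)
print(speech2text_kwargs)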
--
Gitblit v1.9.1