From 4ace5a95b052d338947fc88809a440ccd55cf6b4 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Thu, 16 Nov 2023 16:39:52 +0800
Subject: [PATCH] funasr: launcher keyword defaults, sv model path derivation, Whisper language/task

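Add keyword defaults to the inference_paraformer_vad_punc and
inference_paraformer_online launchers so they can be called without
spelling out every decoding argument, change the num_workers default to
0, derive the speaker-verification model path from asr_model_file
(model.pb -> campplus_cn_common.bin in the same directory) rather than
the hard-coded cache-dir layout, and let the Whisper launcher pick up an
optional decoding language and task from param_dict and forward them to
Speech2TextWhisper.

A caller can now steer the Whisper decoding through param_dict alone; a
minimal sketch (only the param_dict keys below come from this patch, the
surrounding call site is assumed):

    # Omitting either key keeps the defaults: language=None,
    # task="transcribe".
    param_dict = {"language": "en", "task": "transcribe"}
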
---
 funasr/bin/asr_inference_launch.py |   57 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 25 deletions(-)

diff --git a/funasr/bin/asr_inference_launch.py b/funasr/bin/asr_inference_launch.py
index e3de05b..e93d740 100644
--- a/funasr/bin/asr_inference_launch.py
+++ b/funasr/bin/asr_inference_launch.py
@@ -462,18 +462,18 @@
 
 
 def inference_paraformer_vad_punc(
-        maxlenratio: float,
-        minlenratio: float,
-        batch_size: int,
-        beam_size: int,
-        ngpu: int,
-        ctc_weight: float,
-        lm_weight: float,
-        penalty: float,
-        log_level: Union[int, str],
+        maxlenratio: float = 0.0,
+        minlenratio: float = 0.0,
+        batch_size: int = 1,
+        beam_size: int = 1,
+        ngpu: int = 1,
+        ctc_weight: float = 0.0,
+        lm_weight: float = 0.0,
+        penalty: float = 0.0,
+        log_level: Union[int, str] = logging.ERROR,
         # data_path_and_name_and_type,
-        asr_train_config: Optional[str],
-        asr_model_file: Optional[str],
+        asr_train_config: Optional[str] = None,
+        asr_model_file: Optional[str] = None,
         cmvn_file: Optional[str] = None,
         lm_train_config: Optional[str] = None,
         lm_file: Optional[str] = None,
@@ -487,7 +487,7 @@
         seed: int = 0,
         ngram_weight: float = 0.9,
         nbest: int = 1,
-        num_workers: int = 1,
+        num_workers: int = 0,
         vad_infer_config: Optional[str] = None,
         vad_model_file: Optional[str] = None,
         vad_cmvn_file: Optional[str] = None,
@@ -815,8 +815,7 @@
         format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
     )
 
-    if sv_model_file is None:
-        sv_model_file = "{}/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/campplus_cn_common.bin".format(get_cache_dir(None))
+    sv_model_file = sv_model_file or asr_model_file.replace("model.pb", "campplus_cn_common.bin")
 
     if param_dict is not None:
         hotword_list_or_file = param_dict.get('hotword')
@@ -1099,18 +1098,18 @@
 
 
 def inference_paraformer_online(
-        maxlenratio: float,
-        minlenratio: float,
-        batch_size: int,
-        beam_size: int,
-        ngpu: int,
-        ctc_weight: float,
-        lm_weight: float,
-        penalty: float,
-        log_level: Union[int, str],
+        maxlenratio: float = 0.0,
+        minlenratio: float = 0.0,
+        batch_size: int = 1,
+        beam_size: int = 1,
+        ngpu: int = 1,
+        ctc_weight: float = 0.0,
+        lm_weight: float = 0.0,
+        penalty: float = 0.0,
+        log_level: Union[int, str] = logging.ERROR,
         # data_path_and_name_and_type,
-        asr_train_config: Optional[str],
-        asr_model_file: Optional[str],
+        asr_train_config: Optional[str] = None,
+        asr_model_file: Optional[str] = None,
         cmvn_file: Optional[str] = None,
         lm_train_config: Optional[str] = None,
         lm_file: Optional[str] = None,
@@ -2056,6 +2055,12 @@
 
     ncpu = kwargs.get("ncpu", 1)
     torch.set_num_threads(ncpu)
+    if param_dict:
+        language = param_dict.get("language", None)
+        task = param_dict.get("task", "transcribe")
+    else:
+        language = None
+        task = "transcribe"
     if batch_size > 1:
         raise NotImplementedError("batch decoding is not implemented")
     if word_lm_train_config is not None:
@@ -2099,6 +2104,8 @@
         penalty=penalty,
         nbest=nbest,
         streaming=streaming,
+        language=language,
+        task=task,
     )
     logging.info("speech2text_kwargs: {}".format(speech2text_kwargs))
     speech2text = Speech2TextWhisper(**speech2text_kwargs)

--
Gitblit v1.9.1