From 2a66366be4c2715870e4859fd5a5db6e8a9dc00a Mon Sep 17 00:00:00 2001
From: chenmengzheAAA <123789350+chenmengzheAAA@users.noreply.github.com>
Date: Thu, 14 Sep 2023 19:00:17 +0800
Subject: [PATCH] Merge pull request #956 from alibaba-damo-academy/chenmengzheAAA-patch-4

---
 funasr/bin/asr_inference_launch.py |   20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/funasr/bin/asr_inference_launch.py b/funasr/bin/asr_inference_launch.py
index ffb0b26..e749ddf 100644
--- a/funasr/bin/asr_inference_launch.py
+++ b/funasr/bin/asr_inference_launch.py
@@ -236,6 +236,7 @@
         timestamp_infer_config: Union[Path, str] = None,
         timestamp_model_file: Union[Path, str] = None,
         param_dict: dict = None,
+        decoding_ind: int = 0,
         **kwargs,
 ):
     ncpu = kwargs.get("ncpu", 1)
@@ -290,6 +291,7 @@
         nbest=nbest,
         hotword_list_or_file=hotword_list_or_file,
         clas_scale=clas_scale,
+        decoding_ind=decoding_ind,
     )
 
     speech2text = Speech2TextParaformer(**speech2text_kwargs)
@@ -312,6 +314,7 @@
             **kwargs,
     ):
 
+        decoding_ind = None
         hotword_list_or_file = None
         if param_dict is not None:
             hotword_list_or_file = param_dict.get('hotword')
@@ -319,6 +322,8 @@
             hotword_list_or_file = kwargs['hotword']
         if hotword_list_or_file is not None or 'hotword' in kwargs:
             speech2text.hotword_list = speech2text.generate_hotwords_list(hotword_list_or_file)
+        if param_dict is not None and "decoding_ind" in param_dict:
+            decoding_ind = param_dict["decoding_ind"]
 
         # 3. Build data-iterator
         if data_path_and_name_and_type is None and raw_inputs is not None:
@@ -365,6 +370,7 @@
             # N-best list of (text, token, token_int, hyp_object)
 
             time_beg = time.time()
+            batch["decoding_ind"] = decoding_ind
             results = speech2text(**batch)
             if len(results) < 1:
                 hyp = Hypothesis(score=0.0, scores={}, states={}, yseq=[])
@@ -421,7 +427,7 @@
                         else:
                             text_postprocessed, word_lists = postprocessed_result[0], postprocessed_result[1]
                         item = {'key': key, 'value': text_postprocessed}
-                        if timestamp_postprocessed != "" or len(timestamp) == 0:
+                        if timestamp_postprocessed != "":
                             item['timestamp'] = timestamp_postprocessed
                         asr_result_list.append(item)
                         finish_count += 1
@@ -711,7 +717,7 @@
             item = {'key': key, 'value': text_postprocessed_punc}
             if text_postprocessed != "":
                 item['text_postprocessed'] = text_postprocessed
-            if time_stamp_postprocessed != "" or len(time_stamp) == 0:
+            if time_stamp_postprocessed != "":
                 item['time_stamp'] = time_stamp_postprocessed
 
             item['sentences'] = time_stamp_sentence(punc_id_list, time_stamp_postprocessed, text_postprocessed)
@@ -1289,7 +1295,7 @@
         quantize_modules: Optional[List[str]] = None,
         quantize_dtype: Optional[str] = "float16",
         streaming: Optional[bool] = False,
-        simu_streaming: Optional[bool] = False,
+        fake_streaming: Optional[bool] = False,
         full_utt: Optional[bool] = False,
         chunk_size: Optional[int] = 16,
         left_context: Optional[int] = 16,
@@ -1366,7 +1372,7 @@
         quantize_modules=quantize_modules,
         quantize_dtype=quantize_dtype,
         streaming=streaming,
-        simu_streaming=simu_streaming,
+        fake_streaming=fake_streaming,
         full_utt=full_utt,
         chunk_size=chunk_size,
         left_context=left_context,
@@ -1424,8 +1430,8 @@
                     final_hyps = speech2text.streaming_decode(
                         speech[_end: len(speech)], is_final=True
                     )
-                elif speech2text.simu_streaming:
-                    final_hyps = speech2text.simu_streaming_decode(**batch)
+                elif speech2text.fake_streaming:
+                    final_hyps = speech2text.fake_streaming_decode(**batch)
                 elif speech2text.full_utt:
                     final_hyps = speech2text.full_utt_decode(**batch)
                 else:
@@ -1815,7 +1821,7 @@
     group.add_argument("--lm_weight", type=float, default=1.0, help="RNNLM weight")
     group.add_argument("--ngram_weight", type=float, default=0.9, help="ngram weight")
     group.add_argument("--streaming", type=str2bool, default=False)
-    group.add_argument("--simu_streaming", type=str2bool, default=False)
+    group.add_argument("--fake_streaming", type=str2bool, default=False)
     group.add_argument("--full_utt", type=str2bool, default=False)
     group.add_argument("--chunk_size", type=int, default=16)
     group.add_argument("--left_context", type=int, default=16)

--
Gitblit v1.9.1