From 2a66366be4c2715870e4859fd5a5db6e8a9dc00a Mon Sep 17 00:00:00 2001
From: chenmengzheAAA <123789350+chenmengzheAAA@users.noreply.github.com>
Date: Thu, 14 Sep 2023 19:00:17 +0800
Subject: [PATCH] Merge pull request #956 from alibaba-damo-academy/chenmengzheAAA-patch-4
---
funasr/bin/asr_inference_launch.py | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/funasr/bin/asr_inference_launch.py b/funasr/bin/asr_inference_launch.py
index 6fa57a7..e749ddf 100644
--- a/funasr/bin/asr_inference_launch.py
+++ b/funasr/bin/asr_inference_launch.py
@@ -415,7 +415,7 @@
ibest_writer["rtf"][key] = rtf_cur
if text is not None:
- if use_timestamp and timestamp is not None:
+ if use_timestamp and timestamp is not None and len(timestamp):
postprocessed_result = postprocess_utils.sentence_postprocess(token, timestamp)
else:
postprocessed_result = postprocess_utils.sentence_postprocess(token)
@@ -692,7 +692,7 @@
text, token, token_int = result[0], result[1], result[2]
time_stamp = result[4] if len(result[4]) > 0 else None
- if use_timestamp and time_stamp is not None:
+ if use_timestamp and time_stamp is not None and len(time_stamp):
postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
else:
postprocessed_result = postprocess_utils.sentence_postprocess(token)
@@ -1295,7 +1295,7 @@
quantize_modules: Optional[List[str]] = None,
quantize_dtype: Optional[str] = "float16",
streaming: Optional[bool] = False,
- simu_streaming: Optional[bool] = False,
+ fake_streaming: Optional[bool] = False,
full_utt: Optional[bool] = False,
chunk_size: Optional[int] = 16,
left_context: Optional[int] = 16,
@@ -1372,7 +1372,7 @@
quantize_modules=quantize_modules,
quantize_dtype=quantize_dtype,
streaming=streaming,
- simu_streaming=simu_streaming,
+ fake_streaming=fake_streaming,
full_utt=full_utt,
chunk_size=chunk_size,
left_context=left_context,
@@ -1430,8 +1430,8 @@
final_hyps = speech2text.streaming_decode(
speech[_end: len(speech)], is_final=True
)
- elif speech2text.simu_streaming:
- final_hyps = speech2text.simu_streaming_decode(**batch)
+ elif speech2text.fake_streaming:
+ final_hyps = speech2text.fake_streaming_decode(**batch)
elif speech2text.full_utt:
final_hyps = speech2text.full_utt_decode(**batch)
else:
@@ -1792,12 +1792,6 @@
default=1,
help="The batch size for inference",
)
- group.add_argument(
- "--decoding_ind",
- type=int,
- default=0,
- help="chunk select for chunk encoder",
- )
group.add_argument("--nbest", type=int, default=5, help="Output N-best hypotheses")
group.add_argument("--beam_size", type=int, default=20, help="Beam size")
group.add_argument("--penalty", type=float, default=0.0, help="Insertion penalty")
@@ -1827,7 +1821,7 @@
group.add_argument("--lm_weight", type=float, default=1.0, help="RNNLM weight")
group.add_argument("--ngram_weight", type=float, default=0.9, help="ngram weight")
group.add_argument("--streaming", type=str2bool, default=False)
- group.add_argument("--simu_streaming", type=str2bool, default=False)
+ group.add_argument("--fake_streaming", type=str2bool, default=False)
group.add_argument("--full_utt", type=str2bool, default=False)
group.add_argument("--chunk_size", type=int, default=16)
group.add_argument("--left_context", type=int, default=16)
--
Gitblit v1.9.1