chenmengzheAAA
2023-09-14 2a66366be4c2715870e4859fd5a5db6e8a9dc00a
funasr/bin/asr_inference_launch.py
@@ -415,7 +415,7 @@
                        ibest_writer["rtf"][key] = rtf_cur
                    if text is not None:
                        if use_timestamp and timestamp is not None:
                        if use_timestamp and timestamp is not None and len(timestamp):
                            postprocessed_result = postprocess_utils.sentence_postprocess(token, timestamp)
                        else:
                            postprocessed_result = postprocess_utils.sentence_postprocess(token)
@@ -692,7 +692,7 @@
            text, token, token_int = result[0], result[1], result[2]
            time_stamp = result[4] if len(result[4]) > 0 else None
            if use_timestamp and time_stamp is not None:
            if use_timestamp and time_stamp is not None and len(time_stamp):
                postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
            else:
                postprocessed_result = postprocess_utils.sentence_postprocess(token)
@@ -1295,7 +1295,7 @@
        quantize_modules: Optional[List[str]] = None,
        quantize_dtype: Optional[str] = "float16",
        streaming: Optional[bool] = False,
        simu_streaming: Optional[bool] = False,
        fake_streaming: Optional[bool] = False,
        full_utt: Optional[bool] = False,
        chunk_size: Optional[int] = 16,
        left_context: Optional[int] = 16,
@@ -1372,7 +1372,7 @@
        quantize_modules=quantize_modules,
        quantize_dtype=quantize_dtype,
        streaming=streaming,
        simu_streaming=simu_streaming,
        fake_streaming=fake_streaming,
        full_utt=full_utt,
        chunk_size=chunk_size,
        left_context=left_context,
@@ -1430,8 +1430,8 @@
                    final_hyps = speech2text.streaming_decode(
                        speech[_end: len(speech)], is_final=True
                    )
                elif speech2text.simu_streaming:
                    final_hyps = speech2text.simu_streaming_decode(**batch)
                elif speech2text.fake_streaming:
                    final_hyps = speech2text.fake_streaming_decode(**batch)
                elif speech2text.full_utt:
                    final_hyps = speech2text.full_utt_decode(**batch)
                else:
@@ -1792,12 +1792,6 @@
        default=1,
        help="The batch size for inference",
    )
    group.add_argument(
        "--decoding_ind",
        type=int,
        default=0,
        help="chunk select for chunk encoder",
    )
    group.add_argument("--nbest", type=int, default=5, help="Output N-best hypotheses")
    group.add_argument("--beam_size", type=int, default=20, help="Beam size")
    group.add_argument("--penalty", type=float, default=0.0, help="Insertion penalty")
@@ -1827,7 +1821,7 @@
    group.add_argument("--lm_weight", type=float, default=1.0, help="RNNLM weight")
    group.add_argument("--ngram_weight", type=float, default=0.9, help="ngram weight")
    group.add_argument("--streaming", type=str2bool, default=False)
    group.add_argument("--simu_streaming", type=str2bool, default=False)
    group.add_argument("--fake_streaming", type=str2bool, default=False)
    group.add_argument("--full_utt", type=str2bool, default=False)
    group.add_argument("--chunk_size", type=int, default=16)
    group.add_argument("--left_context", type=int, default=16)