| | |
| | | ibest_writer["rtf"][key] = rtf_cur |
| | | |
| | | if text is not None: |
| | | if use_timestamp and timestamp is not None: |
| | | if use_timestamp and timestamp is not None and len(timestamp): |
| | | postprocessed_result = postprocess_utils.sentence_postprocess(token, timestamp) |
| | | else: |
| | | postprocessed_result = postprocess_utils.sentence_postprocess(token) |
| | |
| | | text, token, token_int = result[0], result[1], result[2] |
| | | time_stamp = result[4] if len(result[4]) > 0 else None |
| | | |
| | | if use_timestamp and time_stamp is not None: |
| | | if use_timestamp and time_stamp is not None and len(time_stamp): |
| | | postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp) |
| | | else: |
| | | postprocessed_result = postprocess_utils.sentence_postprocess(token) |
| | |
| | | quantize_modules: Optional[List[str]] = None, |
| | | quantize_dtype: Optional[str] = "float16", |
| | | streaming: Optional[bool] = False, |
| | | simu_streaming: Optional[bool] = False, |
| | | fake_streaming: Optional[bool] = False, |
| | | full_utt: Optional[bool] = False, |
| | | chunk_size: Optional[int] = 16, |
| | | left_context: Optional[int] = 16, |
| | |
| | | quantize_modules=quantize_modules, |
| | | quantize_dtype=quantize_dtype, |
| | | streaming=streaming, |
| | | simu_streaming=simu_streaming, |
| | | fake_streaming=fake_streaming, |
| | | full_utt=full_utt, |
| | | chunk_size=chunk_size, |
| | | left_context=left_context, |
| | |
| | | final_hyps = speech2text.streaming_decode( |
| | | speech[_end: len(speech)], is_final=True |
| | | ) |
| | | elif speech2text.simu_streaming: |
| | | final_hyps = speech2text.simu_streaming_decode(**batch) |
| | | elif speech2text.fake_streaming: |
| | | final_hyps = speech2text.fake_streaming_decode(**batch) |
| | | elif speech2text.full_utt: |
| | | final_hyps = speech2text.full_utt_decode(**batch) |
| | | else: |
| | |
| | | default=1, |
| | | help="The batch size for inference", |
| | | ) |
| | | group.add_argument( |
| | | "--decoding_ind", |
| | | type=int, |
| | | default=0, |
| | | help="chunk select for chunk encoder", |
| | | ) |
| | | group.add_argument("--nbest", type=int, default=5, help="Output N-best hypotheses") |
| | | group.add_argument("--beam_size", type=int, default=20, help="Beam size") |
| | | group.add_argument("--penalty", type=float, default=0.0, help="Insertion penalty") |
| | |
| | | group.add_argument("--lm_weight", type=float, default=1.0, help="RNNLM weight") |
| | | group.add_argument("--ngram_weight", type=float, default=0.9, help="ngram weight") |
| | | group.add_argument("--streaming", type=str2bool, default=False) |
| | | group.add_argument("--simu_streaming", type=str2bool, default=False) |
| | | group.add_argument("--fake_streaming", type=str2bool, default=False) |
| | | group.add_argument("--full_utt", type=str2bool, default=False) |
| | | group.add_argument("--chunk_size", type=int, default=16) |
| | | group.add_argument("--left_context", type=int, default=16) |