| | |
| | | |
| | | import numpy as np |
| | | import torch |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.build_utils.build_model_from_file import build_model_from_file |
| | | from funasr.models.frontend.wav_frontend import WavFrontend, WavFrontendOnline |
| | |
| | | dtype: str = "float32", |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | # 1. Build vad model |
| | | vad_model, vad_infer_args = build_model_from_file( |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | text, token, token_int, hyp |
| | | |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | |
| | | feats = to_device(feats, device=self.device) |
| | | feats_len = feats_len.int() |
| | | waveforms = self.frontend.get_waveforms() |
| | | if max_end_sil == 800 and self.vad_infer_args.vad_post_conf["max_end_silence_time"] != 800: |
| | | max_end_sil = self.vad_infer_args.vad_post_conf["max_end_silence_time"] |
| | | |
| | | batch = { |
| | | "feats": feats, |