| | |
| | | import numpy as np |
| | | import torch |
| | | import torchaudio |
| | | import soundfile |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | |
| | | from funasr.bin.asr_infer import Speech2Text |
| | | from funasr.bin.asr_infer import Speech2TextMFCCA |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | |
| | | if word_lm_train_config is not None: |
| | | raise NotImplementedError("Word LM is not implemented") |
| | |
| | | raw_inputs = _load_bytes(data_path_and_name_and_type[0]) |
| | | raw_inputs = torch.tensor(raw_inputs) |
| | | if data_path_and_name_and_type is not None and data_path_and_name_and_type[2] == "sound": |
| | | raw_inputs = torchaudio.load(data_path_and_name_and_type[0])[0][0] |
| | | try: |
| | | raw_inputs = torchaudio.load(data_path_and_name_and_type[0])[0][0] |
| | | except: |
| | | raw_inputs = soundfile.read(data_path_and_name_and_type[0], dtype='float32')[0] |
| | | if raw_inputs.ndim == 2: |
| | | raw_inputs = raw_inputs[:, 0] |
| | | raw_inputs = torch.tensor(raw_inputs) |
| | | if data_path_and_name_and_type is None and raw_inputs is not None: |
| | | if isinstance(raw_inputs, np.ndarray): |
| | | raw_inputs = torch.tensor(raw_inputs) |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | ncpu = kwargs.get("ncpu", 1) |
| | | torch.set_num_threads(ncpu) |
| | | if batch_size > 1: |
| | |
| | | right_context: Number of frames in right context AFTER subsampling. |
| | | display_partial_hypotheses: Whether to display partial hypotheses. |
| | | """ |
| | | assert check_argument_types() |
| | | |
| | | if batch_size > 1: |
| | | raise NotImplementedError("batch decoding is not implemented") |
| | |
| | | param_dict: dict = None, |
| | | **kwargs, |
| | | ): |
| | | assert check_argument_types() |
| | | if batch_size > 1: |
| | | raise NotImplementedError("batch decoding is not implemented") |
| | | if word_lm_train_config is not None: |
| | |
| | | return inference_mfcca(**kwargs) |
| | | elif mode == "rnnt": |
| | | return inference_transducer(**kwargs) |
| | | elif mode == "bat": |
| | | return inference_transducer(**kwargs) |
| | | elif mode == "sa_asr": |
| | | return inference_sa_asr(**kwargs) |
| | | else: |