| | |
| | | import numpy as np |
| | | import torch |
| | | import torchaudio |
| | | import soundfile |
| | | import yaml |
| | | from typeguard import check_argument_types |
| | | |
| | |
| | | export_mode = param_dict.get("export_mode", False) |
| | | else: |
| | | hotword_list_or_file = None |
| | | clas_scale = param_dict.get('clas_scale', 1.0) |
| | | |
| | | if kwargs.get("device", None) == "cpu": |
| | | ngpu = 0 |
| | |
| | | penalty=penalty, |
| | | nbest=nbest, |
| | | hotword_list_or_file=hotword_list_or_file, |
| | | clas_scale=clas_scale, |
| | | ) |
| | | |
| | | speech2text = Speech2TextParaformer(**speech2text_kwargs) |
| | |
| | | raw_inputs = _load_bytes(data_path_and_name_and_type[0]) |
| | | raw_inputs = torch.tensor(raw_inputs) |
| | | if data_path_and_name_and_type is not None and data_path_and_name_and_type[2] == "sound": |
| | | raw_inputs = torchaudio.load(data_path_and_name_and_type[0])[0][0] |
| | | try: |
| | | raw_inputs = torchaudio.load(data_path_and_name_and_type[0])[0][0] |
| | | except: |
| | | raw_inputs = soundfile.read(data_path_and_name_and_type[0], dtype='float32')[0] |
| | | if raw_inputs.ndim == 2: |
| | | raw_inputs = raw_inputs[:, 0] |
| | | raw_inputs = torch.tensor(raw_inputs) |
| | | if data_path_and_name_and_type is None and raw_inputs is not None: |
| | | if isinstance(raw_inputs, np.ndarray): |
| | | raw_inputs = torch.tensor(raw_inputs) |