| | |
| | | tp_model, tp_train_args = ASRTask.build_model_from_file( |
| | | timestamp_infer_config, timestamp_model_file, device |
| | | ) |
| | | if 'cuda' in device: |
| | | tp_model = tp_model.cuda() |
| | | |
| | | frontend = None |
| | | if tp_train_args.frontend is not None: |
| | | frontend = WavFrontend(cmvn_file=timestamp_cmvn_file, **tp_train_args.frontend_conf) |
| | |
| | | # Input as audio signal |
| | | if isinstance(speech, np.ndarray): |
| | | speech = torch.tensor(speech) |
| | | |
| | | if self.frontend is not None: |
| | | feats, feats_len = self.frontend.forward(speech, speech_lengths) |
| | | feats = to_device(feats, device=self.device) |
| | | feats_len = feats_len.int() |
| | | self.tp_model.frontend = None |
| | | else: |
| | | feats = speech |
| | | feats_len = speech_lengths |
| | |
| | | device = "cuda" |
| | | else: |
| | | device = "cpu" |
| | | |
| | | # 1. Set random-seed |
| | | set_all_random_seed(seed) |
| | | |