| | |
| | | self.nbest = kwargs.get("nbest", 1) |
| | | |
| | | meta_data = {} |
| | | if isinstance(data_in, torch.Tensor): # fbank |
| | | speech, speech_lengths = data_in, data_lengths |
| | | if len(speech.shape) < 3: |
| | | speech = speech[None, :, :] |
| | | if speech_lengths is None: |
| | | speech_lengths = speech.shape[1] |
| | | else: |
| | | # if isinstance(data_in, torch.Tensor): # fbank |
| | | # speech, speech_lengths = data_in, data_lengths |
| | | # if len(speech.shape) < 3: |
| | | # speech = speech[None, :, :] |
| | | # if speech_lengths is None: |
| | | # speech_lengths = speech.shape[1] |
| | | # else: |
| | | # extract fbank feats |
| | | time1 = time.perf_counter() |
| | | audio_sample_list = load_audio_text_image_video(data_in, fs=frontend.fs, audio_fs=kwargs.get("fs", 16000)) |