| | |
| | | import torch.nn.functional as F |
| | | from torch import Tensor |
| | | from torch import nn |
| | | |
| | | import whisper |
| | | # import whisper_timestamped as whisper |
| | | |
| | | from funasr.utils.load_utils import load_audio_text_image_video, extract_fbank |
| | | |
| | | from funasr.register import tables |
| | |
| | | |
| | | # decode the audio |
| | | options = whisper.DecodingOptions(**kwargs.get("DecodingOptions", {})) |
| | | result = whisper.decode(self.model, speech, options) |
| | | |
| | | result = whisper.decode(self.model, speech, language='english') |
| | | # result = whisper.transcribe(self.model, speech) |
| | | |
| | | results = [] |
| | | result_i = {"key": key[0], "text": result.text} |