| | |
from funasr import AutoModel

# Export a Paraformer ASR model to TorchScript.
# BUG FIX: the original passed `model=` twice (plain checkpoint and the
# contextual/hotword checkpoint), which is a SyntaxError in Python
# ("keyword argument repeated"). Keep the contextual checkpoint — the rest
# of this file builds hotword dummy inputs — and leave the plain one as a
# documented alternative.
model = AutoModel(
    # model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
    model="iic/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404",
)

# Export to TorchScript without quantization; `res` is whatever the
# project's AutoModel.export returns (path/handle — can't tell from here).
res = model.export(type="torchscript", quantize=False)
| | |
# NOTE(review): fragment of an export-model constructor — the enclosing
# `def` is outside this chunk, so this is incomplete from view. It copies
# the hotword bias embedding and bias encoder off the trained model, and
# forces batch_first=False on the encoder — presumably so the traced
# encoder consumes (seq, batch, dim) inputs; TODO confirm against the
# full class definition upstream.
| | | self.embedding = model.bias_embed |
| | | model.bias_encoder.batch_first = False |
| | | self.bias_encoder = model.bias_encoder |
| | | |
def export_dummy_inputs(self):
    """Build the dummy hotword batch fed to the exporter when tracing.

    Returns a single int32 tensor of shape (6, 10): three padded hotword
    id sequences (pad id 0), duplicated so the batch exercises repeated
    entries.
    """
    full_row = [10, 11, 12, 13, 14, 10, 11, 12, 13, 14]
    short_row = [100, 101] + [0] * 8
    single_row = [1] + [0] * 9
    rows = [full_row, short_row, single_row]
    hotword = torch.tensor(rows + rows, dtype=torch.int32)
    # hotword_length = torch.tensor([10, 2, 1], dtype=torch.int32)
    return hotword
| | | |
| | | |
# NOTE(review): this definition is truncated in this chunk — the body that
# would compute q, k, v is missing, so the `return q, k, v` below references
# undefined names as shown. Recover the full implementation from upstream
# before relying on this fragment; do not fill in the gap by guessing.
| | | def export_rebuild_model(model, **kwargs): |
| | | |
| | | return q, k, v |
| | | |
def forward_attention(self, value, scores, mask, ret_attn):
    """Turn raw attention scores into weights and apply them to `value`.

    Args:
        value: value tensor — presumably (batch, head, time2, d_k); TODO confirm.
        scores: raw attention scores, (batch, head, time1, time2).
        mask: additive mask (large negative values at masked positions).
        ret_attn: unused in this visible fragment.

    Side effects:
        Stores the softmaxed weights on ``self.attn``.
    """
    # BUG FIX: the original added `mask` to `scores` twice in a row (a plain
    # add immediately followed by a device-aware add) — a merge artifact that
    # doubled the mask. Apply it exactly once, moved to the scores' device.
    scores = scores + mask.to(scores.device)

    self.attn = torch.softmax(scores, dim=-1)
    context_layer = torch.matmul(self.attn, value)  # (batch, head, time1, d_k)
    # NOTE(review): the remainder of this method (reshape / output projection
    # in the upstream implementation, presumably) is truncated in this chunk;
    # as visible, the function returns None.
| | |
| | | import torch.nn.functional as F |
| | | from torch import Tensor |
| | | from torch import nn |
| | | |
| | | import whisper |
| | | # import whisper_timestamped as whisper |
| | | |
| | | from funasr.utils.load_utils import load_audio_text_image_video, extract_fbank |
| | | |
| | | from funasr.register import tables |
| | |
| | | |
# decode the audio, honoring caller-supplied DecodingOptions
options = whisper.DecodingOptions(**kwargs.get("DecodingOptions", {}))
result = whisper.decode(self.model, speech, options)
# BUG FIX: the original immediately re-decoded with language='english',
# discarding the options-driven result above, and left a live
# `import pdb; pdb.set_trace()` breakpoint in place — both removed as
# debug/merge leftovers.
# result = whisper.transcribe(self.model, speech)

results = []
result_i = {"key": key[0], "text": result.text}
# NOTE(review): fragment — the code that appends result_i to results (and
# returns them) lies outside this chunk; presumably
# `results.append(result_i)` follows. TODO confirm upstream.
| | | |
| | |
| | | |
# Move the model and its dummy inputs to the GPU before tracing, when asked.
if device == 'cuda':
    model = model.cuda()
    # BUG FIX: the original unconditionally ran
    #   dummy_input = tuple([i.cuda() for i in dummy_input])
    # BEFORE the isinstance check below — iterating a single Tensor
    # row-by-row and clobbering it so the Tensor branch could never see the
    # original input. Branch on the type first instead.
    if isinstance(dummy_input, torch.Tensor):
        dummy_input = dummy_input.cuda()
    else:
        dummy_input = tuple(i.cuda() for i in dummy_input)

# torch.jit.script was presumably tried and abandoned; trace with concrete
# dummy inputs instead.
# model_script = torch.jit.script(model)
model_script = torch.jit.trace(model, dummy_input)