| | |
| | | deep_update(model_conf, kwargs.get("model_conf", {})) |
| | | deep_update(model_conf, kwargs) |
| | | model = model_class(**model_conf, vocab_size=vocab_size) |
| | | model.to(device) |
| | | |
| | | # init_param |
| | | init_param = kwargs.get("init_param", None) |
| | |
| | | model.to(torch.float16) |
| | | elif kwargs.get("bf16", False): |
| | | model.to(torch.bfloat16) |
| | | model.to(device) |
| | | return model, kwargs |
| | | |
| | | def __call__(self, *args, **cfg): |
| | |
| | | # fp16 |
| | | if kwargs.get("fp16", False): |
| | | speech = speech.to(torch.float16) |
| | | encoder_out_lens = encoder_out_lens.to(torch.float16) |
| | | elif kwargs.get("bf16", False): |
| | | speech = speech.to(torch.bfloat16) |
| | | encoder_out_lens = encoder_out_lens.to(torch.bfloat16) |
| | | encoder_out, encoder_out_lens = self.audio_encoder(speech.permute(0, 2, 1), speech_lengths) |
| | | |
| | | # audio_adaptor |