| | |
| | | hub = audio_encoder_conf.get("hub", None) |
| | | if hub == "ms": |
| | | from funasr import AutoModel |
| | | model = AutoModel(model=audio_encoder, model_revision="v2.0.4") |
| | | model = AutoModel(model=audio_encoder, model_revision="master") |
| | | # frontend = model.kwargs.get("frontend") |
| | | audio_encoder_output_size = model.model.encoder_output_size |
| | | |
| | |
| | | ): |
| | | speech = speech.permute(0, 2, 1) |
| | | res = self.audio_encoder(speech) |
| | | if len(res) > 1: |
| | | if isinstance(res, (list, tuple)): |
| | | encoder_out, encoder_out_lens = res[0], res[1] |
| | | else: |
| | | encoder_out, encoder_out_lens = res, speech_lengths |