zhifu gao
2024-04-16 a65016e23e6c829d61c63a68e27b24abf86e926a
funasr/models/paraformer/model.py
@@ -475,6 +475,8 @@
        speech = speech.to(device=kwargs["device"])
        speech_lengths = speech_lengths.to(device=kwargs["device"])
        # Encoder
        if kwargs.get("fp16", False):
            speech = speech.half()
        encoder_out, encoder_out_lens = self.encode(speech, speech_lengths)
        if isinstance(encoder_out, tuple):
            encoder_out = encoder_out[0]