游雁
2024-04-15 90bc3ad02eee3745188be3960036ae3e9e746049
funasr/models/paraformer/model.py
@@ -231,6 +231,7 @@
        stats["loss_pre"] = loss_pre.detach().cpu() if loss_pre is not None else None
        
        stats["loss"] = torch.clone(loss.detach())
        stats["batch_size"] = batch_size
        
        # force_gatherable: to-device and to-tensor if scalar for DataParallel
        if self.length_normalized_loss:
@@ -474,6 +475,8 @@
        speech = speech.to(device=kwargs["device"])
        speech_lengths = speech_lengths.to(device=kwargs["device"])
        # Encoder
        if kwargs.get("fp16", False):
            speech = speech.half()
        encoder_out, encoder_out_lens = self.encode(speech, speech_lengths)
        if isinstance(encoder_out, tuple):
            encoder_out = encoder_out[0]