shixian.shi
2024-01-12 c3c78fc5e790d48b3a2f9da79199320c06108d38
funasr/models/transformer/model.py
@@ -400,7 +400,8 @@
      meta_data["extract_feat"] = f"{time3 - time2:0.3f}"
      meta_data["batch_data_time"] = speech_lengths.sum().item() * self.frontend.frame_shift * self.frontend.lfr_n / 1000
      
      speech.to(device=kwargs["device"]), speech_lengths.to(device=kwargs["device"])
        speech = speech.to(device=kwargs["device"])
        speech_lengths = speech_lengths.to(device=kwargs["device"])
      # Encoder
      encoder_out, encoder_out_lens = self.encode(speech, speech_lengths)