shixian.shi
2024-01-12 c3c78fc5e790d48b3a2f9da79199320c06108d38
funasr/models/fsmn_vad/model.py
@@ -555,7 +555,8 @@
            meta_data[
                "batch_data_time"] = speech_lengths.sum().item() * frontend.frame_shift * frontend.lfr_n / 1000
        speech.to(device=kwargs["device"]), speech_lengths.to(device=kwargs["device"])
        speech = speech.to(device=kwargs["device"])
        speech_lengths = speech_lengths.to(device=kwargs["device"])
        # b. Forward Encoder streaming
        t_offset = 0