游雁
2023-11-21 c644ac8f58895b9e29e9cfca79465fd2c0efaa5a
funasr/bin/vad_infer.py
@@ -23,9 +23,9 @@
    """Speech2VadSegment class
    Examples:
        >>> import soundfile
        >>> import librosa
        >>> speech2segment = Speech2VadSegment("vad_config.yml", "vad.pt")
        >>> audio, rate = soundfile.read("speech.wav")
        >>> audio, rate = librosa.load("speech.wav")
        >>> speech2segment(audio)
        [[10, 230], [245, 450], ...]
@@ -118,9 +118,9 @@
    """Speech2VadSegmentOnline class
    Examples:
        >>> import soundfile
        >>> import librosa
        >>> speech2segment = Speech2VadSegmentOnline("vad_config.yml", "vad.pt")
        >>> audio, rate = soundfile.read("speech.wav")
        >>> audio, rate = librosa.load("speech.wav")
        >>> speech2segment(audio)
        [[10, 230], [245, 450], ...]
@@ -162,6 +162,8 @@
            feats = to_device(feats, device=self.device)
            feats_len = feats_len.int()
            waveforms = self.frontend.get_waveforms()
            if max_end_sil == 800 and self.vad_infer_args.vad_post_conf["max_end_silence_time"] != 800:
                max_end_sil = self.vad_infer_args.vad_post_conf["max_end_silence_time"]
            batch = {
                "feats": feats,