| | |
| | | """Speech2VadSegment class |
| | | |
| | | Examples: |
| | | >>> import soundfile |
| | | >>> import librosa |
| | | >>> speech2segment = Speech2VadSegment("vad_config.yml", "vad.pt") |
| | | >>> audio, rate = soundfile.read("speech.wav") |
| | | >>> # Alternative loader (this assignment replaces the values read above): |
| | | >>> audio, rate = librosa.load("speech.wav") |
| | | >>> speech2segment(audio) |
| | | [[10, 230], [245, 450], ...] |
| | | |
| | |
| | | """Speech2VadSegmentOnline class |
| | | |
| | | Examples: |
| | | >>> import soundfile |
| | | >>> import librosa |
| | | >>> speech2segment = Speech2VadSegmentOnline("vad_config.yml", "vad.pt") |
| | | >>> audio, rate = soundfile.read("speech.wav") |
| | | >>> # Alternative loader (this assignment replaces the values read above): |
| | | >>> audio, rate = librosa.load("speech.wav") |
| | | >>> speech2segment(audio) |
| | | [[10, 230], [245, 450], ...] |
| | | |
| | |
| | | feats = to_device(feats, device=self.device) |
| | | feats_len = feats_len.int() |
| | | waveforms = self.frontend.get_waveforms() |
| | | if max_end_sil == 800 and self.vad_infer_args.vad_post_conf["max_end_silence_time"] != 800: |
| | | max_end_sil = self.vad_infer_args.vad_post_conf["max_end_silence_time"] |
| | | |
| | | batch = { |
| | | "feats": feats, |