funasr/utils/vad_utils.py
@@ -17,6 +17,7 @@ speech_lengths_pad = torch.Tensor(speech_lengths_list).int() return feats_pad, speech_lengths_pad def slice_padding_audio_samples(speech, speech_lengths, vad_segments): speech_list = [] speech_lengths_list = [] @@ -29,6 +30,7 @@ speech_lengths_list.append(speech_lengths_i) return speech_list, speech_lengths_list def merge_vad(vad_result, max_length=15000): new_result = [] @@ -51,4 +53,3 @@ bg = time new_result.append([bg, time_step[-1]]) return new_result