游雁
2023-09-13 33d3d2084403fd34b79c835d2f2fe04f6cd8f738
funasr/bin/vad_infer.py
@@ -13,7 +13,6 @@
import numpy as np
import torch
from typeguard import check_argument_types
from funasr.build_utils.build_model_from_file import build_model_from_file
from funasr.models.frontend.wav_frontend import WavFrontend, WavFrontendOnline
@@ -42,7 +41,6 @@
            dtype: str = "float32",
            **kwargs,
    ):
        assert check_argument_types()
        # 1. Build vad model
        vad_model, vad_infer_args = build_model_from_file(
@@ -76,7 +74,6 @@
            text, token, token_int, hyp
        """
        assert check_argument_types()
        # Input as audio signal
        if isinstance(speech, np.ndarray):
@@ -149,7 +146,6 @@
            text, token, token_int, hyp
        """
        assert check_argument_types()
        # Input as audio signal
        if isinstance(speech, np.ndarray):
@@ -166,6 +162,8 @@
            feats = to_device(feats, device=self.device)
            feats_len = feats_len.int()
            waveforms = self.frontend.get_waveforms()
            if max_end_sil == 800 and self.vad_infer_args.vad_post_conf["max_end_silence_time"] != 800:
                max_end_sil = self.vad_infer_args.vad_post_conf["max_end_silence_time"]
            batch = {
                "feats": feats,