From 9fa2b2128d3935b2edff2a2a3f1b8fd430a7e272 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Thu, 30 Nov 2023 11:15:47 +0800
Subject: [PATCH] vad_infer: drop typeguard checks and use librosa in doctest examples

---
 funasr/bin/vad_infer.py |   14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/funasr/bin/vad_infer.py b/funasr/bin/vad_infer.py
index f888bb4..5763873 100644
--- a/funasr/bin/vad_infer.py
+++ b/funasr/bin/vad_infer.py
@@ -13,7 +13,6 @@
 
 import numpy as np
 import torch
-from typeguard import check_argument_types
 
 from funasr.build_utils.build_model_from_file import build_model_from_file
 from funasr.models.frontend.wav_frontend import WavFrontend, WavFrontendOnline
@@ -24,9 +23,9 @@
     """Speech2VadSegment class
 
     Examples:
-        >>> import soundfile
+        >>> import librosa
         >>> speech2segment = Speech2VadSegment("vad_config.yml", "vad.pt")
-        >>> audio, rate = soundfile.read("speech.wav")
+        >>> audio, rate = librosa.load("speech.wav")
         >>> speech2segment(audio)
         [[10, 230], [245, 450], ...]
 
@@ -42,7 +41,6 @@
             dtype: str = "float32",
             **kwargs,
     ):
-        assert check_argument_types()
 
         # 1. Build vad model
         vad_model, vad_infer_args = build_model_from_file(
@@ -76,7 +74,6 @@
             text, token, token_int, hyp
 
         """
-        assert check_argument_types()
 
         # Input as audio signal
         if isinstance(speech, np.ndarray):
@@ -121,9 +118,9 @@
     """Speech2VadSegmentOnline class
 
     Examples:
-        >>> import soundfile
+        >>> import librosa
         >>> speech2segment = Speech2VadSegmentOnline("vad_config.yml", "vad.pt")
-        >>> audio, rate = soundfile.read("speech.wav")
+        >>> audio, rate = librosa.load("speech.wav")
         >>> speech2segment(audio)
         [[10, 230], [245, 450], ...]
 
@@ -149,7 +146,6 @@
             text, token, token_int, hyp
 
         """
-        assert check_argument_types()
 
         # Input as audio signal
         if isinstance(speech, np.ndarray):
@@ -166,6 +162,8 @@
             feats = to_device(feats, device=self.device)
             feats_len = feats_len.int()
             waveforms = self.frontend.get_waveforms()
+            if max_end_sil == 800 and self.vad_infer_args.vad_post_conf["max_end_silence_time"] != 800:
+                max_end_sil = self.vad_infer_args.vad_post_conf["max_end_silence_time"]
 
             batch = {
                 "feats": feats,

--
Gitblit v1.9.1