python/FunASR-XL.git

			@@ -13,7 +13,6 @@

			import numpy as np
			import torch
			from typeguard import check_argument_types

			from funasr.build_utils.build_model_from_file import build_model_from_file
			from funasr.models.frontend.wav_frontend import WavFrontend, WavFrontendOnline
			@@ -42,7 +41,6 @@
			dtype: str = "float32",
			**kwargs,
			):
			assert check_argument_types()

			# 1. Build vad model
			vad_model, vad_infer_args = build_model_from_file(
			@@ -76,7 +74,6 @@
			text, token, token_int, hyp

			"""
			assert check_argument_types()

			# Input as audio signal
			if isinstance(speech, np.ndarray):
			@@ -149,7 +146,6 @@
			text, token, token_int, hyp

			"""
			assert check_argument_types()

			# Input as audio signal
			if isinstance(speech, np.ndarray):
			@@ -166,6 +162,8 @@
			feats = to_device(feats, device=self.device)
			feats_len = feats_len.int()
			waveforms = self.frontend.get_waveforms()
			if max_end_sil == 800 and self.vad_infer_args.vad_post_conf["max_end_silence_time"] != 800:
			max_end_sil = self.vad_infer_args.vad_post_conf["max_end_silence_time"]

			batch = {
			"feats": feats,