python/FunASR-XL.git

			@@ -582,6 +582,21 @@
			else:
			speech2text = Speech2Text(**speech2text_kwargs)

			def _load_bytes(input):
			middle_data = np.frombuffer(input, dtype=np.int16)
			middle_data = np.asarray(middle_data)
			if middle_data.dtype.kind not in 'iu':
			raise TypeError("'middle_data' must be an array of integers")
			dtype = np.dtype('float32')
			if dtype.kind != 'f':
			raise TypeError("'dtype' must be a floating point type")

			i = np.iinfo(middle_data.dtype)
			abs_max = 2 ** (i.bits - 1)
			offset = i.min + abs_max
			array = np.frombuffer((middle_data.astype(dtype) - offset) / abs_max, dtype=np.float32)
			return array

			def _forward(
			data_path_and_name_and_type,
			raw_inputs: Union[np.ndarray, torch.Tensor] = None,
			@@ -592,6 +607,9 @@
			):

			# 3. Build data-iterator
			if data_path_and_name_and_type is not None and data_path_and_name_and_type[2] == "bytes":
			raw_inputs = _load_bytes(data_path_and_name_and_type[0])
			raw_inputs = torch.tensor(raw_inputs)
			if data_path_and_name_and_type is None and raw_inputs is not None:
			if isinstance(raw_inputs, np.ndarray):
			raw_inputs = torch.tensor(raw_inputs)