funasr/bin/asr_inference_mfcca.py
@@ -194,8 +194,8 @@
        # Input as audio signal
        if isinstance(speech, np.ndarray):
            speech = torch.tensor(speech)
        if(speech.dim()==3):
            speech = torch.squeeze(speech, 2)
        #speech = speech.unsqueeze(0).to(getattr(torch, self.dtype))
        speech = speech.to(getattr(torch, self.dtype))
        # lengths: (1,)
@@ -534,6 +534,8 @@
        data_path_and_name_and_type,
        dtype=dtype,
        batch_size=batch_size,
        fs=fs,
        mc=True,
        key_file=key_file,
        num_workers=num_workers,
        preprocess_fn=ASRTask.build_preprocess_fn(speech2text.asr_train_args, False),
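For context on the first hunk: multi-channel input already carries a channel axis, so the tensor is squeezed rather than unsqueezed into a fake batch dimension. A minimal sketch of that shape handling, assuming the loader yields a (channels, samples, 1) float array (the concrete shape and dtype below are illustrative, not taken from the source):

import numpy as np
import torch

# Hypothetical multi-channel utterance: 8 microphones, 16000 samples,
# with a trailing singleton dimension left over by the data loader (assumed).
speech = np.zeros((8, 16000, 1), dtype=np.float32)

speech = torch.tensor(speech)
if speech.dim() == 3:
    speech = torch.squeeze(speech, 2)   # (8, 16000, 1) -> (8, 16000)

# Unlike the single-channel path, there is no unsqueeze(0): the channel axis
# presumably serves as the leading dimension expected downstream.
speech = speech.to(torch.float32)
print(speech.shape)  # torch.Size([8, 16000])

The second hunk simply threads the sampling rate (fs) and a multi-channel flag (mc=True) through to ASRTask.build_streaming_iterator.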