funasr/bin/asr_inference_mfcca.py
@@ -194,8 +194,8 @@
        # Input as audio signal
        if isinstance(speech, np.ndarray):
            speech = torch.tensor(speech)
        if(speech.dim()==3):
            speech = torch.squeeze(speech, 2)
        #speech = speech.unsqueeze(0).to(getattr(torch, self.dtype))
        speech = speech.to(getattr(torch, self.dtype))
        # lengths: (1,)
@@ -534,6 +534,8 @@
        data_path_and_name_and_type,
        dtype=dtype,
        batch_size=batch_size,
        fs=fs,
        mc=True,
        key_file=key_file,
        num_workers=num_workers,
        preprocess_fn=ASRTask.build_preprocess_fn(speech2text.asr_train_args, False),
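For context on the first hunk: multi-channel input already carries a channel axis, so the tensor is squeezed rather than unsqueezed into a fake batch dimension. A minimal sketch of that shape handling, assuming the loader yields a (channels, samples, 1) float array (the concrete shape and dtype below are illustrative, not taken from the source):

import numpy as np
import torch

# Hypothetical multi-channel utterance: 8 microphones, 16000 samples,
# with a trailing singleton dimension left over by the data loader (assumed).
speech = np.zeros((8, 16000, 1), dtype=np.float32)

speech = torch.tensor(speech)
if speech.dim() == 3:
    speech = torch.squeeze(speech, 2)   # (8, 16000, 1) -> (8, 16000)

# Unlike the single-channel path, there is no unsqueeze(0): the channel axis
# presumably serves as the leading dimension expected downstream.
speech = speech.to(torch.float32)
print(speech.shape)  # torch.Size([8, 16000])

The second hunk simply threads the sampling rate (fs) and a multi-channel flag (mc=True) through to ASRTask.build_streaming_iterator.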