zhifu gao
2024-01-22 1159adbca076fa1a33bf4292ec5043e536285c5c
funasr/models/paraformer/model.py
@@ -451,7 +451,7 @@
            self.nbest = kwargs.get("nbest", 1)
        
        meta_data = {}
        if isinstance(data_in, torch.Tensor): # fbank
        if isinstance(data_in, torch.Tensor) and kwargs.get("data_type", "sound") == "fbank": # fbank
            speech, speech_lengths = data_in, data_lengths
            if len(speech.shape) < 3:
                speech = speech[None, :, :]