zhifu gao
2023-03-16 d783b24ba7d8a03dabfa2139fcbf40c216e0ea3d
funasr/bin/asr_inference_mfcca.py
@@ -55,7 +55,7 @@
    Examples:
        >>> import soundfile
        >>> speech2text = Speech2Text("asr_config.yml", "asr.pth")
        >>> speech2text = Speech2Text("asr_config.yml", "asr.pb")
        >>> audio, rate = soundfile.read("speech.wav")
        >>> speech2text(audio)
        [(text, token, token_int, hypothesis object), ...]
@@ -194,8 +194,8 @@
        # Input as audio signal
        if isinstance(speech, np.ndarray):
            speech = torch.tensor(speech)
        if(speech.dim()==3):
            speech = torch.squeeze(speech, 2)
        #speech = speech.unsqueeze(0).to(getattr(torch, self.dtype))
        speech = speech.to(getattr(torch, self.dtype))
        # lenghts: (1,)
@@ -534,6 +534,8 @@
            data_path_and_name_and_type,
            dtype=dtype,
            batch_size=batch_size,
            fs=fs,
            mc=True,
            key_file=key_file,
            num_workers=num_workers,
            preprocess_fn=ASRTask.build_preprocess_fn(speech2text.asr_train_args, False),