shixian.shi
2024-01-17 7458e39ff0756d0bae38b139e0e534e61e1fa0cf
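Bug fix: comment out the fbank-tensor input branch in bicif_paraformer inference and disable the AutoFrontend demo, which cannot be used currently.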
2个文件已修改
16 ■■■■■ 已修改文件
examples/industrial_data_pretraining/paraformer/demo.py 2 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/models/bicif_paraformer/model.py 14 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
examples/industrial_data_pretraining/paraformer/demo.py
@@ -11,6 +11,7 @@
 print(res)
+''' can not use currently
 from funasr import AutoFrontend
 frontend = AutoFrontend(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.2")
@@ -20,3 +21,4 @@
 for batch_idx, fbank_dict in enumerate(fbanks):
     res = model.generate(**fbank_dict)
     print(res)
+'''
funasr/models/bicif_paraformer/model.py
@@ -235,13 +235,13 @@
             self.nbest = kwargs.get("nbest", 1)
         
         meta_data = {}
-        if isinstance(data_in, torch.Tensor):  # fbank
-            speech, speech_lengths = data_in, data_lengths
-            if len(speech.shape) < 3:
-                speech = speech[None, :, :]
-            if speech_lengths is None:
-                speech_lengths = speech.shape[1]
-        else:
+        # if isinstance(data_in, torch.Tensor):  # fbank
+        #     speech, speech_lengths = data_in, data_lengths
+        #     if len(speech.shape) < 3:
+        #         speech = speech[None, :, :]
+        #     if speech_lengths is None:
+        #         speech_lengths = speech.shape[1]
+        # else:
             # extract fbank feats
             time1 = time.perf_counter()
             audio_sample_list = load_audio_text_image_video(data_in, fs=frontend.fs, audio_fs=kwargs.get("fs", 16000))