hnluo
2023-05-08 30e4f1fa711de86c369152215358d069d066122a
funasr/bin/asr_inference_paraformer_streaming.py
@@ -553,12 +553,12 @@
                asr_result = speech2text(cache, raw_inputs[:, sample_offset: sample_offset + stride_size], input_lens)
                if len(asr_result) != 0: 
                    final_result += " ".join(asr_result) + " "
            item = {'key': "utt", 'value': [final_result.strip()]}
            item = {'key': "utt", 'value': final_result.strip()}
        else:
            input_lens = torch.tensor([raw_inputs.shape[1]])
            cache["encoder"]["is_final"] = is_final
            asr_result = speech2text(cache, raw_inputs, input_lens)
            item = {'key': "utt", 'value': asr_result}
            item = {'key': "utt", 'value': " ".join(asr_result)}
        asr_result_list.append(item)
        if is_final: