游雁
2023-01-31 51ea14f910d76c5c1a581098d3808a78fdb9fcd1
funasr/bin/asr_inference_paraformer_vad_punc.py
@@ -235,6 +235,8 @@
        predictor_outs = self.asr_model.calc_predictor(enc, enc_len)
        pre_acoustic_embeds, pre_token_length, alphas, pre_peak_index = predictor_outs[0], predictor_outs[1], predictor_outs[2], predictor_outs[3]
        if torch.max(pre_token_length) < 1:
            return []
        pre_token_length = pre_token_length.round().long()
        decoder_outs = self.asr_model.cal_decoder_with_predictor(enc, enc_len, pre_acoustic_embeds, pre_token_length)
        decoder_out, ys_pad_lens = decoder_outs[0], decoder_outs[1]
@@ -602,7 +604,7 @@
                    results = speech2text(**batch)
                    if len(results) < 1:
                        hyp = Hypothesis(score=0.0, scores={}, states={}, yseq=[])
                        results = [[" ", ["<space>"], [2], 0, 1, 6]] * nbest
                        results = [[" ", ["sil"], [2], 0, 1, 6]] * nbest
                    time_end = time.time()
                    forward_time = time_end - time_beg
                    lfr_factor = results[0][-1]