游雁
2023-01-31 51ea14f910d76c5c1a581098d3808a78fdb9fcd1
funasr/bin/asr_inference_paraformer.py
@@ -227,6 +227,8 @@
        pre_acoustic_embeds, pre_token_length, alphas, pre_peak_index = predictor_outs[0], predictor_outs[1], \
                                                                        predictor_outs[2], predictor_outs[3]
        pre_token_length = pre_token_length.round().long()
        if torch.max(pre_token_length) < 1:
            return []
        decoder_outs = self.asr_model.cal_decoder_with_predictor(enc, enc_len, pre_acoustic_embeds, pre_token_length)
        decoder_out, ys_pad_lens = decoder_outs[0], decoder_outs[1]
@@ -394,7 +396,7 @@
#         results = speech2text(**batch)
#         if len(results) < 1:
#             hyp = Hypothesis(score=0.0, scores={}, states={}, yseq=[])
#             results = [[" ", ["<space>"], [2], hyp, 10, 6]] * nbest
#             results = [[" ", ["sil"], [2], hyp, 10, 6]] * nbest
#         time_end = time.time()
#         forward_time = time_end - time_beg
#         lfr_factor = results[0][-1]
@@ -621,7 +623,7 @@
            results = speech2text(**batch)
            if len(results) < 1:
                hyp = Hypothesis(score=0.0, scores={}, states={}, yseq=[])
                results = [[" ", ["<space>"], [2], hyp, 10, 6]] * nbest
                results = [[" ", ["sil"], [2], hyp, 10, 6]] * nbest
            time_end = time.time()
            forward_time = time_end - time_beg
            lfr_factor = results[0][-1]