语帆
2024-02-28 e2425cc0675cc6fd7685067a27eabd1d32ca7fc9
test
2个文件已修改
13 ■■■■■ 已修改文件
funasr/models/conformer/encoder.py 6 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/models/lcbnet/model.py 7 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/models/conformer/encoder.py
@@ -573,7 +573,7 @@
            xs_pad, masks = self.embed(xs_pad, masks)
        else:
            xs_pad = self.embed(xs_pad)
        pdb.set_trace()
        intermediate_outs = []
        if len(self.interctc_layer_idx) == 0:
            xs_pad, masks = self.encoders(xs_pad, masks)
@@ -601,12 +601,12 @@
                            xs_pad = (x, pos_emb)
                        else:
                            xs_pad = xs_pad + self.conditioning_layer(ctc_out)
        pdb.set_trace()
        if isinstance(xs_pad, tuple):
            xs_pad = xs_pad[0]
        if self.normalize_before:
            xs_pad = self.after_norm(xs_pad)
        pdb.set_trace()
        olens = masks.squeeze(1).sum(1)
        if len(intermediate_outs) > 0:
            return (xs_pad, intermediate_outs), olens, None
funasr/models/lcbnet/model.py
@@ -296,7 +296,6 @@
        
        if intermediate_outs is not None:
            return (encoder_out, intermediate_outs), encoder_out_lens
        pdb.set_trace()
        return encoder_out, encoder_out_lens
    
    def _calc_att_loss(
@@ -444,7 +443,11 @@
        encoder_out, encoder_out_lens = self.encode(speech, speech_lengths)
        if isinstance(encoder_out, tuple):
            encoder_out = encoder_out[0]
        pdb.set_trace()
        ocr = ocr_sample_list[0]
        ocr_lengths = ocr.new_full([1], dtype=torch.long, fill_value=ocr.size(1))
        ocr, ocr_lens, _ = self.text_encoder(ocr, ocr_lengths)
        pdb.set_trace()
        # c. Passed the encoder result and the beam search
        nbest_hyps = self.beam_search(
            x=encoder_out[0], maxlenratio=kwargs.get("maxlenratio", 0.0), minlenratio=kwargs.get("minlenratio", 0.0)