雾聪
2024-03-29 9ba0dbd98bf69c830dfcfde8f109a400cb65e4e5
funasr/models/contextual_paraformer/model.py
@@ -107,7 +107,7 @@
        hotword_pad = kwargs.get("hotword_pad")
        hotword_lengths = kwargs.get("hotword_lengths")
        dha_pad = kwargs.get("dha_pad")
        # dha_pad = kwargs.get("dha_pad")
        # 1. Encoder
        encoder_out, encoder_out_lens = self.encode(speech, speech_lengths)
@@ -189,13 +189,10 @@
        # 0. sampler
        decoder_out_1st = None
        if self.sampling_ratio > 0.0:
            if self.step_cur < 2:
                logging.info("enable sampler in paraformer, sampling_ratio: {}".format(self.sampling_ratio))
            sematic_embeds, decoder_out_1st = self.sampler(encoder_out, encoder_out_lens, ys_pad, ys_pad_lens,
                                                           pre_acoustic_embeds, contextual_info)
        else:
            if self.step_cur < 2:
                logging.info("disable sampler in paraformer, sampling_ratio: {}".format(self.sampling_ratio))
            sematic_embeds = pre_acoustic_embeds
        # 1. Forward decoder