python/FunASR-XL.git

			@@ -9,7 +9,6 @@
			import numpy as np

			import torch
			from typeguard import check_argument_types

			from funasr.layers.abs_normalize import AbsNormalize
			from funasr.models.ctc import CTC
			@@ -43,9 +42,7 @@
			frontend: Optional[AbsFrontend],
			specaug: Optional[AbsSpecAug],
			normalize: Optional[AbsNormalize],
			preencoder: Optional[AbsPreEncoder],
			encoder: AbsEncoder,
			postencoder: Optional[AbsPostEncoder],
			decoder: AbsDecoder,
			ctc: CTC,
			ctc_weight: float = 0.5,
			@@ -72,8 +69,9 @@
			crit_attn_weight: float = 0.0,
			crit_attn_smooth: float = 0.0,
			bias_encoder_dropout_rate: float = 0.0,
			preencoder: Optional[AbsPreEncoder] = None,
			postencoder: Optional[AbsPostEncoder] = None,
			):
			assert check_argument_types()
			assert 0.0 <= ctc_weight <= 1.0, ctc_weight
			assert 0.0 <= interctc_weight < 1.0, interctc_weight

			@@ -343,7 +341,7 @@
			input_mask_expand_dim, 0)
			return sematic_embeds * tgt_mask, decoder_out * tgt_mask

			def cal_decoder_with_predictor(self, encoder_out, encoder_out_lens, sematic_embeds, ys_pad_lens, hw_list=None):
			def cal_decoder_with_predictor(self, encoder_out, encoder_out_lens, sematic_embeds, ys_pad_lens, hw_list=None, clas_scale=1.0):
			if hw_list is None:
			hw_list = [torch.Tensor([1]).long().to(encoder_out.device)] # empty hotword list
			hw_list_pad = pad_list(hw_list, 0)
			@@ -352,6 +350,7 @@
			else:
			hw_embed = self.bias_embed(hw_list_pad)
			hw_embed, (h_n, _) = self.bias_encoder(hw_embed)
			hw_embed = h_n.repeat(encoder_out.shape[0], 1, 1)
			else:
			hw_lengths = [len(i) for i in hw_list]
			hw_list_pad = pad_list([torch.Tensor(i).long() for i in hw_list], 0).to(encoder_out.device)
			@@ -365,7 +364,7 @@
			hw_embed = h_n.repeat(encoder_out.shape[0], 1, 1)

			decoder_outs = self.decoder(
			encoder_out, encoder_out_lens, sematic_embeds, ys_pad_lens, contextual_info=hw_embed
			encoder_out, encoder_out_lens, sematic_embeds, ys_pad_lens, contextual_info=hw_embed, clas_scale=clas_scale
			)
			decoder_out = decoder_outs[0]
			decoder_out = torch.log_softmax(decoder_out, dim=-1)