python/FunASR-XL.git

			@@ -456,6 +456,38 @@
			att_outs = self.forward_attention(v_h, scores, mask, mask_att_chunk_encoder)
			return att_outs + fsmn_memory

			def forward_chunk(self, x, cache=None, chunk_size=None, look_back=0):
				"""Run attention on one chunk, optionally reusing cached keys/values.

				The input is projected with ``self.forward_qkv``. When caching is
				active (``chunk_size`` is given and ``look_back > 0``), keys and
				values kept from earlier calls are prepended along dim 2 before the
				scores are computed, and the cache is refreshed with the most
				recent ``look_back * chunk_size[1]`` positions along that dim.

				Args:
					x: Input forwarded unchanged to ``self.forward_qkv``.
					cache: ``None`` or a dict with ``"k"`` and ``"v"`` tensors from
						a previous call. A dict passed in is updated in place.
					chunk_size: ``None`` or an indexable; only element ``[1]`` is
						read here (presumably the chunk length -- confirm against
						the caller).
					look_back: Multiplier applied to ``chunk_size[1]`` to get the
						number of trailing positions kept in the cache. Caching is
						skipped entirely unless this is greater than 0.

				Returns:
					tuple: ``(output, cache)`` where ``output`` is the result of
					``self.forward_attention`` plus the result of
					``self.forward_fsmn``, and ``cache`` is the updated dict, a
					newly created dict, or the ``cache`` argument unchanged when
					caching is inactive.

				"""
				queries, keys, values, fsmn_in = self.forward_qkv(x)

				caching_active = chunk_size is not None and look_back > 0
				if caching_active:
					# Number of trailing positions (dim 2) carried over to the next call.
					keep = look_back * chunk_size[1]
					if cache is None:
						# First call: seed the cache from this chunk only.
						cache = {
							"k": keys[:, :, -keep:, :],
							"v": values[:, :, -keep:, :],
						}
					else:
						# Prepend the history, then trim what is stored for next time.
						keys = torch.cat((cache["k"], keys), dim=2)
						values = torch.cat((cache["v"], values), dim=2)
						cache["k"] = keys[:, :, -keep:, :]
						cache["v"] = values[:, :, -keep:, :]

				fsmn_memory = self.forward_fsmn(fsmn_in, None)

				# Scale the queries by d_k ** -0.5 before the dot product with the keys.
				scaled_queries = queries * self.d_k ** (-0.5)
				scores = torch.matmul(scaled_queries, keys.transpose(-2, -1))
				attended = self.forward_attention(values, scores, None)
				return attended + fsmn_memory, cache


			class MultiHeadedAttentionSANMwithMask(MultiHeadedAttentionSANM):
				"""Subclass of ``MultiHeadedAttentionSANM`` sharing the parent's constructor."""

				def __init__(self, *args, **kwargs):
					"""Forward all positional and keyword arguments to the parent.

					The previous signature ``(self, args, *kwargs)`` required at
					least one positional argument and rejected every keyword
					argument; ``*args, **kwargs`` passes both kinds through
					unchanged.
					"""
					super().__init__(*args, **kwargs)