shixian.shi
2023-06-27 b0f4910de6dc91c13828026fb5bdd4f15d8636f3
funasr/models/decoder/contextual_decoder.py
@@ -74,7 +74,7 @@
        return x, tgt_mask, x_self_attn, x_src_attn
class ContexutalBiasDecoder(nn.Module):
class ContextualBiasDecoder(nn.Module):
    def __init__(
        self,
        size,
@@ -83,7 +83,7 @@
        normalize_before=True,
    ):
        """Construct an DecoderLayer object."""
        super(ContexutalBiasDecoder, self).__init__()
        super(ContextualBiasDecoder, self).__init__()
        self.size = size
        self.src_attn = src_attn
        if src_attn is not None:
@@ -102,7 +102,7 @@
class ContextualParaformerDecoder(ParaformerSANMDecoder):
    """
    author: Speech Lab, Alibaba Group, China
    Author: Speech Lab of DAMO Academy, Alibaba Group
    Paraformer: Fast and Accurate Parallel Transformer for Non-autoregressive End-to-End Speech Recognition
    https://arxiv.org/abs/2006.01713
    """
@@ -186,7 +186,7 @@
            ),
        )
        self.dropout = nn.Dropout(dropout_rate)
        self.bias_decoder = ContexutalBiasDecoder(
        self.bias_decoder = ContextualBiasDecoder(
            size=attention_dim,
            src_attn=MultiHeadedAttentionCrossAtt(
                attention_heads, attention_dim, src_attention_dropout_rate
@@ -246,6 +246,7 @@
        ys_in_pad: torch.Tensor,
        ys_in_lens: torch.Tensor,
        contextual_info: torch.Tensor,
        clas_scale: float = 1.0,
        return_hidden: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Forward decoder.
@@ -285,7 +286,7 @@
        cx, tgt_mask, _, _, _ = self.bias_decoder(x_self_attn, tgt_mask, contextual_info, memory_mask=contextual_mask)
        if self.bias_output is not None:
            x = torch.cat([x_src_attn, cx], dim=2)
            x = torch.cat([x_src_attn, cx*clas_scale], dim=2)
            x = self.bias_output(x.transpose(1, 2)).transpose(1, 2)  # 2D -> D
            x = x_self_attn + self.dropout(x)