from funasr.models.transformer.attention import MultiHeadedAttention
from funasr.models.transformer.positionwise_feed_forward import PositionwiseFeedForward

self.blocks = None
if kwargs.get("n_layer", 2) > 0:
    self.blocks = nn.ModuleList(
        [
            EncoderLayer(
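The listing breaks off inside the `EncoderLayer(...)` call. As a rough, self-contained sketch of how such an optional Transformer stack is usually assembled (assuming the ESPnet-style `EncoderLayer(size, self_attn, feed_forward, dropout_rate)` constructor and an assumed import path for `EncoderLayer`; the dimension and dropout arguments below are illustrative defaults, not FunASR's actual values):

```python
import torch.nn as nn

from funasr.models.transformer.attention import MultiHeadedAttention
from funasr.models.transformer.encoder_layer import EncoderLayer  # assumed import path
from funasr.models.transformer.positionwise_feed_forward import PositionwiseFeedForward


class AdaptorSketch(nn.Module):
    """Illustrative adaptor tail: an optional stack of plain Transformer encoder layers."""

    def __init__(self, attention_dim=1024, attention_heads=8, linear_units=2048,
                 dropout_rate=0.1, **kwargs):
        super().__init__()
        self.blocks = None
        n_layer = kwargs.get("n_layer", 2)
        if n_layer > 0:
            self.blocks = nn.ModuleList(
                [
                    EncoderLayer(
                        attention_dim,
                        MultiHeadedAttention(attention_heads, attention_dim, dropout_rate),
                        PositionwiseFeedForward(attention_dim, linear_units, dropout_rate),
                        dropout_rate,
                    )
                    for _ in range(n_layer)
                ]
            )
```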
olens = None
# output lengths after downsampling by a factor of self.k: ceil(ilens / self.k)
olens = (ilens - 1) // self.k + 1
# (batch, 1, T') padding mask on the device of x; True marks valid (non-padded) frames
masks = (~make_pad_mask(olens)[:, None, :]).to(x.device)
if self.blocks is not None:
    for layer, block in enumerate(self.blocks):
        x, masks = block(x, masks)
return x, olens
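The length arithmetic in this forward pass is easy to check in isolation: downsampling a sequence of `ilens` frames by a factor of `k` leaves `ceil(ilens / k) = (ilens - 1) // k + 1` frames, and the padding mask is then built from those new lengths. A minimal standalone check, using a local stand-in for FunASR's `make_pad_mask` (which likewise returns `True` on padded positions):

```python
import torch


def make_pad_mask(lengths: torch.Tensor) -> torch.Tensor:
    """Return a (batch, max_len) bool mask where True marks padded positions."""
    max_len = int(lengths.max())
    ar = torch.arange(max_len, device=lengths.device)
    return ar[None, :] >= lengths[:, None]


k = 4
ilens = torch.tensor([17, 8, 20])
olens = (ilens - 1) // k + 1                 # ceil(ilens / k): 17 -> 5, 8 -> 2, 20 -> 5
masks = ~make_pad_mask(olens)[:, None, :]    # (batch, 1, T'), True on valid frames

print(olens.tolist())   # [5, 2, 5]
print(masks.shape)      # torch.Size([3, 1, 5])
```

In the adaptor itself, `self.k` is this downsampling factor, so the LLM ends up seeing roughly one embedding per `k` encoder frames.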
batch_size, token_num, dims = inputs_embeds.shape
# treat any negative mask values as 0 so the sum counts only real placeholder positions
fbank_mask[fbank_mask < 0] = 0
# number of audio placeholder positions in each sample
fbank_fake_lens = fbank_mask.sum(-1).to(torch.int32)
# _, l, _ = encoder_out.shape
for batch_idx in range(batch_size):
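The body of this loop is not shown. In this kind of model the loop typically copies each sample's acoustic embeddings from `encoder_out` into the placeholder positions of `inputs_embeds` marked by `fbank_mask`, one utterance at a time so each sample's true length is respected. The following is a hedged, self-contained sketch of that splice with toy tensors; the variable names follow the snippet, but the real FunASR loop body may differ:

```python
import torch

# Toy shapes: 2 samples, 10 LLM tokens, hidden size 8, at most 6 encoder frames.
inputs_embeds = torch.randn(2, 10, 8)
encoder_out = torch.randn(2, 6, 8)             # adaptor output, padded to the max length
fbank_mask = torch.tensor([                    # 1 marks audio placeholder positions
    [0, 1, 1, 1, 1, 0, 0, 0, 0, 0],
    [0, 1, 1, 1, 1, 1, 1, 0, 0, 0],
])

batch_size, token_num, dims = inputs_embeds.shape
fbank_mask[fbank_mask < 0] = 0
fbank_fake_lens = fbank_mask.sum(-1).to(torch.int32)    # placeholder count per sample

for batch_idx in range(batch_size):
    n = int(fbank_fake_lens[batch_idx])                  # frames to splice for this sample
    positions = fbank_mask[batch_idx].bool()             # where the placeholders sit
    inputs_embeds[batch_idx, positions] = encoder_out[batch_idx, :n]
```

Splicing per sample, rather than with one masked assignment over the whole batch, keeps the pairing between each utterance's `fbank_fake_lens[batch_idx]` frames and its own placeholder span explicit.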