| | |
| | | """RWKV encoder definition for Transducer models.""" |
| | | |
| | | import math |
| | | from typing import Dict, List, Optional, Tuple |
| | | #!/usr/bin/env python3 |
| | | # -*- encoding: utf-8 -*- |
| | | # Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved. |
| | | # MIT License (https://opensource.org/licenses/MIT) |
| | | |
| | | import torch |
| | | from typing import Dict, List, Optional, Tuple |
| | | |
| | | from funasr.models.encoder.abs_encoder import AbsEncoder |
| | | from funasr.register import tables |
| | | from funasr.models.rwkv_bat.rwkv import RWKV |
| | | from funasr.models.transformer.layer_norm import LayerNorm |
| | | from funasr.models.rwkv_bat.rwkv_subsampling import RWKVConvInput |
| | | from funasr.models.transformer.utils.nets_utils import make_source_mask |
| | | from funasr.models.rwkv_bat.rwkv_subsampling import RWKVConvInput |
| | | |
| | | class RWKVEncoder(AbsEncoder): |
| | | |
| | | @tables.register("encoder_classes", "RWKVEncoder") |
| | | class RWKVEncoder(torch.nn.Module): |
| | | """RWKV encoder module. |
| | | |
| | | Based on https://arxiv.org/pdf/2305.13048.pdf. |
| | |
| | | att_dropout_rate: float = 0.0, |
| | | ffn_dropout_rate: float = 0.0, |
| | | dropout_rate: float = 0.0, |
| | | subsampling_factor: int =4, |
| | | subsampling_factor: int = 4, |
| | | time_reduction_factor: int = 1, |
| | | kernel: int = 3, |
| | | **kwargs, |
| | | ) -> None: |
| | | """Construct a RWKVEncoder object.""" |
| | | super().__init__() |
| | | |
| | | self.embed = RWKVConvInput( |
| | | input_size, |
| | | [output_size//4, output_size//2, output_size], |
| | | [output_size // 4, output_size // 2, output_size], |
| | | subsampling_factor, |
| | | conv_kernel_size=kernel, |
| | | output_size=output_size, |
| | |
| | | |
| | | linear_size = output_size * 4 if linear_size is None else linear_size |
| | | attention_size = output_size if attention_size is None else attention_size |
| | | |
| | | |
| | | self.rwkv_blocks = torch.nn.ModuleList( |
| | | [ |
| | | RWKV( |
| | |
| | | x, _ = block(x) |
| | | else: |
| | | x = self.rwkv_infer(x) |
| | | |
| | | |
| | | x = self.final_norm(x) |
| | | |
| | | if self.time_reduction_factor > 1: |
| | | x = x[:,::self.time_reduction_factor,:] |
| | | olens = torch.floor_divide(olens-1, self.time_reduction_factor) + 1 |
| | | x = x[:, :: self.time_reduction_factor, :] |
| | | olens = torch.floor_divide(olens - 1, self.time_reduction_factor) + 1 |
| | | |
| | | return x, olens, None |
| | | |
| | |
| | | |
| | | batch_size = xs_pad.shape[0] |
| | | |
| | | hidden_sizes = [ |
| | | self._output_size for i in range(5) |
| | | ] |
| | | hidden_sizes = [self._output_size for i in range(5)] |
| | | |
| | | state = [ |
| | | torch.zeros( |
| | |
| | | |
| | | xs_out = [] |
| | | for t in range(xs_pad.shape[1]): |
| | | x_t = xs_pad[:,t,:] |
| | | x_t = xs_pad[:, t, :] |
| | | for idx, block in enumerate(self.rwkv_blocks): |
| | | x_t, state = block(x_t, state=state) |
| | | xs_out.append(x_t) |