| | |
| | | |
| | | """Repeat the same layer definition.""" |
| | | |
| | | from typing import Dict, List, Optional |
| | | from funasr.modules.layer_norm import LayerNorm |
| | | import torch |
| | | |
| | | |
| | | class MultiSequential(torch.nn.Sequential): |
| | | """Multi-input multi-output torch.nn.Sequential.""" |
| | | |
| | | def __init__(self, *args, layer_drop_rate=0.0): |
| | | """Initialize MultiSequential with layer_drop. |
| | | |
| | | Args: |
| | | layer_drop_rate (float): Probability of dropping out each fn (layer). |
| | | |
| | | """ |
| | | super(MultiSequential, self).__init__(*args) |
| | | self.layer_drop_rate = layer_drop_rate |
| | | |
| | | def forward(self, *args): |
| | | """Repeat.""" |
| | | for m in self: |
| | | args = m(*args) |
| | | _probs = torch.empty(len(self)).uniform_() |
| | | for idx, m in enumerate(self): |
| | | if not self.training or (_probs[idx] >= self.layer_drop_rate): |
| | | args = m(*args) |
| | | return args |
| | | |
| | | |
| | | def repeat(N, fn): |
| | | def repeat(N, fn, layer_drop_rate=0.0): |
| | | """Repeat module N times. |
| | | |
| | | Args: |
| | | N (int): Number of repeat time. |
| | | fn (Callable): Function to generate module. |
| | | layer_drop_rate (float): Probability of dropping out each fn (layer). |
| | | |
| | | Returns: |
| | | MultiSequential: Repeated model instance. |
| | | |
| | | """ |
| | | return MultiSequential(*[fn(n) for n in range(N)]) |
| | | return MultiSequential(*[fn(n) for n in range(N)], layer_drop_rate=layer_drop_rate) |
| | | |
| | | |
| | | class MultiBlocks(torch.nn.Module): |
| | | """MultiBlocks definition. |
| | | Args: |
| | | block_list: Individual blocks of the encoder architecture. |
| | | output_size: Architecture output size. |
| | | norm_class: Normalization module class. |
| | | norm_args: Normalization module arguments. |
| | | """ |
| | | |
| | | def __init__( |
| | | self, |
| | | block_list: List[torch.nn.Module], |
| | | output_size: int, |
| | | norm_class: torch.nn.Module = LayerNorm, |
| | | ) -> None: |
| | | """Construct a MultiBlocks object.""" |
| | | super().__init__() |
| | | |
| | | self.blocks = torch.nn.ModuleList(block_list) |
| | | self.norm_blocks = norm_class(output_size) |
| | | |
| | | self.num_blocks = len(block_list) |
| | | |
| | | def reset_streaming_cache(self, left_context: int, device: torch.device) -> None: |
| | | """Initialize/Reset encoder streaming cache. |
| | | Args: |
| | | left_context: Number of left frames during chunk-by-chunk inference. |
| | | device: Device to use for cache tensor. |
| | | """ |
| | | for idx in range(self.num_blocks): |
| | | self.blocks[idx].reset_streaming_cache(left_context, device) |
| | | |
| | | def forward( |
| | | self, |
| | | x: torch.Tensor, |
| | | pos_enc: torch.Tensor, |
| | | mask: torch.Tensor, |
| | | chunk_mask: Optional[torch.Tensor] = None, |
| | | ) -> torch.Tensor: |
| | | """Forward each block of the encoder architecture. |
| | | Args: |
| | | x: MultiBlocks input sequences. (B, T, D_block_1) |
| | | pos_enc: Positional embedding sequences. |
| | | mask: Source mask. (B, T) |
| | | chunk_mask: Chunk mask. (T_2, T_2) |
| | | Returns: |
| | | x: Output sequences. (B, T, D_block_N) |
| | | """ |
| | | for block_index, block in enumerate(self.blocks): |
| | | x, mask, pos_enc = block(x, pos_enc, mask, chunk_mask=chunk_mask) |
| | | |
| | | x = self.norm_blocks(x) |
| | | |
| | | return x |
| | | |
| | | def chunk_forward( |
| | | self, |
| | | x: torch.Tensor, |
| | | pos_enc: torch.Tensor, |
| | | mask: torch.Tensor, |
| | | chunk_size: int = 0, |
| | | left_context: int = 0, |
| | | right_context: int = 0, |
| | | ) -> torch.Tensor: |
| | | """Forward each block of the encoder architecture. |
| | | Args: |
| | | x: MultiBlocks input sequences. (B, T, D_block_1) |
| | | pos_enc: Positional embedding sequences. (B, 2 * (T - 1), D_att) |
| | | mask: Source mask. (B, T_2) |
| | | left_context: Number of frames in left context. |
| | | right_context: Number of frames in right context. |
| | | Returns: |
| | | x: MultiBlocks output sequences. (B, T, D_block_N) |
| | | """ |
| | | for block_idx, block in enumerate(self.blocks): |
| | | x, pos_enc = block.chunk_forward( |
| | | x, |
| | | pos_enc, |
| | | mask, |
| | | chunk_size=chunk_size, |
| | | left_context=left_context, |
| | | right_context=right_context, |
| | | ) |
| | | |
| | | x = self.norm_blocks(x) |
| | | |
| | | return x |