| | |
| | | import torch |
| | | import torch.nn as nn |
| | | from torch.nn import functional as F |
| | | from typeguard import check_argument_types |
| | | import numpy as np |
| | | from funasr.modules.nets_utils import make_pad_mask |
| | | from funasr.modules.layer_norm import LayerNorm |
| | |
| | | |
| | | class ConvEncoder(AbsEncoder): |
| | | """ |
| | | author: Speech Lab, Alibaba Group, China |
| | | Author: Speech Lab of DAMO Academy, Alibaba Group |
| | | Convolution encoder in OpenNMT framework |
| | | """ |
| | | |
| | |
| | | tf2torch_tensor_name_prefix_torch: str = "speaker_encoder", |
| | | tf2torch_tensor_name_prefix_tf: str = "EAND/speaker_encoder", |
| | | ): |
| | | assert check_argument_types() |
| | | super().__init__() |
| | | self._output_size = num_units |
| | | |
| | |
| | | self.out_padding = nn.ConstantPad1d((left_padding, right_padding), 0.0) |
| | | self.conv_out = nn.Conv1d( |
| | | num_units, |
| | | num_units, |
| | | out_units, |
| | | kernel_size, |
| | | ) |
| | | |
| | | if self.out_norm: |
| | | self.after_norm = LayerNorm(num_units) |
| | | self.after_norm = LayerNorm(out_units) |
| | | |
| | | def output_size(self) -> int: |
| | | # Return the value held in self.num_units as this encoder's output size. |
| | | # NOTE(review): the visible constructor lines store num_units in |
| | | # self._output_size; confirm that self.num_units is also assigned in the |
| | | # part of __init__ not shown here, otherwise this lookup would fail. |
| | | return self.num_units |