游雁
2023-05-25 b18f7d121f2f17df8bf2d0c2bbb223bc5ddbcc0f
funasr/models/encoder/conformer_encoder.py
@@ -15,13 +15,13 @@
from typeguard import check_argument_types
from funasr.models.ctc import CTC
from funasr.models.encoder.abs_encoder import AbsEncoder
from funasr.modules.attention import (
    MultiHeadedAttention,  # noqa: H301
    RelPositionMultiHeadedAttention,  # noqa: H301
    RelPositionMultiHeadedAttentionChunk,
    LegacyRelPositionMultiHeadedAttention,  # noqa: H301
)
from funasr.models.encoder.abs_encoder import AbsEncoder
from funasr.modules.embedding import (
    PositionalEncoding,  # noqa: H301
    ScaledPositionalEncoding,  # noqa: H301
@@ -30,7 +30,6 @@
    StreamingRelPositionalEncoding,
)
from funasr.modules.layer_norm import LayerNorm
from funasr.modules.normalization import get_normalization
from funasr.modules.multi_layer_conv import Conv1dLinear
from funasr.modules.multi_layer_conv import MultiLayeredConv1d
from funasr.modules.nets_utils import get_activation
@@ -308,7 +307,7 @@
        feed_forward: torch.nn.Module,
        feed_forward_macaron: torch.nn.Module,
        conv_mod: torch.nn.Module,
        norm_class: torch.nn.Module = torch.nn.LayerNorm,
        norm_class: torch.nn.Module = LayerNorm,
        norm_args: Dict = {},
        dropout_rate: float = 0.0,
    ) -> None:
@@ -895,7 +894,7 @@
        return x, cache
class ConformerChunkEncoder(torch.nn.Module):
class ConformerChunkEncoder(AbsEncoder):
    """Encoder module definition.
    Args:
        input_size: Input size.
@@ -940,7 +939,6 @@
        default_chunk_size: int = 16,
        jitter_range: int = 4,
        subsampling_factor: int = 1,
        **activation_parameters,
    ) -> None:
        """Construct an Encoder object."""
        super().__init__()
@@ -961,7 +959,7 @@
        )
        activation = get_activation(
            activation_type, **activation_parameters
            activation_type
       )        
        pos_wise_args = (
@@ -991,9 +989,6 @@
            simplified_att_score,
        )
        norm_class, norm_args = get_normalization(
            norm_type,
        )
        fn_modules = []
        for _ in range(num_blocks):
@@ -1003,8 +998,6 @@
                PositionwiseFeedForward(*pos_wise_args),
                PositionwiseFeedForward(*pos_wise_args),
                CausalConvolution(*conv_mod_args),
                norm_class=norm_class,
                norm_args=norm_args,
                dropout_rate=dropout_rate,
            )
            fn_modules.append(module)        
@@ -1012,11 +1005,9 @@
        self.encoders = MultiBlocks(
            [fn() for fn in fn_modules],
            output_size,
            norm_class=norm_class,
            norm_args=norm_args,
        )
        self.output_size = output_size
        self._output_size = output_size
        self.dynamic_chunk_training = dynamic_chunk_training
        self.short_chunk_threshold = short_chunk_threshold
@@ -1028,6 +1019,9 @@
        self.jitter_range = jitter_range
        self.time_reduction_factor = time_reduction_factor
    def output_size(self) -> int:
        """Return the value stored in ``self._output_size``."""
        return self._output_size
    def get_encoder_input_raw_size(self, size: int, hop_length: int) -> int:
        """Return the corresponding number of sample for a given chunk size, in frames.
@@ -1084,7 +1078,7 @@
                limit_size,
            )
        mask = make_source_mask(x_len)
        mask = make_source_mask(x_len).to(x.device)
        if self.unified_model_training:
            chunk_size = self.default_chunk_size + torch.randint(-self.jitter_range, self.jitter_range+1, (1,)).item()
@@ -1151,7 +1145,7 @@
            x = x[:,::self.time_reduction_factor,:]
            olens = torch.floor_divide(olens-1, self.time_reduction_factor) + 1
        return x, olens
        return x, olens, None
    def simu_chunk_forward(
        self,