From 8706e767affc6bdc8cb7a67ca3a20a62779ff048 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Wed, 17 May 2023 15:45:46 +0800
Subject: [PATCH] Merge branch 'main' of https://github.com/alibaba-damo-academy/FunASR into main

---
 funasr/models/encoder/conformer_encoder.py |   22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/funasr/models/encoder/conformer_encoder.py b/funasr/models/encoder/conformer_encoder.py
index c837cf5..434f2a4 100644
--- a/funasr/models/encoder/conformer_encoder.py
+++ b/funasr/models/encoder/conformer_encoder.py
@@ -30,7 +30,6 @@
     StreamingRelPositionalEncoding,
 )
 from funasr.modules.layer_norm import LayerNorm
-from funasr.modules.normalization import get_normalization
 from funasr.modules.multi_layer_conv import Conv1dLinear
 from funasr.modules.multi_layer_conv import MultiLayeredConv1d
 from funasr.modules.nets_utils import get_activation
@@ -308,7 +307,7 @@
         feed_forward: torch.nn.Module,
         feed_forward_macaron: torch.nn.Module,
         conv_mod: torch.nn.Module,
-        norm_class: torch.nn.Module = torch.nn.LayerNorm,
+        norm_class: torch.nn.Module = LayerNorm,
         norm_args: Dict = {},
         dropout_rate: float = 0.0,
     ) -> None:
@@ -895,7 +894,7 @@
 
         return x, cache
 
-class ConformerChunkEncoder(torch.nn.Module):
+class ConformerChunkEncoder(AbsEncoder):
     """Encoder module definition.
     Args:
         input_size: Input size.
@@ -940,7 +939,6 @@
         default_chunk_size: int = 16,
         jitter_range: int = 4,
         subsampling_factor: int = 1,
-        **activation_parameters,
     ) -> None:
         """Construct an Encoder object."""
         super().__init__()
@@ -961,7 +959,7 @@
         )
 
         activation = get_activation(
-            activation_type, **activation_parameters
+            activation_type
        )        
 
         pos_wise_args = (
@@ -991,9 +989,6 @@
             simplified_att_score,
         )
 
-        norm_class, norm_args = get_normalization(
-            norm_type,
-        )
 
         fn_modules = []
         for _ in range(num_blocks):
@@ -1003,8 +998,6 @@
                 PositionwiseFeedForward(*pos_wise_args),
                 PositionwiseFeedForward(*pos_wise_args),
                 CausalConvolution(*conv_mod_args),
-                norm_class=norm_class,
-                norm_args=norm_args,
                 dropout_rate=dropout_rate,
             )
             fn_modules.append(module)        
@@ -1012,11 +1005,9 @@
         self.encoders = MultiBlocks(
             [fn() for fn in fn_modules],
             output_size,
-            norm_class=norm_class,
-            norm_args=norm_args,
         )
 
-        self.output_size = output_size
+        self._output_size = output_size
 
         self.dynamic_chunk_training = dynamic_chunk_training
         self.short_chunk_threshold = short_chunk_threshold
@@ -1028,6 +1019,9 @@
         self.jitter_range = jitter_range
 
         self.time_reduction_factor = time_reduction_factor
+
+    def output_size(self) -> int:
+        return self._output_size
 
     def get_encoder_input_raw_size(self, size: int, hop_length: int) -> int:
         """Return the corresponding number of sample for a given chunk size, in frames.
@@ -1151,7 +1145,7 @@
             x = x[:,::self.time_reduction_factor,:]
             olens = torch.floor_divide(olens-1, self.time_reduction_factor) + 1
 
-        return x, olens
+        return x, olens, None
 
     def simu_chunk_forward(
         self,

--
Gitblit v1.9.1