From 219c2482ab755fbd4e49dfbdee91bf1a8a4ec49a Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Fri, 19 May 2023 11:33:27 +0800
Subject: [PATCH] websocket 2pass bugfix

---
 funasr/models/encoder/conformer_encoder.py |   15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/funasr/models/encoder/conformer_encoder.py b/funasr/models/encoder/conformer_encoder.py
index b7b552c..5f20dee 100644
--- a/funasr/models/encoder/conformer_encoder.py
+++ b/funasr/models/encoder/conformer_encoder.py
@@ -15,13 +15,13 @@
 from typeguard import check_argument_types
 
 from funasr.models.ctc import CTC
-from funasr.models.encoder.abs_encoder import AbsEncoder
 from funasr.modules.attention import (
     MultiHeadedAttention,  # noqa: H301
     RelPositionMultiHeadedAttention,  # noqa: H301
     RelPositionMultiHeadedAttentionChunk,
     LegacyRelPositionMultiHeadedAttention,  # noqa: H301
 )
+from funasr.models.encoder.abs_encoder import AbsEncoder
 from funasr.modules.embedding import (
     PositionalEncoding,  # noqa: H301
     ScaledPositionalEncoding,  # noqa: H301
@@ -307,7 +307,7 @@
         feed_forward: torch.nn.Module,
         feed_forward_macaron: torch.nn.Module,
         conv_mod: torch.nn.Module,
-        norm_class: torch.nn.Module = torch.nn.LayerNorm,
+        norm_class: torch.nn.Module = LayerNorm,
         norm_args: Dict = {},
         dropout_rate: float = 0.0,
     ) -> None:
@@ -894,7 +894,7 @@
 
         return x, cache
 
-class ConformerChunkEncoder(torch.nn.Module):
+class ConformerChunkEncoder(AbsEncoder):
     """Encoder module definition.
     Args:
         input_size: Input size.
@@ -1007,7 +1007,7 @@
             output_size,
         )
 
-        self.output_size = output_size
+        self._output_size = output_size
 
         self.dynamic_chunk_training = dynamic_chunk_training
         self.short_chunk_threshold = short_chunk_threshold
@@ -1019,6 +1019,9 @@
         self.jitter_range = jitter_range
 
         self.time_reduction_factor = time_reduction_factor
+
+    def output_size(self) -> int:
+        return self._output_size
 
     def get_encoder_input_raw_size(self, size: int, hop_length: int) -> int:
         """Return the corresponding number of sample for a given chunk size, in frames.
@@ -1075,7 +1078,7 @@
                 limit_size,
             )
 
-        mask = make_source_mask(x_len)
+        mask = make_source_mask(x_len).to(x.device)
 
         if self.unified_model_training:
             chunk_size = self.default_chunk_size + torch.randint(-self.jitter_range, self.jitter_range+1, (1,)).item()
@@ -1142,7 +1145,7 @@
             x = x[:,::self.time_reduction_factor,:]
             olens = torch.floor_divide(olens-1, self.time_reduction_factor) + 1
 
-        return x, olens
+        return x, olens, None
 
     def simu_chunk_forward(
         self,

--
Gitblit v1.9.1