From 19f4fae784210e85421ae2f8dcd0fbbd1eb2ad3e Mon Sep 17 00:00:00 2001
From: lingyunfly <121302812+lingyunfly@users.noreply.github.com>
Date: Thu, 18 May 2023 14:15:27 +0800
Subject: [PATCH] Update vad_inference_launch.py

---
 funasr/models/e2e_asr_transducer.py |   22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/funasr/models/e2e_asr_transducer.py b/funasr/models/e2e_asr_transducer.py
index 657dd75..3120087 100644
--- a/funasr/models/e2e_asr_transducer.py
+++ b/funasr/models/e2e_asr_transducer.py
@@ -12,12 +12,12 @@
 from funasr.models.specaug.abs_specaug import AbsSpecAug
 from funasr.models.decoder.rnnt_decoder import RNNTDecoder
 from funasr.models.decoder.abs_decoder import AbsDecoder as AbsAttDecoder
-from funasr.models.encoder.conformer_encoder import ConformerChunkEncoder as Encoder
+from funasr.models.encoder.abs_encoder import AbsEncoder
 from funasr.models.joint_net.joint_network import JointNetwork
 from funasr.modules.nets_utils import get_transducer_task_io
 from funasr.layers.abs_normalize import AbsNormalize
 from funasr.torch_utils.device_funcs import force_gatherable
-from funasr.train.abs_espnet_model import AbsESPnetModel
+from funasr.models.base_model import FunASRModel
 
 if V(torch.__version__) >= V("1.6.0"):
     from torch.cuda.amp import autocast
@@ -28,7 +28,7 @@
         yield
 
 
-class TransducerModel(AbsESPnetModel):
+class TransducerModel(FunASRModel):
     """ESPnet2ASRTransducerModel module definition.
 
     Args:
@@ -62,7 +62,7 @@
         frontend: Optional[AbsFrontend],
         specaug: Optional[AbsSpecAug],
         normalize: Optional[AbsNormalize],
-        encoder: Encoder,
+        encoder: AbsEncoder,
         decoder: RNNTDecoder,
         joint_network: JointNetwork,
         att_decoder: Optional[AbsAttDecoder] = None,
@@ -286,7 +286,7 @@
                 feats, feats_lengths = self.normalize(feats, feats_lengths)
 
         # 4. Forward encoder
-        encoder_out, encoder_out_lens = self.encoder(feats, feats_lengths)
+        encoder_out, encoder_out_lens, _ = self.encoder(feats, feats_lengths)
 
         assert encoder_out.size(0) == speech.size(0), (
             encoder_out.size(),
@@ -386,7 +386,7 @@
 
         if not self.training and (self.report_cer or self.report_wer):
             if self.error_calculator is None:
-                from espnet2.asr_transducer.error_calculator import ErrorCalculator
+                from funasr.modules.e2e_asr_common import ErrorCalculatorTransducer as ErrorCalculator
 
                 self.error_calculator = ErrorCalculator(
                     self.decoder,
@@ -398,7 +398,7 @@
                     report_wer=self.report_wer,
                 )
 
-            cer_transducer, wer_transducer = self.error_calculator(encoder_out, target)
+            cer_transducer, wer_transducer = self.error_calculator(encoder_out, target, t_len)
 
             return loss_transducer, cer_transducer, wer_transducer
 
@@ -483,7 +483,7 @@
 
         return loss_lm
 
-class UnifiedTransducerModel(AbsESPnetModel):
+class UnifiedTransducerModel(FunASRModel):
     """ESPnet2ASRTransducerModel module definition.
     Args:
         vocab_size: Size of complete vocabulary (w/ EOS and blank included).
@@ -515,7 +515,7 @@
         frontend: Optional[AbsFrontend],
         specaug: Optional[AbsSpecAug],
         normalize: Optional[AbsNormalize],
-        encoder: Encoder,
+        encoder: AbsEncoder,
         decoder: RNNTDecoder,
         joint_network: JointNetwork,
         att_decoder: Optional[AbsAttDecoder] = None,
@@ -889,6 +889,8 @@
 
         if not self.training and (self.report_cer or self.report_wer):
             if self.error_calculator is None:
+                from funasr.modules.e2e_asr_common import ErrorCalculatorTransducer as ErrorCalculator
+
                 self.error_calculator = ErrorCalculator(
                     self.decoder,
                     self.joint_network,
@@ -899,7 +901,7 @@
                     report_wer=self.report_wer,
                 )
 
-            cer_transducer, wer_transducer = self.error_calculator(encoder_out, target)
+            cer_transducer, wer_transducer = self.error_calculator(encoder_out, target, t_len)
             return loss_transducer, cer_transducer, wer_transducer
 
         return loss_transducer, None, None

--
Gitblit v1.9.1