From bc723ea200144bd6fa8a5dff4b9a780feda144fc Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: 星期四, 29 六月 2023 18:55:01 +0800
Subject: [PATCH] dcos
---
funasr/models/e2e_asr_transducer.py | 37 +++++++++++++++++++------------------
1 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/funasr/models/e2e_asr_transducer.py b/funasr/models/e2e_asr_transducer.py
index f8ba0f0..80914b1 100644
--- a/funasr/models/e2e_asr_transducer.py
+++ b/funasr/models/e2e_asr_transducer.py
@@ -6,18 +6,21 @@
import torch
from packaging.version import parse as V
-from typeguard import check_argument_types
-
+from funasr.losses.label_smoothing_loss import (
+ LabelSmoothingLoss, # noqa: H301
+)
from funasr.models.frontend.abs_frontend import AbsFrontend
from funasr.models.specaug.abs_specaug import AbsSpecAug
from funasr.models.decoder.rnnt_decoder import RNNTDecoder
from funasr.models.decoder.abs_decoder import AbsDecoder as AbsAttDecoder
-from funasr.models.encoder.conformer_encoder import ConformerChunkEncoder as Encoder
+from funasr.models.encoder.abs_encoder import AbsEncoder
from funasr.models.joint_net.joint_network import JointNetwork
from funasr.modules.nets_utils import get_transducer_task_io
+from funasr.modules.nets_utils import th_accuracy
+from funasr.modules.add_sos_eos import add_sos_eos
from funasr.layers.abs_normalize import AbsNormalize
from funasr.torch_utils.device_funcs import force_gatherable
-from funasr.train.abs_espnet_model import AbsESPnetModel
+from funasr.models.base_model import FunASRModel
if V(torch.__version__) >= V("1.6.0"):
from torch.cuda.amp import autocast
@@ -28,7 +31,7 @@
yield
-class TransducerModel(AbsESPnetModel):
+class TransducerModel(FunASRModel):
"""ESPnet2ASRTransducerModel module definition.
Args:
@@ -62,7 +65,7 @@
frontend: Optional[AbsFrontend],
specaug: Optional[AbsSpecAug],
normalize: Optional[AbsNormalize],
- encoder: Encoder,
+ encoder: AbsEncoder,
decoder: RNNTDecoder,
joint_network: JointNetwork,
att_decoder: Optional[AbsAttDecoder] = None,
@@ -81,8 +84,6 @@
) -> None:
"""Construct an ESPnetASRTransducerModel object."""
super().__init__()
-
- assert check_argument_types()
# The following labels ID are reserved: 0 (blank) and vocab_size - 1 (sos/eos)
self.blank_id = 0
@@ -108,7 +109,7 @@
self.use_auxiliary_lm_loss = auxiliary_lm_loss_weight > 0
if self.use_auxiliary_ctc:
- self.ctc_lin = torch.nn.Linear(encoder.output_size, vocab_size)
+ self.ctc_lin = torch.nn.Linear(encoder.output_size(), vocab_size)
self.ctc_dropout_rate = auxiliary_ctc_dropout_rate
if self.use_auxiliary_lm_loss:
@@ -162,7 +163,9 @@
# 1. Encoder
encoder_out, encoder_out_lens = self.encode(speech, speech_lengths)
-
+ if hasattr(self.encoder, 'overlap_chunk_cls') and self.encoder.overlap_chunk_cls is not None:
+ encoder_out, encoder_out_lens = self.encoder.overlap_chunk_cls.remove_chunk(encoder_out, encoder_out_lens,
+ chunk_outs=None)
# 2. Transducer-related I/O preparation
decoder_in, target, t_len, u_len = get_transducer_task_io(
text,
@@ -286,7 +289,7 @@
feats, feats_lengths = self.normalize(feats, feats_lengths)
# 4. Forward encoder
- encoder_out, encoder_out_lens = self.encoder(feats, feats_lengths)
+ encoder_out, encoder_out_lens, _ = self.encoder(feats, feats_lengths)
assert encoder_out.size(0) == speech.size(0), (
encoder_out.size(),
@@ -483,7 +486,7 @@
return loss_lm
-class UnifiedTransducerModel(AbsESPnetModel):
+class UnifiedTransducerModel(FunASRModel):
"""ESPnet2ASRTransducerModel module definition.
Args:
vocab_size: Size of complete vocabulary (w/ EOS and blank included).
@@ -515,7 +518,7 @@
frontend: Optional[AbsFrontend],
specaug: Optional[AbsSpecAug],
normalize: Optional[AbsNormalize],
- encoder: Encoder,
+ encoder: AbsEncoder,
decoder: RNNTDecoder,
joint_network: JointNetwork,
att_decoder: Optional[AbsAttDecoder] = None,
@@ -539,8 +542,6 @@
) -> None:
"""Construct an ESPnetASRTransducerModel object."""
super().__init__()
-
- assert check_argument_types()
# The following labels ID are reserved: 0 (blank) and vocab_size - 1 (sos/eos)
self.blank_id = 0
@@ -577,7 +578,7 @@
self.use_auxiliary_lm_loss = auxiliary_lm_loss_weight > 0
if self.use_auxiliary_ctc:
- self.ctc_lin = torch.nn.Linear(encoder.output_size, vocab_size)
+ self.ctc_lin = torch.nn.Linear(encoder.output_size(), vocab_size)
self.ctc_dropout_rate = auxiliary_ctc_dropout_rate
if self.use_auxiliary_att:
@@ -707,7 +708,7 @@
loss_lm = self._calc_lm_loss(decoder_out, target)
loss_trans = loss_trans_utt + loss_trans_chunk
- loss_ctc = loss_ctc + loss_ctc_chunk
+ loss_ctc = loss_ctc + loss_ctc_chunk
loss_ctc = loss_att + loss_att_chunk
loss = (
@@ -1012,4 +1013,4 @@
ignore_label=self.ignore_id,
)
- return loss_att, acc_att
+ return loss_att, acc_att
\ No newline at end of file
--
Gitblit v1.9.1