游雁
2023-05-19 219c2482ab755fbd4e49dfbdee91bf1a8a4ec49a
funasr/models/e2e_asr_transducer.py
@@ -12,12 +12,12 @@
from funasr.models.specaug.abs_specaug import AbsSpecAug
from funasr.models.decoder.rnnt_decoder import RNNTDecoder
from funasr.models.decoder.abs_decoder import AbsDecoder as AbsAttDecoder
from funasr.models.encoder.conformer_encoder import ConformerChunkEncoder as Encoder
from funasr.models.encoder.abs_encoder import AbsEncoder
from funasr.models.joint_net.joint_network import JointNetwork
from funasr.modules.nets_utils import get_transducer_task_io
from funasr.layers.abs_normalize import AbsNormalize
from funasr.torch_utils.device_funcs import force_gatherable
from funasr.train.abs_espnet_model import AbsESPnetModel
from funasr.models.base_model import FunASRModel
if V(torch.__version__) >= V("1.6.0"):
    from torch.cuda.amp import autocast
@@ -28,7 +28,7 @@
        yield
class TransducerModel(AbsESPnetModel):
class TransducerModel(FunASRModel):
    """ESPnet2ASRTransducerModel module definition.
    Args:
@@ -62,7 +62,7 @@
        frontend: Optional[AbsFrontend],
        specaug: Optional[AbsSpecAug],
        normalize: Optional[AbsNormalize],
        encoder: Encoder,
        encoder: AbsEncoder,
        decoder: RNNTDecoder,
        joint_network: JointNetwork,
        att_decoder: Optional[AbsAttDecoder] = None,
@@ -286,7 +286,7 @@
                feats, feats_lengths = self.normalize(feats, feats_lengths)
        # 4. Forward encoder
        encoder_out, encoder_out_lens = self.encoder(feats, feats_lengths)
        encoder_out, encoder_out_lens, _ = self.encoder(feats, feats_lengths)
        assert encoder_out.size(0) == speech.size(0), (
            encoder_out.size(),
@@ -386,7 +386,7 @@
        if not self.training and (self.report_cer or self.report_wer):
            if self.error_calculator is None:
                from espnet2.asr_transducer.error_calculator import ErrorCalculator
                from funasr.modules.e2e_asr_common import ErrorCalculatorTransducer as ErrorCalculator
                self.error_calculator = ErrorCalculator(
                    self.decoder,
@@ -398,7 +398,7 @@
                    report_wer=self.report_wer,
                )
            cer_transducer, wer_transducer = self.error_calculator(encoder_out, target)
            cer_transducer, wer_transducer = self.error_calculator(encoder_out, target, t_len)
            return loss_transducer, cer_transducer, wer_transducer
@@ -483,7 +483,7 @@
        return loss_lm
class UnifiedTransducerModel(AbsESPnetModel):
class UnifiedTransducerModel(FunASRModel):
    """ESPnet2ASRTransducerModel module definition.
    Args:
        vocab_size: Size of complete vocabulary (w/ EOS and blank included).
@@ -515,7 +515,7 @@
        frontend: Optional[AbsFrontend],
        specaug: Optional[AbsSpecAug],
        normalize: Optional[AbsNormalize],
        encoder: Encoder,
        encoder: AbsEncoder,
        decoder: RNNTDecoder,
        joint_network: JointNetwork,
        att_decoder: Optional[AbsAttDecoder] = None,
@@ -531,8 +531,8 @@
        sym_blank: str = "<blank>",
        report_cer: bool = True,
        report_wer: bool = True,
        sym_sos: str = "<sos/eos>",
        sym_eos: str = "<sos/eos>",
        sym_sos: str = "<s>",
        sym_eos: str = "</s>",
        extract_feats_in_collect_stats: bool = True,
        lsm_weight: float = 0.0,
        length_normalized_loss: bool = False,
@@ -889,6 +889,8 @@
        if not self.training and (self.report_cer or self.report_wer):
            if self.error_calculator is None:
                from funasr.modules.e2e_asr_common import ErrorCalculatorTransducer as ErrorCalculator
                self.error_calculator = ErrorCalculator(
                    self.decoder,
                    self.joint_network,
@@ -899,7 +901,7 @@
                    report_wer=self.report_wer,
                )
            cer_transducer, wer_transducer = self.error_calculator(encoder_out, target)
            cer_transducer, wer_transducer = self.error_calculator(encoder_out, target, t_len)
            return loss_transducer, cer_transducer, wer_transducer
        return loss_transducer, None, None