游雁
2023-12-21 a1b0cd33d50cee3e4612d1e787399e508b453a4a
funasr/models/paraformer/model.py
@@ -25,10 +25,10 @@
from funasr.datasets.audio_datasets.load_audio_extract_fbank import load_audio, extract_fbank
from funasr.utils import postprocess_utils
from funasr.utils.datadir_writer import DatadirWriter
from funasr.utils.register import register_class, registry_tables
from funasr.register import tables
from funasr.models.ctc.ctc import CTC
@register_class("model_classes", "Paraformer")
@tables.register("model_classes", "Paraformer")
class Paraformer(nn.Module):
   """
   Author: Speech Lab of DAMO Academy, Alibaba Group
@@ -79,17 +79,17 @@
      super().__init__()
      if specaug is not None:
         specaug_class = registry_tables.specaug_classes.get(specaug.lower())
         specaug_class = tables.specaug_classes.get(specaug.lower())
         specaug = specaug_class(**specaug_conf)
      if normalize is not None:
         normalize_class = registry_tables.normalize_classes.get(normalize.lower())
         normalize_class = tables.normalize_classes.get(normalize.lower())
         normalize = normalize_class(**normalize_conf)
      encoder_class = registry_tables.encoder_classes.get(encoder.lower())
      encoder_class = tables.encoder_classes.get(encoder.lower())
      encoder = encoder_class(input_size=input_size, **encoder_conf)
      encoder_output_size = encoder.output_size()
      if decoder is not None:
         decoder_class = registry_tables.decoder_classes.get(decoder.lower())
         decoder_class = tables.decoder_classes.get(decoder.lower())
         decoder = decoder_class(
            vocab_size=vocab_size,
            encoder_output_size=encoder_output_size,
@@ -104,7 +104,7 @@
            odim=vocab_size, encoder_output_size=encoder_output_size, **ctc_conf
         )
      if predictor is not None:
         predictor_class = registry_tables.predictor_classes.get(predictor.lower())
         predictor_class = tables.predictor_classes.get(predictor.lower())
         predictor = predictor_class(**predictor_conf)
      
      # note that eos is the same as sos (equivalent ID)