| | |
| | | import torch |
| | | |
| | | from funasr.layers.global_mvn import GlobalMVN |
| | | from funasr.layers.label_aggregation import LabelAggregate |
| | | from funasr.layers.label_aggregation import LabelAggregate, LabelAggregateMaxPooling |
| | | from funasr.layers.utterance_mvn import UtteranceMVN |
| | | from funasr.models.e2e_diar_eend_ola import DiarEENDOLAModel |
| | | from funasr.models.e2e_diar_sond import DiarSondModel |
| | |
| | | from funasr.models.frontend.windowing import SlidingWindow |
| | | from funasr.models.specaug.specaug import SpecAug |
| | | from funasr.models.specaug.specaug import SpecAugLFR |
| | | from funasr.models.specaug.abs_profileaug import AbsProfileAug |
| | | from funasr.models.specaug.profileaug import ProfileAug |
| | | from funasr.modules.eend_ola.encoder import EENDOLATransformerEncoder |
| | | from funasr.modules.eend_ola.encoder_decoder_attractor import EncoderDecoderAttractor |
| | | from funasr.torch_utils.initialize import initialize |
| | |
| | | default=None, |
| | | optional=True, |
| | | ) |
| | | profileaug_choices = ClassChoices( |
| | | name="profileaug", |
| | | classes=dict( |
| | | profileaug=ProfileAug, |
| | | ), |
| | | type_check=AbsProfileAug, |
| | | default=None, |
| | | optional=True, |
| | | ) |
| | | normalize_choices = ClassChoices( |
| | | "normalize", |
| | | classes=dict( |
| | |
| | | label_aggregator_choices = ClassChoices( |
| | | "label_aggregator", |
| | | classes=dict( |
| | | label_aggregator=LabelAggregate |
| | | label_aggregator=LabelAggregate, |
| | | label_aggregator_max_pool=LabelAggregateMaxPooling, |
| | | ), |
| | | default=None, |
| | | optional=True, |
| | |
| | | frontend_choices, |
| | | # --specaug and --specaug_conf |
| | | specaug_choices, |
| | | # --profileaug and --profileaug_conf |
| | | profileaug_choices, |
| | | # --normalize and --normalize_conf |
| | | normalize_choices, |
| | | # --label_aggregator and --label_aggregator_conf |
| | |
| | | else: |
| | | specaug = None |
| | | |
| | | # Data augmentation for Profiles |
| | | if hasattr(args, "profileaug") and args.profileaug is not None: |
| | | profileaug_class = profileaug_choices.get_class(args.profileaug) |
| | | profileaug = profileaug_class(**args.profileaug_conf) |
| | | else: |
| | | profileaug = None |
| | | |
| | | # normalization layer |
| | | if args.normalize is not None: |
| | | normalize_class = normalize_choices.get_class(args.normalize) |
| | |
| | | vocab_size=vocab_size, |
| | | frontend=frontend, |
| | | specaug=specaug, |
| | | profileaug=profileaug, |
| | | normalize=normalize, |
| | | label_aggregator=label_aggregator, |
| | | encoder=encoder, |