zhifu gao
2023-05-18 97a689d65da434345a641a909f13b78e5690c86b
funasr/tasks/abs_task.py
@@ -30,6 +30,7 @@
import torch.nn
import torch.optim
import yaml
from funasr.models.base_model import FunASRModel
from torch.utils.data import DataLoader
from typeguard import check_argument_types
from typeguard import check_return_type
@@ -44,19 +45,18 @@
from funasr.iterators.multiple_iter_factory import MultipleIterFactory
from funasr.iterators.sequence_iter_factory import SequenceIterFactory
from funasr.main_funcs.collect_stats import collect_stats
from funasr.optimizers.sgd import SGD
from funasr.optimizers.fairseq_adam import FairseqAdam
from funasr.optimizers.sgd import SGD
from funasr.samplers.build_batch_sampler import BATCH_TYPES
from funasr.samplers.build_batch_sampler import build_batch_sampler
from funasr.samplers.unsorted_batch_sampler import UnsortedBatchSampler
from funasr.schedulers.noam_lr import NoamLR
from funasr.schedulers.warmup_lr import WarmupLR
from funasr.schedulers.tri_stage_scheduler import TriStageLR
from funasr.schedulers.warmup_lr import WarmupLR
from funasr.torch_utils.load_pretrained_model import load_pretrained_model
from funasr.torch_utils.model_summary import model_summary
from funasr.torch_utils.pytorch_version import pytorch_cudnn_version
from funasr.torch_utils.set_all_random_seed import set_all_random_seed
from funasr.train.abs_espnet_model import AbsESPnetModel
from funasr.train.class_choices import ClassChoices
from funasr.train.distributed_utils import DistributedOption
from funasr.train.trainer import Trainer
@@ -230,8 +230,8 @@
        >>> cls.check_task_requirements()
        If your model is defined as following,
        >>> from funasr.train.abs_espnet_model import AbsESPnetModel
        >>> class Model(AbsESPnetModel):
        >>> from funasr.models.base_model import FunASRModel
        >>> class Model(FunASRModel):
        ...     def forward(self, input, output, opt=None):  pass
        then "required_data_names" should be as
@@ -251,8 +251,8 @@
        >>> cls.check_task_requirements()
        If your model is defined as follows,
        >>> from funasr.train.abs_espnet_model import AbsESPnetModel
        >>> class Model(AbsESPnetModel):
        >>> from funasr.models.base_model import FunASRModel
        >>> class Model(FunASRModel):
        ...     def forward(self, input, output, opt=None):  pass
        then "optional_data_names" should be as
@@ -263,8 +263,9 @@
    @classmethod
    @abstractmethod
    def build_model(cls, args: argparse.Namespace) -> AbsESPnetModel:
    def build_model(cls, args: argparse.Namespace) -> FunASRModel:
        raise NotImplementedError
    @classmethod
    def get_parser(cls) -> config_argparse.ArgumentParser:
@@ -1172,7 +1173,8 @@
                    args.batch_bins = args.batch_bins * args.ngpu
        # filter samples if wav.scp and text are mismatch
        if (args.train_shape_file is None and args.dataset_type == "small") or args.train_data_file is None and args.dataset_type == "large":
        if (
                args.train_shape_file is None and args.dataset_type == "small") or args.train_data_file is None and args.dataset_type == "large":
            if not args.simple_ddp or distributed_option.dist_rank == 0:
                filter_wav_text(args.data_dir, args.train_set)
                filter_wav_text(args.data_dir, args.dev_set)
@@ -1181,8 +1183,10 @@
        if args.train_shape_file is None and args.dataset_type == "small":
            if not args.simple_ddp or distributed_option.dist_rank == 0:
                calc_shape(args.data_dir, args.train_set, args.frontend_conf, args.speech_length_min, args.speech_length_max)
                calc_shape(args.data_dir, args.dev_set, args.frontend_conf, args.speech_length_min, args.speech_length_max)
                calc_shape(args.data_dir, args.train_set, args.frontend_conf, args.speech_length_min,
                           args.speech_length_max)
                calc_shape(args.data_dir, args.dev_set, args.frontend_conf, args.speech_length_min,
                           args.speech_length_max)
            if args.simple_ddp:
                dist.barrier()
            args.train_shape_file = [os.path.join(args.data_dir, args.train_set, "speech_shape")]
@@ -1244,9 +1248,9 @@
        # 2. Build model
        model = cls.build_model(args=args)
        if not isinstance(model, AbsESPnetModel):
        if not isinstance(model, FunASRModel):
            raise RuntimeError(
                f"model must inherit {AbsESPnetModel.__name__}, but got {type(model)}"
                f"model must inherit {FunASRModel.__name__}, but got {type(model)}"
            )
        model = model.to(
            dtype=getattr(torch, args.train_dtype),
@@ -1374,15 +1378,21 @@
            if args.dataset_type == "large":
                from funasr.datasets.large_datasets.build_dataloader import ArkDataLoader
                train_iter_factory = ArkDataLoader(args.train_data_file, args.token_list, args.dataset_conf,
                                                   frontend_conf=args.frontend_conf if hasattr(args, "frontend_conf") else None,
                                                   seg_dict_file=args.seg_dict_file if hasattr(args, "seg_dict_file") else None,
                                                   punc_dict_file=args.punc_list if hasattr(args, "punc_list") else None,
                                                   frontend_conf=args.frontend_conf if hasattr(args,
                                                                                               "frontend_conf") else None,
                                                   seg_dict_file=args.seg_dict_file if hasattr(args,
                                                                                               "seg_dict_file") else None,
                                                   punc_dict_file=args.punc_list if hasattr(args,
                                                                                            "punc_list") else None,
                                                   bpemodel_file=args.bpemodel if hasattr(args, "bpemodel") else None,
                                                   mode="train")
                valid_iter_factory = ArkDataLoader(args.valid_data_file, args.token_list, args.dataset_conf,
                                                   frontend_conf=args.frontend_conf if hasattr(args, "frontend_conf") else None,
                                                   seg_dict_file=args.seg_dict_file if hasattr(args, "seg_dict_file") else None,
                                                   punc_dict_file=args.punc_list if hasattr(args, "punc_list") else None,
                valid_iter_factory = ArkDataLoader(args.valid_data_file, args.token_list, args.dataset_conf,
                                                   frontend_conf=args.frontend_conf if hasattr(args,
                                                                                               "frontend_conf") else None,
                                                   seg_dict_file=args.seg_dict_file if hasattr(args,
                                                                                               "seg_dict_file") else None,
                                                   punc_dict_file=args.punc_list if hasattr(args,
                                                                                            "punc_list") else None,
                                                   bpemodel_file=args.bpemodel if hasattr(args, "bpemodel") else None,
                                                   mode="eval")
            elif args.dataset_type == "small":
@@ -1929,7 +1939,7 @@
            model_file: Union[Path, str] = None,
            cmvn_file: Union[Path, str] = None,
            device: str = "cpu",
    ) -> Tuple[AbsESPnetModel, argparse.Namespace]:
    ) -> Tuple[FunASRModel, argparse.Namespace]:
        """Build model from the files.
        This method is used for inference or fine-tuning.
@@ -1956,9 +1966,9 @@
            args["cmvn_file"] = cmvn_file
        args = argparse.Namespace(**args)
        model = cls.build_model(args)
        if not isinstance(model, AbsESPnetModel):
        if not isinstance(model, FunASRModel):
            raise RuntimeError(
                f"model must inherit {AbsESPnetModel.__name__}, but got {type(model)}"
                f"model must inherit {FunASRModel.__name__}, but got {type(model)}"
            )
        model.to(device)
        if model_file is not None: