游雁
2023-11-16 4ace5a95b052d338947fc88809a440ccd55cf6b4
funasr/train/trainer.py
@@ -26,7 +26,6 @@
import torch
import torch.nn
import torch.optim
from typeguard import check_argument_types
from funasr.iterators.abs_iter_factory import AbsIterFactory
from funasr.main_funcs.average_nbest_models import average_nbest_models
@@ -44,6 +43,7 @@
from funasr.train.reporter import Reporter
from funasr.train.reporter import SubReporter
from funasr.utils.build_dataclass import build_dataclass
from funasr.utils.kwargs2args import kwargs2args
if torch.distributed.is_available():
    from torch.distributed import ReduceOp
@@ -126,7 +126,6 @@
    @classmethod
    def build_options(cls, args: argparse.Namespace) -> TrainerOptions:
        """Build options consumed by train(), eval()"""
        assert check_argument_types()
        return build_dataclass(TrainerOptions, args)
    @classmethod
@@ -187,7 +186,6 @@
        distributed_option: DistributedOption,
    ) -> None:
        """Perform training. This method performs the main process of training."""
        assert check_argument_types()
        # NOTE(kamo): Don't check the type more strictly as far trainer_options
        assert is_dataclass(trainer_options), type(trainer_options)
        assert len(optimizers) == len(schedulers), (len(optimizers), len(schedulers))
@@ -371,7 +369,7 @@
                            ],
                            "scaler": scaler.state_dict() if scaler is not None else None,
                            "ema_model": model.encoder.ema.model.state_dict()
                            if hasattr(model.encoder, "ema") and model.encoder.ema is not None else None,
                            if hasattr(model, "encoder") and hasattr(model.encoder, "ema") and model.encoder.ema is not None else None,
                        },
                        buffer,
                    )
@@ -550,7 +548,6 @@
        options: TrainerOptions,
        distributed_option: DistributedOption,
    ) -> Tuple[bool, bool]:
        assert check_argument_types()
        grad_noise = options.grad_noise
        accum_grad = options.accum_grad
@@ -619,6 +616,24 @@
            if no_forward_run:
                all_steps_are_invalid = False
                continue
            if iiter == 1 and summary_writer is not None:
                try:
                    args = kwargs2args(model.forward, batch)
                except (ValueError, TypeError):
                    logging.warning(
                        "inpect.signature() is failed for the model. "
                        "The graph can't be added for tensorboard."
                    )
                else:
                    try:
                        summary_writer.add_graph(model, args, use_strict_trace=False)
                    except Exception:
                        logging.warning(
                            "summary_writer.add_graph() is failed for the model. "
                            "The graph can't be added for tensorboard."
                        )
                    del args
            with autocast(scaler is not None):
                with reporter.measure_time("forward_time"):
@@ -826,7 +841,6 @@
        options: TrainerOptions,
        distributed_option: DistributedOption,
    ) -> None:
        assert check_argument_types()
        ngpu = options.ngpu
        no_forward_run = options.no_forward_run
        distributed = distributed_option.distributed