python/FunASR-XL.git

			@@ -26,7 +26,6 @@
			import torch
			import torch.nn
			import torch.optim
			from typeguard import check_argument_types

			from funasr.iterators.abs_iter_factory import AbsIterFactory
			from funasr.main_funcs.average_nbest_models import average_nbest_models
			@@ -44,6 +43,7 @@
			from funasr.train.reporter import Reporter
			from funasr.train.reporter import SubReporter
			from funasr.utils.build_dataclass import build_dataclass
			from funasr.utils.kwargs2args import kwargs2args

			if torch.distributed.is_available():
			from torch.distributed import ReduceOp
			@@ -126,7 +126,6 @@
			@classmethod
			def build_options(cls, args: argparse.Namespace) -> TrainerOptions:
			    """Create the options object consumed by train() and eval().

			    Args:
			        args: Namespace handed to build_dataclass() together with
			            the TrainerOptions class.

			    Returns:
			        Whatever build_dataclass(TrainerOptions, args) produces.
			    """
			    # typeguard check; it must stay in this function's own frame because
			    # it validates this call's arguments against the annotations above.
			    assert check_argument_types()
			    trainer_options = build_dataclass(TrainerOptions, args)
			    return trainer_options

			@classmethod
			@@ -187,7 +186,6 @@
			distributed_option: DistributedOption,
			) -> None:
			"""Perform training. This method performs the main process of training."""
			assert check_argument_types()
			# NOTE(kamo): Don't check the type more strictly as far trainer_options
			assert is_dataclass(trainer_options), type(trainer_options)
			assert len(optimizers) == len(schedulers), (len(optimizers), len(schedulers))
			@@ -280,14 +278,11 @@
			for iepoch in range(start_epoch, trainer_options.max_epoch + 1):
			if iepoch != start_epoch:
			logging.info(
			"{}/{}epoch started. Estimated time to finish: {}".format(
			"{}/{}epoch started. Estimated time to finish: {} hours".format(
			iepoch,
			trainer_options.max_epoch,
			humanfriendly.format_timespan(
			(time.perf_counter() - start_time)
			/ (iepoch - start_epoch)
			* (trainer_options.max_epoch - iepoch + 1)
			),
			(time.perf_counter() - start_time) / 3600.0 / (iepoch - start_epoch) * (
			trainer_options.max_epoch - iepoch + 1),
			)
			)
			else:
			@@ -371,7 +366,7 @@
			],
			"scaler": scaler.state_dict() if scaler is not None else None,
			"ema_model": model.encoder.ema.model.state_dict()
			if hasattr(model.encoder, "ema") and model.encoder.ema is not None else None,
			if hasattr(model, "encoder") and hasattr(model.encoder, "ema") and model.encoder.ema is not None else None,
			},
			buffer,
			)
			@@ -550,7 +545,6 @@
			options: TrainerOptions,
			distributed_option: DistributedOption,
			) -> Tuple[bool, bool]:
			assert check_argument_types()

			grad_noise = options.grad_noise
			accum_grad = options.accum_grad
			@@ -619,6 +613,24 @@
			if no_forward_run:
			all_steps_are_invalid = False
			continue

			if iiter == 1 and summary_writer is not None:
			try:
			args = kwargs2args(model.forward, batch)
			except (ValueError, TypeError):
			logging.warning(
			"inpect.signature() is failed for the model. "
			"The graph can't be added for tensorboard."
			)
			else:
			try:
			summary_writer.add_graph(model, args, use_strict_trace=False)
			except Exception:
			logging.warning(
			"summary_writer.add_graph() is failed for the model. "
			"The graph can't be added for tensorboard."
			)
			del args

			with autocast(scaler is not None):
			with reporter.measure_time("forward_time"):
			@@ -826,7 +838,6 @@
			options: TrainerOptions,
			distributed_option: DistributedOption,
			) -> None:
			assert check_argument_types()
			ngpu = options.ngpu
			no_forward_run = options.no_forward_run
			distributed = distributed_option.distributed