zhifu gao
2024-04-25 80bd14e6bbb7bb282ff3832194648dc4a16157ca
funasr/train_utils/trainer.py
@@ -108,6 +108,9 @@
        self.val_acc_step_or_eoch = {}
        self.val_loss_step_or_eoch = {}
        self.reset_gpu_cache = kwargs.get("reset_gpu_cache", False)
    def save_checkpoint(
        self,
        epoch,
@@ -325,6 +328,12 @@
                with maybe_autocast(self.use_fp16):
                    retval = model(**batch)
                    if (
                        self.reset_gpu_cache
                        and (torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024) > 70
                    ):
                        torch.cuda.empty_cache()
                time3 = time.perf_counter()
                speed_stats["forward_time"] = f"{time3 - time2:0.3f}"
                loss, stats, weight = retval