From e299cfecaf979833d9c4d7c70e44cb92ea066afe Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Thu, 09 May 2024 20:02:37 +0800
Subject: [PATCH] total_time/accum_grad

---
 funasr/bin/train.py |    6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index 97516eb..643df71 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -198,14 +198,13 @@
         writer = None
 
     dataloader_tr, dataloader_val = None, None
-    for epoch in range(trainer.start_epoch, trainer.max_epoch + 1):
+    for epoch in range(trainer.start_epoch, trainer.max_epoch):
         time1 = time.perf_counter()
 
         for data_split_i in range(trainer.start_data_split_i, dataloader.data_split_num):
             dataloader_tr, dataloader_val = dataloader.build_iter(
                 epoch, data_split_i=data_split_i, start_step=trainer.start_step
             )
-            trainer.start_step = 0
 
             trainer.train_epoch(
                 model=model,
@@ -218,10 +217,13 @@
                 writer=writer,
                 data_split_i=data_split_i,
                 data_split_num=dataloader.data_split_num,
+                start_step=trainer.start_step,
             )
+            trainer.start_step = 0
 
             torch.cuda.empty_cache()
 
+        trainer.start_data_split_i = 0
         trainer.validate_epoch(
             model=model, dataloader_val=dataloader_val, epoch=epoch + 1, writer=writer
         )

--
Gitblit v1.9.1