From 4e613ec064052bc1374f26c201e7ec334c12d587 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Thu, 23 May 2024 09:55:47 +0800
Subject: [PATCH] wenetspeech
---
funasr/bin/train.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)
diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index 2af6a59..c3556d1 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -198,14 +198,13 @@
writer = None
dataloader_tr, dataloader_val = None, None
- for epoch in range(trainer.start_epoch, trainer.max_epoch + 1):
+ for epoch in range(trainer.start_epoch, trainer.max_epoch):
time1 = time.perf_counter()
for data_split_i in range(trainer.start_data_split_i, dataloader.data_split_num):
dataloader_tr, dataloader_val = dataloader.build_iter(
epoch, data_split_i=data_split_i, start_step=trainer.start_step
)
- trainer.start_step = 0
trainer.train_epoch(
model=model,
@@ -218,16 +217,21 @@
writer=writer,
data_split_i=data_split_i,
data_split_num=dataloader.data_split_num,
+ start_step=trainer.start_step,
)
+ trainer.start_step = 0
torch.cuda.empty_cache()
+ trainer.start_data_split_i = 0
trainer.validate_epoch(
- model=model, dataloader_val=dataloader_val, epoch=epoch, writer=writer
+ model=model, dataloader_val=dataloader_val, epoch=epoch + 1, writer=writer
)
scheduler.step()
- trainer.step_cur_in_epoch = 0
- trainer.save_checkpoint(epoch, model=model, optim=optim, scheduler=scheduler, scaler=scaler)
+ trainer.step_in_epoch = 0
+ trainer.save_checkpoint(
+ epoch + 1, model=model, optim=optim, scheduler=scheduler, scaler=scaler
+ )
time2 = time.perf_counter()
time_escaped = (time2 - time1) / 3600.0
@@ -237,6 +241,8 @@
f"estimated to finish {trainer.max_epoch} "
f"epoch: {(trainer.max_epoch - epoch) * time_escaped:.3f} hours\n"
)
+ trainer.train_acc_avg = 0.0
+ trainer.train_loss_avg = 0.0
if trainer.rank == 0:
average_checkpoints(trainer.output_dir, trainer.avg_nbest_model)
--
Gitblit v1.9.1