From a0f03bd2a87d97d47a1636bbe6f0855a43160331 Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Wed, 15 May 2024 19:48:50 +0800
Subject: [PATCH] Dev gzf deepspeed (#1732)
---
funasr/bin/train.py | 3 +++
1 file changed, 3 insertions(+), 0 deletions(-)
diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index 7695e51..c3556d1 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -223,6 +223,7 @@
torch.cuda.empty_cache()
+ trainer.start_data_split_i = 0
trainer.validate_epoch(
model=model, dataloader_val=dataloader_val, epoch=epoch + 1, writer=writer
)
@@ -240,6 +241,8 @@
f"estimated to finish {trainer.max_epoch} "
f"epoch: {(trainer.max_epoch - epoch) * time_escaped:.3f} hours\n"
)
+ trainer.train_acc_avg = 0.0
+ trainer.train_loss_avg = 0.0
if trainer.rank == 0:
average_checkpoints(trainer.output_dir, trainer.avg_nbest_model)
--
Gitblit v1.9.1