From a70f5b3edf22ac889724aa9a06cefbb316374b28 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Sun, 24 Mar 2024 01:44:18 +0800
Subject: [PATCH] finetune
---
funasr/train_utils/trainer.py | 2 +-
funasr/bin/train.py | 21 +++++++++++----------
2 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index 5cf54da..e446e54 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -173,10 +173,10 @@
except:
writer = None
- # if use_ddp or use_fsdp:
- # context = Join([model])
- # else:
- context = nullcontext()
+ if use_ddp or use_fsdp:
+ context = Join([model])
+ else:
+ context = nullcontext()
for epoch in range(trainer.start_epoch, trainer.max_epoch + 1):
time1 = time.perf_counter()
@@ -192,13 +192,14 @@
epoch=epoch,
writer=writer
)
+ with context:
+ trainer.validate_epoch(
+ model=model,
+ dataloader_val=dataloader_val,
+ epoch=epoch,
+ writer=writer
+ )
scheduler.step()
- trainer.validate_epoch(
- model=model,
- dataloader_val=dataloader_val,
- epoch=epoch,
- writer=writer
- )
trainer.save_checkpoint(epoch, model=model, optim=optim, scheduler=scheduler, scaler=scaler)
diff --git a/funasr/train_utils/trainer.py b/funasr/train_utils/trainer.py
index cf23483..e554aca 100644
--- a/funasr/train_utils/trainer.py
+++ b/funasr/train_utils/trainer.py
@@ -398,7 +398,7 @@
speed_stats = {}
time5 = time.perf_counter()
# iterator_stop = torch.tensor(0).to(self.device)
-
+ dataloader_val.batch_sampler.set_epoch(epoch)
for batch_idx, batch in enumerate(dataloader_val):
# if self.use_ddp or self.use_fsdp:
# dist.all_reduce(iterator_stop, dist.ReduceOp.SUM)
--
Gitblit v1.9.1