From 93ef505e2d426b6aa1e58c0b4721999de789ff8e Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Sun, 28 Apr 2024 15:14:57 +0800
Subject: [PATCH] Dev gzf exp (#1670)
---
funasr/bin/train.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
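
Notes (kept between the "---" cut line and the first "diff --git" line,
the region that git am ignores, so nothing here lands in the commit):

* freeze_param check: a plain str is itself a collections.abc.Sequence,
  so a single parameter-name prefix passed as a string was never wrapped
  in a tuple and the loop below iterated over its characters. Checking
  against (list, tuple) wraps a bare string as intended, and the now
  unused Sequence import is dropped. A minimal standalone sketch of the
  difference ("encoder" is a hypothetical config value, not taken from
  this patch):

      from collections.abc import Sequence

      freeze_param = "encoder"  # hypothetical single prefix

      print(isinstance(freeze_param, Sequence))       # True  -> not wrapped
      print(isinstance(freeze_param, (list, tuple)))  # False -> wrapped

      if not isinstance(freeze_param, (list, tuple)):
          freeze_param = (freeze_param,)
      print(freeze_param)  # ('encoder',)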
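* SummaryWriter: the third hunk comments out the rank-0 guard, so under
  DistributedDataParallel every rank now opens a SummaryWriter on the
  same tensorboard_dir. When all ranks are meant to log, a common
  pattern (a sketch of a general alternative, not something this patch
  does) is one event-file subdirectory per rank:

      import os
      from tensorboardX import SummaryWriter

      rank = 0                         # stand-in for trainer.rank
      tensorboard_dir = "tensorboard"  # stand-in for the configured dir

      # One event-file directory per rank instead of a shared one.
      writer = SummaryWriter(os.path.join(tensorboard_dir, f"rank{rank}"))
      writer.add_scalar("train/loss", 0.0, global_step=0)
      writer.close()

* step_cur_in_epoch: the last hunk resets the trainer's in-epoch step
  counter to 0 after validation and scheduler.step(), presumably so the
  next epoch's step accounting starts from zero.
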
diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index 448e464..2af6a59 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -13,7 +13,7 @@
from contextlib import nullcontext
import torch.distributed as dist
-from collections.abc import Sequence
+
from omegaconf import DictConfig, OmegaConf
from torch.cuda.amp import autocast, GradScaler
from torch.nn.parallel import DistributedDataParallel as DDP
@@ -99,7 +99,7 @@
if freeze_param is not None:
if "," in freeze_param:
freeze_param = eval(freeze_param)
- if not isinstance(freeze_param, Sequence):
+ if not isinstance(freeze_param, (list, tuple)):
freeze_param = (freeze_param,)
logging.info("freeze_param is not None: %s", freeze_param)
for t in freeze_param:
@@ -193,7 +193,7 @@
try:
from tensorboardX import SummaryWriter
- writer = SummaryWriter(tensorboard_dir) if trainer.rank == 0 else None
+ writer = SummaryWriter(tensorboard_dir) # if trainer.rank == 0 else None
except:
writer = None
@@ -206,6 +206,7 @@
epoch, data_split_i=data_split_i, start_step=trainer.start_step
)
trainer.start_step = 0
+
trainer.train_epoch(
model=model,
optim=optim,
@@ -225,7 +226,7 @@
model=model, dataloader_val=dataloader_val, epoch=epoch, writer=writer
)
scheduler.step()
-
+ trainer.step_cur_in_epoch = 0
trainer.save_checkpoint(epoch, model=model, optim=optim, scheduler=scheduler, scaler=scaler)
time2 = time.perf_counter()
--
Gitblit v1.9.1