From 2cf4084b23db9bd9e8ce4db76d0628ef6655ed71 Mon Sep 17 00:00:00 2001
From: 夜雨飘零 <yeyupiaoling@foxmail.com>
Date: Sat, 03 Feb 2024 13:05:07 +0800
Subject: [PATCH] fix retract error (#1350)
---
funasr/bin/train.py | 22 +++++++++++++++-------
1 files changed, 15 insertions(+), 7 deletions(-)
diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index ef0d205..8ea0c0d 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -40,8 +40,7 @@
def main(**kwargs):
- # preprocess_config(kwargs)
- # import pdb; pdb.set_trace()
+ print(kwargs)
# set random seed
tables.print()
set_all_random_seed(kwargs.get("seed", 0))
@@ -142,33 +141,42 @@
scheduler_class = scheduler_classes.get(scheduler)
scheduler = scheduler_class(optim, **kwargs.get("scheduler_conf"))
- # import pdb;
- # pdb.set_trace()
+
# dataset
dataset_class = tables.dataset_classes.get(kwargs.get("dataset", "AudioDataset"))
dataset_tr = dataset_class(kwargs.get("train_data_set_list"), frontend=frontend, tokenizer=tokenizer, **kwargs.get("dataset_conf"))
+ dataset_val = dataset_class(kwargs.get("valid_data_set_list"), frontend=frontend, tokenizer=tokenizer,
+ **kwargs.get("dataset_conf"))
# dataloader
batch_sampler = kwargs["dataset_conf"].get("batch_sampler", "DynamicBatchLocalShuffleSampler")
- batch_sampler_class = tables.batch_sampler_classes.get(batch_sampler)
+ batch_sampler_val = None
if batch_sampler is not None:
+ batch_sampler_class = tables.batch_sampler_classes.get(batch_sampler)
batch_sampler = batch_sampler_class(dataset_tr, **kwargs.get("dataset_conf"))
+ batch_sampler_val = batch_sampler_class(dataset_val, is_training=False, **kwargs.get("dataset_conf"))
dataloader_tr = torch.utils.data.DataLoader(dataset_tr,
collate_fn=dataset_tr.collator,
batch_sampler=batch_sampler,
num_workers=kwargs.get("dataset_conf").get("num_workers", 4),
pin_memory=True)
-
+ dataloader_val = torch.utils.data.DataLoader(dataset_val,
+ collate_fn=dataset_val.collator,
+ batch_sampler=batch_sampler_val,
+ num_workers=kwargs.get("dataset_conf").get("num_workers", 4),
+ pin_memory=True)
trainer = Trainer(
model=model,
optim=optim,
scheduler=scheduler,
dataloader_train=dataloader_tr,
- dataloader_val=None,
+ dataloader_val=dataloader_val,
local_rank=local_rank,
use_ddp=use_ddp,
use_fsdp=use_fsdp,
+ output_dir=kwargs.get("output_dir", "./exp"),
+ resume=kwargs.get("resume", True),
**kwargs.get("train_conf"),
)
trainer.run()
--
Gitblit v1.9.1