From 9a9c3b75b5b3359701844a91a9fae6d2979866cd Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Wed, 17 Jan 2024 18:28:28 +0800
Subject: [PATCH] Funasr1.0 (#1261)

---
 funasr/bin/train.py |   17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/funasr/bin/train.py b/funasr/bin/train.py
index 7ae687e..0334006 100644
--- a/funasr/bin/train.py
+++ b/funasr/bin/train.py
@@ -141,30 +141,37 @@
     scheduler_class = scheduler_classes.get(scheduler)
     scheduler = scheduler_class(optim, **kwargs.get("scheduler_conf"))
 
-    # import pdb;
-    # pdb.set_trace()
+
     # dataset
     dataset_class = tables.dataset_classes.get(kwargs.get("dataset", "AudioDataset"))
     dataset_tr = dataset_class(kwargs.get("train_data_set_list"), frontend=frontend, tokenizer=tokenizer, **kwargs.get("dataset_conf"))
+    dataset_val = dataset_class(kwargs.get("valid_data_set_list"), frontend=frontend, tokenizer=tokenizer,
+                               **kwargs.get("dataset_conf"))
 
     # dataloader
     batch_sampler = kwargs["dataset_conf"].get("batch_sampler", "DynamicBatchLocalShuffleSampler")
-    batch_sampler_class = tables.batch_sampler_classes.get(batch_sampler)
+    batch_sampler_val = None
     if batch_sampler is not None:
+        batch_sampler_class = tables.batch_sampler_classes.get(batch_sampler)
         batch_sampler = batch_sampler_class(dataset_tr, **kwargs.get("dataset_conf"))
+        batch_sampler_val = batch_sampler_class(dataset_val, is_training=False, **kwargs.get("dataset_conf"))
     dataloader_tr = torch.utils.data.DataLoader(dataset_tr,
                                                 collate_fn=dataset_tr.collator,
                                                 batch_sampler=batch_sampler,
                                                 num_workers=kwargs.get("dataset_conf").get("num_workers", 4),
                                                 pin_memory=True)
     
-
+    dataloader_val = torch.utils.data.DataLoader(dataset_val,
+                                                collate_fn=dataset_val.collator,
+                                                batch_sampler=batch_sampler_val,
+                                                num_workers=kwargs.get("dataset_conf").get("num_workers", 4),
+                                                pin_memory=True)
     trainer = Trainer(
         model=model,
         optim=optim,
         scheduler=scheduler,
         dataloader_train=dataloader_tr,
-        dataloader_val=None,
+        dataloader_val=dataloader_val,
         local_rank=local_rank,
         use_ddp=use_ddp,
         use_fsdp=use_fsdp,

--
Gitblit v1.9.1