From 0a4a1d5257dace9561d95b38a9386539908dcd5e Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Tue, 23 Apr 2024 12:48:52 +0800
Subject: [PATCH] Dev gzf exp (#1645)

---
 funasr/datasets/audio_datasets/espnet_samplers.py |   11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/funasr/datasets/audio_datasets/espnet_samplers.py b/funasr/datasets/audio_datasets/espnet_samplers.py
index bca0753..4bb34f3 100644
--- a/funasr/datasets/audio_datasets/espnet_samplers.py
+++ b/funasr/datasets/audio_datasets/espnet_samplers.py
@@ -32,8 +32,9 @@
     def __init__(self, dataset,
                  batch_size,
                  batch_type="token",
-                 num_replicas=None,
                  rank=None,
+                 num_replicas=None,
+                 rank_split=False,
                  shuffle=True,
                  drop_last=False,
                  is_training: bool = True,
@@ -45,6 +46,10 @@
             rank = dist.get_rank()
             num_replicas = dist.get_world_size()
         except:
+            rank = 0
+            num_replicas = 1
+        if rank_split:
+            logging.info(f"Warning, rank_split: {rank_split}, batch and shuffle data in local rank")
             rank = 0
             num_replicas = 1
         self.rank = rank
@@ -65,8 +70,8 @@
         self.length_scale_source = kwargs.get("length_scale_source", 1.0)
 
 
-        super().__init__(dataset, num_replicas=num_replicas, rank=rank,
-                         shuffle=shuffle, drop_last=drop_last)
+        # super().__init__(dataset, num_replicas=num_replicas, rank=rank,
+        #                  shuffle=shuffle, drop_last=drop_last)
     def __iter__(self):
         if self.shuffle:
             g = torch.Generator()

--
Gitblit v1.9.1