From 961ec280afb02f2464ce4f7b2fd7c821dd24044b Mon Sep 17 00:00:00 2001
From: zhifu gao <zhifu.gzf@alibaba-inc.com>
Date: Mon, 20 May 2024 15:31:46 +0800
Subject: [PATCH] Dev gzf deepspeed (#1736)

---
 funasr/datasets/audio_datasets/espnet_samplers.py |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/funasr/datasets/audio_datasets/espnet_samplers.py b/funasr/datasets/audio_datasets/espnet_samplers.py
index 528f593..b358fa3 100644
--- a/funasr/datasets/audio_datasets/espnet_samplers.py
+++ b/funasr/datasets/audio_datasets/espnet_samplers.py
@@ -72,6 +72,7 @@
         self.min_token_length = kwargs.get("min_token_length", 0)
         self.length_scale_source = kwargs.get("length_scale_source", 1.0)
         self.start_step = start_step
+        self.batch_num = 1
         if self.start_step > 0:
             logging.info(f"Warning, start_step > 0, dataloader start from step: {self.start_step}")
         # super().__init__(dataset, num_replicas=num_replicas, rank=rank,
@@ -146,6 +147,7 @@
         start_idx = self.rank * batches_per_rank
         end_idx = start_idx + batches_per_rank
         rank_batches = buffer_batches[start_idx + self.start_step : end_idx]
+        self.batch_num = len(rank_batches)
         logging.info(
             f"rank: {self.rank}, dataloader start from step: {self.start_step}, batch_num: {end_idx-start_idx}, batch_num_after_step: {len(rank_batches)}"
         )
@@ -154,7 +156,7 @@
 
     def __len__(self):
         # Calculate the number of batches per epoch for the current rank
-        return 1
+        return self.batch_num
 
     def set_epoch(self, epoch):
         # Set the epoch for shuffling

--
Gitblit v1.9.1