From 3eee773814c392e497557bbad501e0add4c8eca9 Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Sun, 09 Jun 2024 02:11:42 +0800
Subject: [PATCH] fix sampler: start batch count at 1 and log len(rank_batches[rank]) as batch_num

---
 funasr/datasets/audio_datasets/samplers.py |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/funasr/datasets/audio_datasets/samplers.py b/funasr/datasets/audio_datasets/samplers.py
index 94e9209..f8f744c 100644
--- a/funasr/datasets/audio_datasets/samplers.py
+++ b/funasr/datasets/audio_datasets/samplers.py
@@ -392,7 +392,7 @@
             )
             batch = []
             max_len_in_batch = 0
-            count = 0
+            count = 1
             for idx in buffer:
                 original_sample_length = self.dataset.get_source_len(idx)
                 if original_sample_length > self.max_token_length:
@@ -410,7 +410,7 @@
                     buffer_batches.append(batch)
                     batch = [idx]
                     max_len_in_batch = sample_length
-                    count = 0
+                    count = 1
             if batch:
                 buffer_batches.append(batch)
 
@@ -431,7 +431,7 @@
         self.batch_num = len(final_batches)
 
         logging.info(
-            f"rank: {self.rank}, dataloader start from step: {self.start_step}, batch_num: {rank_batches[self.rank]}, after: {self.batch_num}"
+            f"rank: {self.rank}, dataloader start from step: {self.start_step}, batch_num: {len(rank_batches[self.rank])}, after: {self.batch_num}"
         )
         return iter(final_batches)
 

--
Gitblit v1.9.1