From e24dbdc496debec225414d4d2c760f5775e64f2a Mon Sep 17 00:00:00 2001
From: 天地 <tiandiweizun@gmail.com>
Date: Wed, 26 Mar 2025 13:44:41 +0800
Subject: [PATCH] 感觉应该从文件读取更合适,因为上面判断了文件存在,且可以读取,如果本身是文本的话,下面也会有逻辑进行处理 (#2452)
---
funasr/datasets/audio_datasets/espnet_samplers.py | 2 ++
1 file changed, 2 insertions(+), 0 deletions(-)
diff --git a/funasr/datasets/audio_datasets/espnet_samplers.py b/funasr/datasets/audio_datasets/espnet_samplers.py
index b358fa3..004201e 100644
--- a/funasr/datasets/audio_datasets/espnet_samplers.py
+++ b/funasr/datasets/audio_datasets/espnet_samplers.py
@@ -147,7 +147,9 @@
start_idx = self.rank * batches_per_rank
end_idx = start_idx + batches_per_rank
rank_batches = buffer_batches[start_idx + self.start_step : end_idx]
+
self.batch_num = len(rank_batches)
+
logging.info(
f"rank: {self.rank}, dataloader start from step: {self.start_step}, batch_num: {end_idx-start_idx}, batch_num_after_step: {len(rank_batches)}"
)
--
Gitblit v1.9.1