kongdeqiang
2026-03-13 28ccfbfc51068a663a80764e14074df5edf2b5ba
funasr/datasets/audio_datasets/espnet_samplers.py
@@ -71,7 +71,8 @@
        self.max_token_length = kwargs.get("max_token_length", 2048)
        self.min_token_length = kwargs.get("min_token_length", 0)
        self.length_scale_source = kwargs.get("length_scale_source", 1.0)
        self.start_step = 0
        self.start_step = start_step
        self.batch_num = 1
        if self.start_step > 0:
            logging.info(f"Warning, start_step > 0, dataloader start from step: {self.start_step}")
        # super().__init__(dataset, num_replicas=num_replicas, rank=rank,
@@ -147,12 +148,17 @@
        end_idx = start_idx + batches_per_rank
        rank_batches = buffer_batches[start_idx + self.start_step : end_idx]
        self.batch_num = len(rank_batches)
        logging.info(
            f"rank: {self.rank}, dataloader start from step: {self.start_step}, batch_num: {end_idx-start_idx}, batch_num_after_step: {len(rank_batches)}"
        )
        # Return an iterator over the batches for the current rank
        return iter(rank_batches)
    def __len__(self):
        # Calculate the number of batches per epoch for the current rank
        return 1
        return self.batch_num
    def set_epoch(self, epoch):
        # Set the epoch for shuffling