From 7c11a0d6a84b0f903a947f6a0e5a2730fb528c87 Mon Sep 17 00:00:00 2001
From: 雾聪 <wucong.lyb@alibaba-inc.com>
Date: Mon, 27 May 2024 19:15:58 +0800
Subject: [PATCH] adapt to new model struct
---
funasr/datasets/large_datasets/datapipes/batch.py | 17 +++++++++--------
1 file changed, 9 insertions(+), 8 deletions(-)
diff --git a/funasr/datasets/large_datasets/datapipes/batch.py b/funasr/datasets/large_datasets/datapipes/batch.py
index c980ae3..aeeb451 100644
--- a/funasr/datasets/large_datasets/datapipes/batch.py
+++ b/funasr/datasets/large_datasets/datapipes/batch.py
@@ -19,13 +19,13 @@
class MaxTokenBucketizerIterDataPipe(IterableDataset):
def __init__(
- self,
- datapipe,
- batch_size=8000,
- len_fn=_default_len_fn,
- buffer_size=10240,
- sort_size=500,
- batch_mode="padding",
+ self,
+ datapipe,
+ batch_size=8000,
+ len_fn=_default_len_fn,
+ buffer_size=10240,
+ sort_size=500,
+ batch_mode="padding",
):
assert batch_size > 0, "Batch size is required to be larger than 0!"
assert buffer_size >= -1, "Buffer size is required to be larger than -1!"
@@ -39,13 +39,14 @@
self.batch_mode = batch_mode
def set_epoch(self, epoch):
- self.epoch = epoch
+ self.datapipe.set_epoch(epoch)
def __iter__(self):
buffer = []
batch = []
bucket = []
max_lengths = 0
+ min_lengths = 999999
batch_lengths = 0
if self.batch_mode == "clipping":
--
Gitblit v1.9.1