游雁
2024-05-22 9a1b3b88ab9c374c400670fb2b4689b30bde7d7c
funasr/datasets/large_datasets/datapipes/batch.py
@@ -19,13 +19,13 @@
class MaxTokenBucketizerIterDataPipe(IterableDataset):
    def __init__(
            self,
            datapipe,
            batch_size=8000,
            len_fn=_default_len_fn,
            buffer_size=10240,
            sort_size=500,
            batch_mode="padding",
        self,
        datapipe,
        batch_size=8000,
        len_fn=_default_len_fn,
        buffer_size=10240,
        sort_size=500,
        batch_mode="padding",
    ):
        assert batch_size > 0, "Batch size is required to be larger than 0!"
        assert buffer_size >= -1, "Buffer size is required to be larger than -1!"