From 1596f6f414f6f41da66506debb1dff19fffeb3ec Mon Sep 17 00:00:00 2001
From: 游雁 <zhifu.gzf@alibaba-inc.com>
Date: Mon, 24 Jun 2024 11:55:17 +0800
Subject: [PATCH] fix bug in hotwords

---
 funasr/datasets/large_datasets/datapipes/batch.py |   17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/funasr/datasets/large_datasets/datapipes/batch.py b/funasr/datasets/large_datasets/datapipes/batch.py
index c980ae3..aeeb451 100644
--- a/funasr/datasets/large_datasets/datapipes/batch.py
+++ b/funasr/datasets/large_datasets/datapipes/batch.py
@@ -19,13 +19,13 @@
 class MaxTokenBucketizerIterDataPipe(IterableDataset):
 
     def __init__(
-            self,
-            datapipe,
-            batch_size=8000,
-            len_fn=_default_len_fn,
-            buffer_size=10240,
-            sort_size=500,
-            batch_mode="padding",
+        self,
+        datapipe,
+        batch_size=8000,
+        len_fn=_default_len_fn,
+        buffer_size=10240,
+        sort_size=500,
+        batch_mode="padding",
     ):
         assert batch_size > 0, "Batch size is required to be larger than 0!"
         assert buffer_size >= -1, "Buffer size is required to be larger than -1!"
@@ -39,13 +39,14 @@
         self.batch_mode = batch_mode
 
     def set_epoch(self, epoch):
-        self.epoch = epoch
+        self.datapipe.set_epoch(epoch)
 
     def __iter__(self):
         buffer = []
         batch = []
         bucket = []
         max_lengths = 0
+        min_lengths = 999999
         batch_lengths = 0
 
         if self.batch_mode == "clipping":

--
Gitblit v1.9.1