# Provenance: reconstructed from a git patch exported by Gitblit v1.9.1.
#   Commit:  0bd1a4d6a9893e45438505514a063d9deee91f21
#   Author:  Xingchen Song(宋星辰) <xingchensong1996@163.com>
#   Date:    Tue, 11 Jun 2024 14:00:10 +0800
#   Subject: [PATCH] [fix] better solution for handling empty result (#1796)
#   File:    funasr/datasets/large_datasets/datapipes/filter.py
#            (new file, blob c4f045d, 23 insertions, 0 deletions)
"""Filtering stage for iterable data pipelines."""

from typing import Any, Callable, Iterable, Iterator

from torch.utils.data import IterableDataset


def default_fn(data: Any) -> Any:
    """Return ``data`` unchanged.

    This is the default predicate of :class:`FilterIterDataPipe`. Because the
    item itself is returned, the filter keeps an item exactly when the item is
    truthy, so ``None`` and empty containers are dropped.
    """
    return data


class FilterIterDataPipe(IterableDataset):
    """Iterable dataset that yields only the upstream items accepted by ``fn``.

    Args:
        datapipe: Upstream iterable. It must also provide ``set_epoch(epoch)``
            if :meth:`set_epoch` is going to be called on this object.
        fn: Predicate called once per item. The item is yielded when the
            return value is truthy and skipped otherwise. Defaults to
            :func:`default_fn`, i.e. "keep truthy items".
    """

    def __init__(
        self,
        datapipe: Iterable[Any],
        fn: Callable[[Any], Any] = default_fn,
    ) -> None:
        super().__init__()
        self.datapipe = datapipe
        self.fn = fn

    def set_epoch(self, epoch: int) -> None:
        """Forward ``epoch`` to the upstream datapipe's ``set_epoch``."""
        self.datapipe.set_epoch(epoch)

    def __iter__(self) -> Iterator[Any]:
        """Lazily yield the upstream items for which ``self.fn`` is truthy.

        Raises:
            TypeError: If ``self.fn`` is not callable. As this method is a
                generator, the error surfaces when the first item is
                requested, not when ``iter()`` is called.
        """
        # Explicit raise instead of ``assert``: assertions are removed when
        # Python runs with -O, which would silently disable this check.
        if not callable(self.fn):
            raise TypeError(
                f"fn must be callable, got {type(self.fn).__name__}"
            )
        for data in self.datapipe:
            # The predicate result is evaluated with plain truthiness.
            if self.fn(data):
                yield data