From 4d907718f39e2b0f7a0c714c2e3de289e742fc61 Mon Sep 17 00:00:00 2001
From: Carl <415692979@qq.com>
Date: Thu, 28 Mar 2024 13:42:00 +0800
Subject: [PATCH] 修正commit 87b62d68957a2194b017a43b6c2a15424a05a984 引入的英文整句标点预测导致末尾两个单词中间的空格被删除的问题。 (#1556)

---
 funasr/datasets/large_datasets/datapipes/filter.py |   24 ++++++++++++++++++++++++
 1 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/funasr/datasets/large_datasets/datapipes/filter.py b/funasr/datasets/large_datasets/datapipes/filter.py
new file mode 100644
index 0000000..6fe7153
--- /dev/null
+++ b/funasr/datasets/large_datasets/datapipes/filter.py
@@ -0,0 +1,24 @@
+from torch.utils.data import IterableDataset
+
+def default_fn(data):  # Identity function; used as the default `fn` of FilterIterDataPipe.
+    return data
+
+
+class FilterIterDataPipe(IterableDataset):
+    """Iterable dataset that yields only the items of `datapipe` for which `fn(item)` is truthy."""
+    def __init__(self,
+                 datapipe,
+                 fn=default_fn):
+        self.datapipe = datapipe  # upstream iterable; set_epoch() calls are forwarded to it
+        self.fn = fn  # predicate called once per item; the default keeps truthy items
+
+    def set_epoch(self, epoch):
+        self.datapipe.set_epoch(epoch)  # pass the epoch through to the wrapped datapipe
+
+    def __iter__(self):
+        assert callable(self.fn)  # checked when iteration starts; removed under `python -O`
+        for data in self.datapipe:
+            if self.fn(data):
+                yield data
+            else:
+                continue  # predicate result was falsy: drop this item

--
Gitblit v1.9.1