From 47343b5c2f4e1256f60f46d8da0aa2e5de39b6c7 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: Sat, 05 Aug 2023 17:53:08 +0800
Subject: [PATCH] init repo
---
funasr/build_utils/build_streaming_iterator.py | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/funasr/build_utils/build_streaming_iterator.py b/funasr/build_utils/build_streaming_iterator.py
index 57cf8cf..02fc263 100644
--- a/funasr/build_utils/build_streaming_iterator.py
+++ b/funasr/build_utils/build_streaming_iterator.py
@@ -1,6 +1,5 @@
import numpy as np
from torch.utils.data import DataLoader
-from typeguard import check_argument_types
from funasr.datasets.iterable_dataset import IterableESPnetDataset
from funasr.datasets.small_datasets.collate_fn import CommonCollateFn
@@ -17,17 +16,26 @@
mc: bool = False,
dtype: str = np.float32,
num_workers: int = 1,
+ use_collate_fn: bool = True,
+ preprocess_fn=None,
ngpu: int = 0,
- train: bool=False,
+ train: bool = False,
) -> DataLoader:
"""Build DataLoader using iterable dataset"""
- assert check_argument_types()
# preprocess
- preprocess_fn = build_preprocess(preprocess_args, train)
+ if preprocess_fn is not None:
+ preprocess_fn = preprocess_fn
+ elif preprocess_args is not None:
+ preprocess_args.task_name = task_name
+ preprocess_fn = build_preprocess(preprocess_args, train)
+ else:
+ preprocess_fn = None
# collate
- if task_name in ["punc", "lm"]:
+ if not use_collate_fn:
+ collate_fn = None
+ elif task_name in ["punc", "lm"]:
collate_fn = CommonCollateFn(int_pad_value=0)
else:
collate_fn = CommonCollateFn(float_pad_value=0.0, int_pad_value=-1)
--
Gitblit v1.9.1