From 54a91194901ad72562d5cb5856ee8c302d93fb0e Mon Sep 17 00:00:00 2001 From: 游雁 <zhifu.gzf@alibaba-inc.com> Date: 星期一, 27 十一月 2023 14:11:54 +0800 Subject: [PATCH] dataloader --- funasr/datasets/data_sampler.py | 6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/funasr/datasets/data_sampler.py b/funasr/datasets/data_sampler.py index 6b3407c..60c7c84 100644 --- a/funasr/datasets/data_sampler.py +++ b/funasr/datasets/data_sampler.py @@ -4,7 +4,7 @@ class BatchSampler(torch.utils.data.BatchSampler): - def __init__(self, dataset, batch_size_type: str="example", batch_size: int=14, sort_size: int=30, drop_last: bool=False, shuffle: bool=True, **kwargs): + def __init__(self, dataset, batch_size_type: str="example", batch_size: int=100, sort_size: int=30, drop_last: bool=False, shuffle: bool=True, **kwargs): self.drop_last = drop_last self.pre_idx = -1 @@ -46,8 +46,8 @@ idx_map = self.shuffle_idx[idx] # prompt = self.dataset.indexed_dataset[idx_map]["prompt"] - sample_len_cur = self.dataset.indexed_dataset[idx_map]["source_len"] + \ - self.dataset.indexed_dataset[idx_map]["target_len"] + sample_len_cur = self.dataset.indexed_dataset.get_source_len(self.dataset.indexed_dataset[idx_map]) + \ + self.dataset.indexed_dataset.get_target_len(self.dataset.indexed_dataset[idx_map]) datalen_with_index.append([idx, sample_len_cur]) -- Gitblit v1.9.1