| | |
| | | buffer_size: int = 30, |
| | | drop_last: bool = False, |
| | | shuffle: bool = True, |
| | | is_training: bool = True, |
| | | **kwargs): |
| | | |
| | | self.drop_last = drop_last |
| | |
| | | self.dataset = dataset |
| | | self.total_samples = len(dataset) |
| | | self.batch_type = batch_type |
| | | self.batch_size = batch_size |
| | | self.batch_size = int(batch_size) |
| | | self.buffer_size = buffer_size |
| | | self.max_token_length = kwargs.get("max_token_length", 5000) |
| | | self.shuffle_idx = np.arange(self.total_samples) |
| | | self.shuffle = shuffle |
| | | self.shuffle = shuffle and is_training |
| | | |
def __len__(self) -> int:
    """Return the number of samples recorded in ``self.total_samples``.

    The value is read straight from the attribute; it is a count of
    dataset samples, and no batching arithmetic is applied here.
    """
    return self.total_samples