| | |
| | | import os |
| | | import random |
| | | import soundfile |
| | | import numpy |
| | | from functools import partial |
| | | |
| | | import torch |
| | | import torchaudio |
| | | import torch.distributed as dist |
| | | from kaldiio import ReadHelper |
| | | from torch.utils.data import IterableDataset |
| | |
| | | sample_dict["key"] = key |
| | | elif data_type == "sound": |
| | | key, path = item.strip().split() |
| | | mat, sampling_rate = soundfile.read(path) |
| | | waveform, sampling_rate = torchaudio.load(path) |
| | | waveform = waveform.numpy() |
| | | mat = waveform[0] |
| | | sample_dict[data_name] = mat |
| | | sample_dict["sampling_rate"] = sampling_rate |
| | | if data_name == "speech": |
| | |
| | | filter_fn = partial(filter, **filter_conf) |
| | | dataset = FilterIterDataPipe(dataset, fn=filter_fn) |
| | | |
| | | vocab = {'vocab': dict, 'seg_dict': seg_dict} |
| | | tokenize_fn = partial(tokenize, **vocab) |
| | | dataset = MapperIterDataPipe(dataset, fn=tokenize_fn) |
| | | if "text" in data_names: |
| | | vocab = {'vocab': dict, 'seg_dict': seg_dict} |
| | | tokenize_fn = partial(tokenize, **vocab) |
| | | dataset = MapperIterDataPipe(dataset, fn=tokenize_fn) |
| | | |
| | | if shuffle: |
| | | buffer_conf = conf.get('shuffle_conf', {}) |