| | |
| | | import torch |
| | | import torch.distributed as dist |
| | | import torchaudio |
| | | import numpy as np |
| | | import soundfile |
| | | from kaldiio import ReadHelper |
| | | from torch.utils.data import IterableDataset |
| | | |
| | |
# NOTE(review): this chunk is the interior of a larger data-loading loop --
# the enclosing function header, the start of the `if data_type == ...`
# chain, and the tail of the Resample(...) call all sit outside this view,
# and the final line is cut off mid-statement. Indentation was reconstructed
# from syntax; confirm against the full file before relying on it.
    sample_dict["key"] = key
elif data_type == "sound":
    # `item` presumably is a "<key> <wav-path>" line from a wav.scp-style
    # list -- TODO confirm the item format against the caller.
    key, path = item.strip().split()
    # BUG(review): this unconditional load runs BEFORE the try/except below,
    # so any file torchaudio cannot read raises right here and the soundfile
    # fallback is never reached. It looks like a leftover duplicate of the
    # call inside `try` and should probably be deleted.
    waveform, sampling_rate = torchaudio.load(path)
    try:
        waveform, sampling_rate = torchaudio.load(path)
    # NOTE(review): bare `except:` also catches KeyboardInterrupt/SystemExit;
    # `except Exception:` (ideally a narrower type) would be safer.
    except:
        # Fallback reader: soundfile returns a NumPy array shaped (frames,)
        # for mono or (frames, channels) otherwise. The expand_dims adds a
        # leading axis to mimic torchaudio's (channels, frames) layout --
        # correct for mono only; for multi-channel input this produces
        # (1, frames, channels), which does NOT match torchaudio. Verify
        # whether multi-channel files can occur here.
        waveform, sampling_rate = soundfile.read(path)
        waveform = np.expand_dims(waveform, axis=0)
        waveform = torch.tensor(waveform)
    if self.frontend_conf is not None:
        # Resample to the frontend's configured target rate `fs` when the
        # file's native rate differs.
        # NOTE(review): a fresh Resample transform appears to be built per
        # item; caching one per (orig_freq, fs) pair would avoid recomputing
        # resampling kernels in the hot loop -- confirm in the full file.
        if sampling_rate != self.frontend_conf["fs"]:
            waveform = torchaudio.transforms.Resample(orig_freq=sampling_rate,