| funasr/datasets/large_datasets/dataset.py | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 |
funasr/datasets/large_datasets/dataset.py
@@ -4,6 +4,7 @@
 from functools import partial
 
 import torch
+import torchaudio
 import torch.distributed as dist
 from kaldiio import ReadHelper
 from torch.utils.data import IterableDataset
@@ -117,7 +118,8 @@
     sample_dict["key"] = key
 elif data_type == "sound":
     key, path = item.strip().split()
-    mat, sampling_rate = soundfile.read(path)
+    waveform, sampling_rate = torchaudio.load(path)
+    mat = waveform[0]
     sample_dict[data_name] = mat
     sample_dict["sampling_rate"] = sampling_rate
     if data_name == "speech":