| | |
| | | float_dtype: str = "float32", |
| | | int_dtype: str = "long", |
| | | dest_sample_rate: int = 16000, |
| | | speed_perturb: tuple = None, |
| | | ): |
| | | assert check_argument_types() |
| | | if len(path_name_type_list) == 0: |
| | |
| | | self.float_dtype = float_dtype |
| | | self.int_dtype = int_dtype |
| | | self.dest_sample_rate = dest_sample_rate |
| | | self.speed_perturb = speed_perturb |
| | | |
| | | self.loader_dict = {} |
| | | self.debug_info = {} |
| | |
| | | loader_type: loader_type. sound, npy, text, etc |
| | | """ |
| | | if loader_type == "sound": |
| | | loader = SoundScpReader(path, self.dest_sample_rate, normalize=True, always_2d=False) |
| | | loader = SoundScpReader(path, self.dest_sample_rate, normalize=True, always_2d=False, speed_perturb=self.speed_perturb) |
| | | return AdapterForSoundScpReader(loader, self.float_dtype) |
| | | elif loader_type == "kaldi_ark": |
| | | loader = kaldiio.load_scp(path) |
| | |
| | | from pathlib import Path |
| | | from typing import Union |
| | | |
| | | import random |
| | | import numpy as np |
| | | import soundfile |
| | | import librosa |
| | | from typeguard import check_argument_types |
| | | |
| | | import torchaudio |
| | | |
| | | from funasr.fileio.read_text import read_2column_text |
| | | |
| | |
| | | always_2d: bool = False, |
| | | normalize: bool = False, |
| | | dest_sample_rate: int = 16000, |
| | | speed_perturb: tuple = None, |
| | | ): |
| | | assert check_argument_types() |
| | | self.fname = fname |
| | |
| | | self.normalize = normalize |
| | | self.data = read_2column_text(fname) |
| | | self.dest_sample_rate = dest_sample_rate |
| | | self.speed_perturb = speed_perturb |
| | | |
| | | def __getitem__(self, key): |
| | | wav = self.data[key] |
| | |
| | | wav, sr=self.dest_sample_rate, mono=not self.always_2d, dtype=self.dtype |
| | | ) |
| | | |
| | | if self.speed_perturb is not None: |
| | | speed = random.choice(self.speed_perturb) |
| | | if speed != 1.0: |
| | | array, _ = torchaudio.sox_effects.apply_effects_tensor( |
| | | array, rate, |
| | | [['speed', str(speed)], ['rate', str(rate)]]) |
| | | |
| | | return rate, array |
| | | |
| | | def get_path(self, key): |