| | |
| | | if self.normalize: |
| | | # soundfile.read normalizes data to [-1,1] if dtype is not given |
| | | array, rate = librosa.load( |
| | | wav, sr=self.dest_sample_rate, mono=not self.always_2d |
| | | wav, sr=self.dest_sample_rate, mono=self.always_2d |
| | | ) |
| | | else: |
| | | array, rate = librosa.load( |
| | | wav, sr=self.dest_sample_rate, mono=not self.always_2d, dtype=self.dtype |
| | | wav, sr=self.dest_sample_rate, mono=self.always_2d, dtype=self.dtype |
| | | ) |
| | | |
| | | if self.speed_perturb is not None: |
| | |
| | | array, _ = torchaudio.sox_effects.apply_effects_tensor( |
| | | torch.tensor(array).view(1, -1), rate, |
| | | [['speed', str(speed)], ['rate', str(rate)]]) |
| | | array = array.view(-1).numpy() |
| | | array = array.view(-1).numpy() |
| | | |
| | | if array.ndim==2: |
| | | array=array.transpose((1, 0)) |
| | | |
| | | return rate, array |
| | | |