python/FunASR-XL.git

			@@ -1,7 +1,6 @@
			import collections.abc
			from pathlib import Path
			from typing import Union
			from typing import Optional

			import random
			import numpy as np
			@@ -9,6 +8,7 @@
			import librosa
			from typeguard import check_argument_types

			import torch
			import torchaudio

			from funasr.fileio.read_text import read_2column_text
			@@ -36,7 +36,7 @@
			always_2d: bool = False,
			normalize: bool = False,
			dest_sample_rate: int = 16000,
			speed_perturb: Optional[list, tuple] = None,
			speed_perturb: Union[list, tuple] = None,
			):
			assert check_argument_types()
			self.fname = fname
			@@ -52,19 +52,23 @@
			if self.normalize:
			# soundfile.read normalizes data to [-1,1] if dtype is not given
			array, rate = librosa.load(
			wav, sr=self.dest_sample_rate, mono=not self.always_2d
			wav, sr=self.dest_sample_rate, mono=self.always_2d
			)
			else:
			array, rate = librosa.load(
			wav, sr=self.dest_sample_rate, mono=not self.always_2d, dtype=self.dtype
			wav, sr=self.dest_sample_rate, mono=self.always_2d, dtype=self.dtype
			)

			if self.speed_perturb is not None:
			speed = random.choice(self.speed_perturb)
			if speed != 1.0:
			array, _ = torchaudio.sox_effects.apply_effects_tensor(
			array, rate,
			torch.tensor(array).view(1, -1), rate,
			[['speed', str(speed)], ['rate', str(rate)]])
			array = array.view(-1).numpy()

			if array.ndim==2:
			array=array.transpose((1, 0))

			return rate, array