python/FunASR-XL.git

			@@ -107,7 +107,7 @@
			return value[()]


			def sound_loader(path, float_dtype=None):
			def sound_loader(path, dest_sample_rate=16000, float_dtype=None):
			# The file is as follows:
			# utterance_id_A /some/where/a.wav
			# utterance_id_B /some/where/a.flac
			@@ -115,7 +115,7 @@
			# NOTE(kamo): SoundScpReader doesn't support pipe-fashion
			# like Kaldi e.g. "cat a.wav |".
			# NOTE(kamo): The audio signal is normalized to [-1,1] range.
			loader = SoundScpReader(path, normalize=True, always_2d=False)
			loader = SoundScpReader(path, dest_sample_rate=16000, normalize=True, always_2d=False)

			# SoundScpReader.__getitem__() returns Tuple[int, ndarray],
			# but ndarray is desired, so Adapter class is inserted here
			@@ -139,7 +139,7 @@
			DATA_TYPES = {
			"sound": dict(
			func=sound_loader,
			kwargs=["float_dtype"],
			kwargs=["dest_sample_rate","float_dtype"],
			help="Audio format types which supported by sndfile wav, flac, etc."
			"\n\n"
			" utterance_id_a a.wav\n"
			@@ -282,6 +282,7 @@
			int_dtype: str = "long",
			max_cache_size: Union[float, int, str] = 0.0,
			max_cache_fd: int = 0,
			dest_sample_rate: int = 16000,
			):
			assert check_argument_types()
			if len(path_name_type_list) == 0:
			@@ -295,6 +296,7 @@
			self.float_dtype = float_dtype
			self.int_dtype = int_dtype
			self.max_cache_fd = max_cache_fd
			self.dest_sample_rate = dest_sample_rate

			self.loader_dict = {}
			self.debug_info = {}
			@@ -335,6 +337,8 @@
			for key2 in dic["kwargs"]:
			if key2 == "loader_type":
			kwargs["loader_type"] = loader_type
			elif key2 == "dest_sample_rate" and loader_type=="sound":
			kwargs["dest_sample_rate"] = self.dest_sample_rate
			elif key2 == "float_dtype":
			kwargs["float_dtype"] = self.float_dtype
			elif key2 == "int_dtype":

			@@ -4,6 +4,7 @@

			import numpy as np
			import soundfile
			import librosa
			from typeguard import check_argument_types

			from funasr.fileio.read_text import read_2column_text
			@@ -30,6 +31,7 @@
			dtype=np.int16,
			always_2d: bool = False,
			normalize: bool = False,
			dest_sample_rate: int = 16000,
			):
			assert check_argument_types()
			self.fname = fname
			@@ -37,15 +39,18 @@
			self.always_2d = always_2d
			self.normalize = normalize
			self.data = read_2column_text(fname)
			self.dest_sample_rate = dest_sample_rate

			def __getitem__(self, key):
			wav = self.data[key]
			if self.normalize:
			# soundfile.read normalizes data to [-1,1] if dtype is not given;
			# librosa.load likewise returns floating-point samples and resamples to `sr`.
			array, rate = soundfile.read(wav, always_2d=self.always_2d)
			array, rate = librosa.load(
			wav, sr=self.dest_sample_rate, mono=not self.always_2d
			)
			else:
			array, rate = soundfile.read(
			wav, dtype=self.dtype, always_2d=self.always_2d
			array, rate = librosa.load(
			wav, sr=self.dest_sample_rate, mono=not self.always_2d, dtype=self.dtype
			)

			return rate, array

			@@ -1576,6 +1576,7 @@
			preprocess=iter_options.preprocess_fn,
			max_cache_size=iter_options.max_cache_size,
			max_cache_fd=iter_options.max_cache_fd,
			dest_sample_rate=args.frontend_conf["fs"],
			)
			cls.check_task_requirements(
			dataset, args.allow_variable_data_keys, train=iter_options.train

	funasr/datasets/dataset.py	10 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/fileio/sound_scp.py	11 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	funasr/tasks/abs_task.py	1 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史