| | |
| | | data_path_and_name_and_type, |
| | | dtype=dtype, |
| | | batch_size=batch_size, |
| | | fs=fs, |
| | | mc=True, |
| | | key_file=key_file, |
| | | num_workers=num_workers, |
| | | preprocess_fn=ASRTask.build_preprocess_fn(speech2text.asr_train_args, False), |
| | |
| | | return load_bytes(bytes) |
| | | |
| | | DATA_TYPES = { |
| | | "sound": lambda x: torchaudio.load(x)[0][0].numpy(), |
| | | "sound": lambda x: torchaudio.load(x)[0].numpy(), |
| | | "pcm": load_pcm, |
| | | "kaldi_ark": load_kaldi, |
| | | "bytes": load_bytes, |
| | |
| | | ] = None, |
| | | float_dtype: str = "float32", |
| | | fs: dict = None, |
| | | mc: bool = False, |
| | | int_dtype: str = "long", |
| | | key_file: str = None, |
| | | ): |
| | |
| | | self.int_dtype = int_dtype |
| | | self.key_file = key_file |
| | | self.fs = fs |
| | | self.mc = mc |
| | | |
| | | self.debug_info = {} |
| | | non_iterable_list = [] |
| | |
| | | array = torchaudio.transforms.Resample(orig_freq=audio_fs, |
| | | new_freq=model_fs)(array) |
| | | array = array.squeeze(0).numpy() |
| | | |
| | | data[name] = array |
| | | |
| | | if self.preprocess is not None: |
| | |
| | | model_fs = self.fs["model_fs"] |
| | | if audio_fs is not None and model_fs is not None: |
| | | array = torch.from_numpy(array) |
| | | array = array.unsqueeze(0) |
| | | array = torchaudio.transforms.Resample(orig_freq=audio_fs, |
| | | new_freq=model_fs)(array) |
| | | array = array.squeeze(0).numpy() |
| | | data[name] = array |
| | | if self.mc: |
| | | data[name] = array.transpose(0, 1).numpy() |
| | | else: |
| | | data[name] = array[0].numpy() |
| | | |
| | | if self.preprocess is not None: |
| | | data = self.preprocess(uid, data) |
| | |
| | | model_fs = self.fs["model_fs"] |
| | | if audio_fs is not None and model_fs is not None: |
| | | array = torch.from_numpy(array) |
| | | array = array.unsqueeze(0) |
| | | array = torchaudio.transforms.Resample(orig_freq=audio_fs, |
| | | new_freq=model_fs)(array) |
| | | array = array.squeeze(0).numpy() |
| | | data[name] = array |
| | | if _type == "sound": |
| | | if self.mc: |
| | | data[name] = array.transpose(0, 1).numpy() |
| | | else: |
| | | data[name] = array[0].numpy() |
| | | else: |
| | | data[name] = array |
| | | if self.non_iterable_dataset is not None: |
| | | # 2.b. Load data from non-iterable dataset |
| | | _, from_non_iterable = self.non_iterable_dataset[uid] |
| | |
| | | key_file: str = None, |
| | | batch_size: int = 1, |
| | | fs: dict = None, |
| | | mc: bool = False, |
| | | dtype: str = np.float32, |
| | | num_workers: int = 1, |
| | | allow_variable_data_keys: bool = False, |
| | |
| | | data_path_and_name_and_type, |
| | | float_dtype=dtype, |
| | | fs=fs, |
| | | mc=mc, |
| | | preprocess=preprocess_fn, |
| | | key_file=key_file, |
| | | ) |