| | |
| | | from typing import Collection |
| | | from typing import Dict |
| | | from typing import Mapping |
| | | from typing import Optional |
| | | from typing import Tuple |
| | | from typing import Union |
| | | from typing import Union, List, Tuple |
| | | |
| | | import kaldiio |
| | | import numpy as np |
| | | import torch |
| | | from torch.utils.data.dataset import Dataset |
| | | from typeguard import check_argument_types |
| | | from typeguard import check_return_type |
| | | |
| | | from funasr.fileio.npy_scp import NpyScpReader |
| | | from funasr.fileio.sound_scp import SoundScpReader |
| | |
| | | |
| | | class AdapterForSoundScpReader(collections.abc.Mapping): |
| | | def __init__(self, loader, dtype=None): |
| | | assert check_argument_types() |
| | | self.loader = loader |
| | | self.dtype = dtype |
| | | self.rate = None |
| | |
| | | speed_perturb: Union[list, tuple] = None, |
| | | mode: str = "train", |
| | | ): |
| | | assert check_argument_types() |
| | | if len(path_name_type_list) == 0: |
| | | raise ValueError( |
| | | '1 or more elements are required for "path_name_type_list"' |
| | |
| | | |
| | | def _build_loader( |
| | | self, path: str, loader_type: str |
| | | ) -> Mapping[str, Union[np.ndarray, torch.Tensor, str, numbers.Number]]: |
| | | ) -> Mapping[str, Union[np.ndarray, torch.Tensor, str, List[int], numbers.Number]]: |
| | | """Helper function to instantiate Loader. |
| | | |
| | | Args: |
| | |
| | | raise RuntimeError(f"{k} is duplicated ({path}:{linenum})") |
| | | text_loader[k] = v |
| | | return text_loader |
| | | elif loader_type == "text_int": |
| | | text_int_loader = {} |
| | | with open(path, "r", encoding="utf-8") as f: |
| | | for linenum, line in enumerate(f, 1): |
| | | sps = line.rstrip().split(maxsplit=1) |
| | | if len(sps) == 1: |
| | | k, v = sps[0], "" |
| | | else: |
| | | k, v = sps |
| | | if k in text_int_loader: |
| | | raise RuntimeError(f"{k} is duplicated ({path}:{linenum})") |
| | | text_int_loader[k] = [int(i) for i in v.split()] |
| | | return text_int_loader |
| | | else: |
| | | raise RuntimeError(f"Not supported: loader_type={loader_type}") |
| | | |
| | |
| | | return _mes |
| | | |
| | | def __getitem__(self, uid: Union[str, int]) -> Tuple[str, Dict[str, np.ndarray]]: |
| | | assert check_argument_types() |
| | | |
| | | # Change integer-id to string-id |
| | | if isinstance(uid, int): |
| | |
| | | data[name] = value |
| | | |
| | | retval = uid, data |
| | | assert check_return_type(retval) |
| | | return retval |