嘉渊
2023-06-17 a17c5cc3e7c21391da469bc8ac5f40de3e9056b4
funasr/datasets/small_datasets/dataset.py
@@ -9,9 +9,7 @@
from typing import Collection
from typing import Dict
from typing import Mapping
from typing import Optional
from typing import Tuple
from typing import Union
from typing import Union, List, Tuple
import kaldiio
import numpy as np
@@ -111,7 +109,7 @@
            float_dtype: str = "float32",
            int_dtype: str = "long",
            dest_sample_rate: int = 16000,
            speed_perturb: Optional[list, tuple] = None,
            speed_perturb: Union[list, tuple] = None,
            mode: str = "train",
    ):
        assert check_argument_types()
@@ -145,7 +143,7 @@
    def _build_loader(
            self, path: str, loader_type: str
    ) -> Mapping[str, Union[np.ndarray, torch.Tensor, str, numbers.Number]]:
    ) -> Mapping[str, Union[np.ndarray, torch.Tensor, str, List[int], numbers.Number]]:
        """Helper function to instantiate Loader.
        Args:
@@ -175,6 +173,19 @@
                        raise RuntimeError(f"{k} is duplicated ({path}:{linenum})")
                    text_loader[k] = v
            return text_loader
        elif loader_type == "text_int":
            text_int_loader = {}
            with open(path, "r", encoding="utf-8") as f:
                for linenum, line in enumerate(f, 1):
                    sps = line.rstrip().split(maxsplit=1)
                    if len(sps) == 1:
                        k, v = sps[0], ""
                    else:
                        k, v = sps
                    if k in text_int_loader:
                        raise RuntimeError(f"{k} is duplicated ({path}:{linenum})")
                    text_int_loader[k] = [int(i) for i in v.split()]
            return text_int_loader
        else:
            raise RuntimeError(f"Not supported: loader_type={loader_type}")