游雁
2023-11-16 4ace5a95b052d338947fc88809a440ccd55cf6b4
funasr/datasets/small_datasets/dataset.py
@@ -15,8 +15,6 @@
import numpy as np
import torch
from torch.utils.data.dataset import Dataset
from typeguard import check_argument_types
from typeguard import check_return_type
from funasr.fileio.npy_scp import NpyScpReader
from funasr.fileio.sound_scp import SoundScpReader
@@ -24,7 +22,6 @@
class AdapterForSoundScpReader(collections.abc.Mapping):
    def __init__(self, loader, dtype=None):
        assert check_argument_types()
        self.loader = loader
        self.dtype = dtype
        self.rate = None
@@ -112,7 +109,6 @@
            speed_perturb: Union[list, tuple] = None,
            mode: str = "train",
    ):
        assert check_argument_types()
        if len(path_name_type_list) == 0:
            raise ValueError(
                '1 or more elements are required for "path_name_type_list"'
@@ -173,8 +169,8 @@
                        raise RuntimeError(f"{k} is duplicated ({path}:{linenum})")
                    text_loader[k] = v
            return text_loader
        elif loader_type == "text_in":
            text_in_loader = {}
        elif loader_type == "text_int":
            text_int_loader = {}
            with open(path, "r", encoding="utf-8") as f:
                for linenum, line in enumerate(f, 1):
                    sps = line.rstrip().split(maxsplit=1)
@@ -182,10 +178,10 @@
                        k, v = sps[0], ""
                    else:
                        k, v = sps
                    if k in text_in_loader:
                    if k in text_int_loader:
                        raise RuntimeError(f"{k} is duplicated ({path}:{linenum})")
                    text_in_loader[k] = [int(i) for i in v.split()]
            return text_in_loader
                    text_int_loader[k] = [int(i) for i in v.split()]
            return text_int_loader
        else:
            raise RuntimeError(f"Not supported: loader_type={loader_type}")
@@ -207,7 +203,6 @@
        return _mes
    def __getitem__(self, uid: Union[str, int]) -> Tuple[str, Dict[str, np.ndarray]]:
        assert check_argument_types()
        # Change integer-id to string-id
        if isinstance(uid, int):
@@ -265,5 +260,4 @@
            data[name] = value
        retval = uid, data
        assert check_return_type(retval)
        return retval