| | |
| | | if data_path_and_name_and_type is None and raw_inputs is not None: |
| | | if isinstance(raw_inputs, torch.Tensor): |
| | | raw_inputs = raw_inputs.numpy() |
| | | data_path_and_name_and_type = [raw_inputs[0], "speech", "bytes"] |
| | | data_path_and_name_and_type = [raw_inputs[0], "speech", "sound"] |
| | | loader = EENDOLADiarTask.build_streaming_iterator( |
| | | data_path_and_name_and_type, |
| | | dtype=dtype, |
| | |
| | | n_layers: int, |
| | | n_units: int, |
| | | e_units: int = 2048, |
| | | h: int = 8, |
| | | h: int = 4, |
| | | dropout_rate: float = 0.1, |
| | | use_pos_emb: bool = False): |
| | | super(EENDOLATransformerEncoder, self).__init__() |
| | |
| | | "humanfriendly", |
| | | "scipy>=1.4.1", |
| | | # "filelock", |
| | | "librosa>=0.8.0", |
| | | "librosa==0.8.1", |
| | | "jamo==0.4.1", # For kss |
| | | "PyYAML>=5.1.2", |
| | | "soundfile>=0.10.2", |