| | |
| | | from funasr.utils.types import str2triple_str |
| | | from funasr.utils.types import str_or_none |
| | | |
| | | from modelscope.utils.logger import get_logger |
| | | logger = get_logger() |
| | | |
| | | class Speech2Diarization: |
| | | """Speech2Diarization class |
| | | |
| | |
| | | if isinstance(raw_inputs, torch.Tensor): |
| | | raw_inputs = raw_inputs.numpy() |
| | | data_path_and_name_and_type = [raw_inputs[0], "speech", "bytes"] |
| | | logger.info(data_path_and_name_and_type) |
| | | loader = EENDOLADiarTask.build_streaming_iterator( |
| | | data_path_and_name_and_type, |
| | | dtype=dtype, |
| | |
| | | shuffle: bool = True, |
| | | threshold: float = 0.5, |
| | | **kwargs): |
| | | if self.frontend is not None: |
| | | speech = self.frontend(speech) |
| | | speech = [s[:s_len] for s, s_len in zip(speech, speech_lengths)] |
| | | emb = self.forward_encoder(speech, speech_lengths) |
| | | if shuffle: |