speech_asr
2023-03-15 fbec0f003d4de9e4b6ccb6bb58d2d4926a0ff332
funasr/bin/eend_ola_inference.py
@@ -27,7 +27,6 @@
from funasr.utils.types import str2triple_str
from funasr.utils.types import str_or_none
class Speech2Diarization:
    """Speech2Diarization class
@@ -121,7 +120,7 @@
                Currently, the tags of espnet_model_zoo are supported.
        Returns:
            Speech2Xvector: Speech2Xvector instance.
            Speech2Diarization: Speech2Diarization instance.
        """
        if model_tag is not None:
@@ -146,7 +145,7 @@
        output_dir: Optional[str] = None,
        batch_size: int = 1,
        dtype: str = "float32",
        ngpu: int = 0,
        ngpu: int = 1,
        num_workers: int = 0,
        log_level: Union[int, str] = "INFO",
        key_file: Optional[str] = None,
@@ -179,7 +178,6 @@
        diar_model_file=diar_model_file,
        device=device,
        dtype=dtype,
        streaming=streaming,
    )
    logging.info("speech2diarization_kwargs: {}".format(speech2diar_kwargs))
    speech2diar = Speech2Diarization.from_pretrained(
@@ -209,7 +207,7 @@
        if data_path_and_name_and_type is None and raw_inputs is not None:
            if isinstance(raw_inputs, torch.Tensor):
                raw_inputs = raw_inputs.numpy()
            data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
            data_path_and_name_and_type = [raw_inputs[0], "speech", "bytes"]
        loader = EENDOLADiarTask.build_streaming_iterator(
            data_path_and_name_and_type,
            dtype=dtype,