speech_asr
2023-03-15 2cfe010d7b0f17877a271cc401e2c2f8f8d4c42c
funasr/bin/eend_ola_inference.py
@@ -27,6 +27,8 @@
from funasr.utils.types import str2triple_str
from funasr.utils.types import str_or_none
from modelscope.utils.logger import get_logger
logger = get_logger()
class Speech2Diarization:
    """Speech2Diarization class
@@ -121,7 +123,7 @@
                Currently, the tags of espnet_model_zoo are supported.
        Returns:
            Speech2Xvector: Speech2Xvector instance.
            Speech2Diarization: Speech2Diarization instance.
        """
        if model_tag is not None:
@@ -179,7 +181,6 @@
        diar_model_file=diar_model_file,
        device=device,
        dtype=dtype,
        streaming=streaming,
    )
    logging.info("speech2diarization_kwargs: {}".format(speech2diar_kwargs))
    speech2diar = Speech2Diarization.from_pretrained(
@@ -210,6 +211,7 @@
            if isinstance(raw_inputs, torch.Tensor):
                raw_inputs = raw_inputs.numpy()
            data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
        logger.info(data_path_and_name_and_type)
        loader = EENDOLADiarTask.build_streaming_iterator(
            data_path_and_name_and_type,
            dtype=dtype,
@@ -229,6 +231,8 @@
            output_writer = open("{}/result.txt".format(output_path), "w")
        result_list = []
        for keys, batch in loader:
            logger.info("keys: {}".format(keys))
            logger.info("batch: {}".format(batch))
            assert isinstance(batch, dict), type(batch)
            assert all(isinstance(s, str) for s in keys), keys
            _bs = len(next(iter(batch.values())))