游雁
2023-10-10 580b11b57ac4b62f7e2acda73813a4e10e8e4cd3
funasr/bin/asr_inference_launch.py
@@ -55,6 +55,7 @@
                                        distribute_spk)
from funasr.build_utils.build_model_from_file import build_model_from_file
from funasr.utils.cluster_backend import ClusterBackend
from funasr.utils.modelscope_utils import get_cache_dir
from tqdm import tqdm
def inference_asr(
@@ -498,6 +499,7 @@
):
    ncpu = kwargs.get("ncpu", 1)
    torch.set_num_threads(ncpu)
    language = kwargs.get("model_lang", None)
    if word_lm_train_config is not None:
        raise NotImplementedError("Word LM is not implemented")
@@ -704,10 +706,13 @@
            text, token, token_int = result[0], result[1], result[2]
            time_stamp = result[4] if len(result[4]) > 0 else None
            if use_timestamp and time_stamp is not None and len(time_stamp):
                postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
            if language == "en-bpe":
                postprocessed_result = postprocess_utils.sentence_postprocess_sentencepiece(token)
            else:
                postprocessed_result = postprocess_utils.sentence_postprocess(token)
                if use_timestamp and time_stamp is not None and len(time_stamp):
                    postprocessed_result = postprocess_utils.sentence_postprocess(token, time_stamp)
                else:
                    postprocessed_result = postprocess_utils.sentence_postprocess(token)
            text_postprocessed = ""
            time_stamp_postprocessed = ""
            text_postprocessed_punc = postprocessed_result
@@ -787,7 +792,7 @@
        time_stamp_writer: bool = True,
        punc_infer_config: Optional[str] = None,
        punc_model_file: Optional[str] = None,
        sv_model_file: Optional[str] = "~/.cache/modelscope/hub/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/campplus_cn_common.bin",
        sv_model_file: Optional[str] = None,
        streaming: bool = False,
        embedding_node: str = "resnet1_dense",
        sv_threshold: float = 0.9465,
@@ -808,6 +813,9 @@
        level=log_level,
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
    )
    if sv_model_file is None:
        sv_model_file = "{}/damo/speech_paraformer-large-vad-punc-spk_asr_nat-zh-cn/campplus_cn_common.bin".format(get_cache_dir(None))
    if param_dict is not None:
        hotword_list_or_file = param_dict.get('hotword')
@@ -933,7 +941,7 @@
            #####  speaker_verification  #####
            ##################################
            # load sv model
            sv_model_dict = torch.load(sv_model_file.replace("~", os.environ['HOME']), map_location=torch.device('cpu'))
            sv_model_dict = torch.load(sv_model_file, map_location=torch.device('cpu'))
            sv_model = CAMPPlus()
            sv_model.load_state_dict(sv_model_dict)
            sv_model.eval()