speech_asr
2023-02-09 2c167ecef3421f1f5e876fd7c158f57faa1abceb
Merge branch 'dev' of https://github.com/alibaba-damo-academy/FunASR into dev
8个文件已修改
13个文件已添加
453 ■■■■■ 已修改文件
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-offline/finetune.py 35 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-offline/infer.py 13 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-online/finetune.py 35 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-online/infer.py 13 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline/finetune.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline/infer.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-offline/finetune.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-offline/infer.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline/finetune.py 35 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline/infer.py 13 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-online/finetune.py 35 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-online/infer.py 13 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-offline/finetune.py 35 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-offline/infer.py 13 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-online/finetune.py 35 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-online/infer.py 13 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-offline/finetune.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-offline/infer.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-offline/finetune.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-offline/infer.py 2 ●●● 补丁 | 查看 | 原始文档 | blame | 历史
funasr/bin/vad_inference_launch.py 149 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-offline/finetune.py
New file
@@ -0,0 +1,35 @@
import os
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from funasr.datasets.ms_dataset import MsDataset
def modelscope_finetune(params):
    if not os.path.exists(params["output_dir"]):
        os.makedirs(params["output_dir"], exist_ok=True)
    # dataset split ["train", "validation"]
    ds_dict = MsDataset.load(params["data_dir"])
    kwargs = dict(
        model=params["model"],
        model_revision=params["model_revision"],
        data_dir=ds_dict,
        dataset_type=params["dataset_type"],
        work_dir=params["output_dir"],
        batch_bins=params["batch_bins"],
        max_epoch=params["max_epoch"],
        lr=params["lr"])
    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
    trainer.train()
if __name__ == '__main__':
    params = {}
    params["output_dir"] = "./checkpoint"
    params["data_dir"] = "./data"
    params["batch_bins"] = 2000
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-offline"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-offline/infer.py
New file
@@ -0,0 +1,13 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
if __name__ == "__main__":
    audio_in = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_de.wav"
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-offline",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
    print(rec_result)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-online/finetune.py
New file
@@ -0,0 +1,35 @@
import os
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from funasr.datasets.ms_dataset import MsDataset
def modelscope_finetune(params):
    if not os.path.exists(params["output_dir"]):
        os.makedirs(params["output_dir"], exist_ok=True)
    # dataset split ["train", "validation"]
    ds_dict = MsDataset.load(params["data_dir"])
    kwargs = dict(
        model=params["model"],
        model_revision=params["model_revision"],
        data_dir=ds_dict,
        dataset_type=params["dataset_type"],
        work_dir=params["output_dir"],
        batch_bins=params["batch_bins"],
        max_epoch=params["max_epoch"],
        lr=params["lr"])
    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
    trainer.train()
if __name__ == '__main__':
    params = {}
    params["output_dir"] = "./checkpoint"
    params["data_dir"] = "./data"
    params["batch_bins"] = 2000
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-online"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-online/infer.py
New file
@@ -0,0 +1,13 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
if __name__ == "__main__":
    audio_in = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_de.wav"
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-de-16k-common-vocab3690-tensorflow1-online",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
    print(rec_result)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline/finetune.py
@@ -30,6 +30,6 @@
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-online"
    params["model"] = "damo/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline/infer.py
@@ -6,7 +6,7 @@
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-online",
        model="damo/speech_UniASR_asr_2pass-en-16k-common-vocab1080-tensorflow1-offline",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-offline/finetune.py
@@ -30,6 +30,6 @@
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-online"
    params["model"] = "damo/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-offline"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-offline/infer.py
@@ -6,7 +6,7 @@
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-online",
        model="damo/speech_UniASR_asr_2pass-es-16k-common-vocab3445-tensorflow1-offline",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline/finetune.py
New file
@@ -0,0 +1,35 @@
import os
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from funasr.datasets.ms_dataset import MsDataset
def modelscope_finetune(params):
    if not os.path.exists(params["output_dir"]):
        os.makedirs(params["output_dir"], exist_ok=True)
    # dataset split ["train", "validation"]
    ds_dict = MsDataset.load(params["data_dir"])
    kwargs = dict(
        model=params["model"],
        model_revision=params["model_revision"],
        data_dir=ds_dict,
        dataset_type=params["dataset_type"],
        work_dir=params["output_dir"],
        batch_bins=params["batch_bins"],
        max_epoch=params["max_epoch"],
        lr=params["lr"])
    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
    trainer.train()
if __name__ == '__main__':
    params = {}
    params["output_dir"] = "./checkpoint"
    params["data_dir"] = "./data"
    params["batch_bins"] = 2000
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline/infer.py
New file
@@ -0,0 +1,13 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
if __name__ == "__main__":
    audio_in = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_fa.wav"
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-offline",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
    print(rec_result)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-online/finetune.py
New file
@@ -0,0 +1,35 @@
import os
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from funasr.datasets.ms_dataset import MsDataset
def modelscope_finetune(params):
    if not os.path.exists(params["output_dir"]):
        os.makedirs(params["output_dir"], exist_ok=True)
    # dataset split ["train", "validation"]
    ds_dict = MsDataset.load(params["data_dir"])
    kwargs = dict(
        model=params["model"],
        model_revision=params["model_revision"],
        data_dir=ds_dict,
        dataset_type=params["dataset_type"],
        work_dir=params["output_dir"],
        batch_bins=params["batch_bins"],
        max_epoch=params["max_epoch"],
        lr=params["lr"])
    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
    trainer.train()
if __name__ == '__main__':
    params = {}
    params["output_dir"] = "./checkpoint"
    params["data_dir"] = "./data"
    params["batch_bins"] = 2000
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-online"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-online/infer.py
New file
@@ -0,0 +1,13 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
if __name__ == "__main__":
    audio_in = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_fa.wav"
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-fa-16k-common-vocab1257-pytorch-online",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
    print(rec_result)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-offline/finetune.py
New file
@@ -0,0 +1,35 @@
import os
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from funasr.datasets.ms_dataset import MsDataset
def modelscope_finetune(params):
    if not os.path.exists(params["output_dir"]):
        os.makedirs(params["output_dir"], exist_ok=True)
    # dataset split ["train", "validation"]
    ds_dict = MsDataset.load(params["data_dir"])
    kwargs = dict(
        model=params["model"],
        model_revision=params["model_revision"],
        data_dir=ds_dict,
        dataset_type=params["dataset_type"],
        work_dir=params["output_dir"],
        batch_bins=params["batch_bins"],
        max_epoch=params["max_epoch"],
        lr=params["lr"])
    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
    trainer.train()
if __name__ == '__main__':
    params = {}
    params["output_dir"] = "./checkpoint"
    params["data_dir"] = "./data"
    params["batch_bins"] = 2000
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-offline"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-offline/infer.py
New file
@@ -0,0 +1,13 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
if __name__ == "__main__":
    audio_in = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_fr.wav"
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-offline",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
    print(rec_result)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-online/finetune.py
New file
@@ -0,0 +1,35 @@
import os
from modelscope.metainfo import Trainers
from modelscope.trainers import build_trainer
from funasr.datasets.ms_dataset import MsDataset
def modelscope_finetune(params):
    if not os.path.exists(params["output_dir"]):
        os.makedirs(params["output_dir"], exist_ok=True)
    # dataset split ["train", "validation"]
    ds_dict = MsDataset.load(params["data_dir"])
    kwargs = dict(
        model=params["model"],
        model_revision=params["model_revision"],
        data_dir=ds_dict,
        dataset_type=params["dataset_type"],
        work_dir=params["output_dir"],
        batch_bins=params["batch_bins"],
        max_epoch=params["max_epoch"],
        lr=params["lr"])
    trainer = build_trainer(Trainers.speech_asr_trainer, default_args=kwargs)
    trainer.train()
if __name__ == '__main__':
    params = {}
    params["output_dir"] = "./checkpoint"
    params["data_dir"] = "./data"
    params["batch_bins"] = 2000
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-online"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-online/infer.py
New file
@@ -0,0 +1,13 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
if __name__ == "__main__":
    audio_in = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_fr.wav"
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-fr-16k-common-vocab3472-tensorflow1-online",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
    print(rec_result)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-offline/finetune.py
@@ -30,6 +30,6 @@
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-online"
    params["model"] = "damo/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-offline"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-offline/infer.py
@@ -6,7 +6,7 @@
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-online",
        model="damo/speech_UniASR_asr_2pass-ko-16k-common-vocab6400-tensorflow1-offline",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-offline/finetune.py
@@ -30,6 +30,6 @@
    params["dataset_type"] = "small"
    params["max_epoch"] = 50
    params["lr"] = 0.00005
    params["model"] = "damo/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-online"
    params["model"] = "damo/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-offline"
    params["model_revision"] = None
    modelscope_finetune(params)
egs_modelscope/asr/uniasr/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-offline/infer.py
@@ -6,7 +6,7 @@
    output_dir = "./results"
    inference_pipline = pipeline(
        task=Tasks.auto_speech_recognition,
        model="damo/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-online",
        model="damo/speech_UniASR_asr_2pass-ru-16k-common-vocab1664-tensorflow1-offline",
        output_dir=output_dir,
    )
    rec_result = inference_pipline(audio_in=audio_in)
funasr/bin/vad_inference_launch.py
New file
@@ -0,0 +1,149 @@
#!/usr/bin/env python3
# Copyright ESPnet (https://github.com/espnet/espnet). All Rights Reserved.
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
import argparse
import logging
import os
import sys
from typing import Union, Dict, Any
from funasr.utils import config_argparse
from funasr.utils.cli_utils import get_commandline_args
from funasr.utils.types import str2bool
from funasr.utils.types import str2triple_str
from funasr.utils.types import str_or_none
def get_parser():
    parser = config_argparse.ArgumentParser(
        description="VAD Decoding",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # Note(kamo): Use '_' instead of '-' as separator.
    # '-' is confusing if written in yaml.
    parser.add_argument(
        "--log_level",
        type=lambda x: x.upper(),
        default="INFO",
        choices=("CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "NOTSET"),
        help="The verbose level of logging",
    )
    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument(
        "--ngpu",
        type=int,
        default=0,
        help="The number of gpus. 0 indicates CPU mode",
    )
    parser.add_argument(
        "--njob",
        type=int,
        default=1,
        help="The number of jobs for each gpu",
    )
    parser.add_argument(
        "--gpuid_list",
        type=str,
        default="",
        help="The visible gpus",
    )
    parser.add_argument("--seed", type=int, default=0, help="Random seed")
    parser.add_argument(
        "--dtype",
        default="float32",
        choices=["float16", "float32", "float64"],
        help="Data type",
    )
    parser.add_argument(
        "--num_workers",
        type=int,
        default=1,
        help="The number of workers used for DataLoader",
    )
    group = parser.add_argument_group("Input data related")
    group.add_argument(
        "--data_path_and_name_and_type",
        type=str2triple_str,
        required=True,
        action="append",
    )
    group.add_argument("--key_file", type=str_or_none)
    group.add_argument("--allow_variable_data_keys", type=str2bool, default=False)
    group = parser.add_argument_group("The model configuration related")
    group.add_argument(
        "--vad_infer_config",
        type=str,
        help="VAD infer configuration",
    )
    group.add_argument(
        "--vad_model_file",
        type=str,
        help="VAD model parameter file",
    )
    group.add_argument(
        "--vad_cmvn_file",
        type=str,
        help="Global CMVN file",
    )
    group.add_argument(
        "--vad_train_config",
        type=str,
        help="VAD training configuration",
    )
    group = parser.add_argument_group("The inference configuration related")
    group.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="The batch size for inference",
    )
    return parser
def inference_launch(mode, **kwargs):
    if mode == "vad":
        from funasr.bin.vad_inference import inference_modelscope
        return inference_modelscope(**kwargs)
    else:
        logging.info("Unknown decoding mode: {}".format(mode))
        return None
def main(cmd=None):
    print(get_commandline_args(), file=sys.stderr)
    parser = get_parser()
    parser.add_argument(
        "--mode",
        type=str,
        default="vad",
        help="The decoding mode",
    )
    args = parser.parse_args(cmd)
    kwargs = vars(args)
    kwargs.pop("config", None)
    # set logging messages
    logging.basicConfig(
        level=args.log_level,
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s",
    )
    logging.info("Decoding args: {}".format(kwargs))
    # gpu setting
    if args.ngpu > 0:
        jobid = int(args.output_dir.split(".")[-1])
        gpuid = args.gpuid_list.split(",")[(jobid - 1) // args.njob]
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = gpuid
    inference_launch(**kwargs)
if __name__ == "__main__":
    main()