Confirm inference and demo files for the SV (speaker verification) and SD (speaker diarization) tasks
| | |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
| | | # 初始化推理 pipeline |
| | | # 当以原始音频作为输入时使用配置文件 sond.yaml,并设置 mode 为sond_demo |
| | | # initialize the pipeline for inference |
| | | # when using the raw waveform files to inference, please use the config file `sond.yaml` |
| | | # and set mode to `sond_demo` |
| | | inference_diar_pipline = pipeline( |
| | | mode="sond_demo", |
| | | num_workers=0, |
| | |
| | | sv_model_revision="master", |
| | | ) |
| | | |
| | | # 以 audio_list 作为输入,其中第一个音频为待检测语音,后面的音频为不同说话人的声纹注册语音 |
| | | # use audio_list as the input, where the first one is the record to be detected |
| | | # and the following files are enrollments for different speakers |
| | | audio_list = [ |
| | | "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/record.wav", |
| | | "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/spk_A.wav", |
| | |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
| | | # 初始化推理 pipeline |
| | | # 当以原始音频作为输入时使用配置文件 sond.yaml,并设置 mode 为sond_demo |
| | | # initialize the pipeline for inference |
| | | # when using the raw waveform files to inference, please use the config file `sond.yaml` |
| | | # and set mode to `sond_demo` |
| | | inference_diar_pipline = pipeline( |
| | | mode="sond_demo", |
| | | num_workers=0, |
| | |
| | | sv_model_revision="master", |
| | | ) |
| | | |
| | | # 以 audio_list 作为输入,其中第一个音频为待检测语音,后面的音频为不同说话人的声纹注册语音 |
| | | # use audio_list as the input, where the first one is the record to be detected |
| | | # and the following files are enrollments for different speakers |
| | | audio_list = [ |
| | | "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav", |
| | | "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav", |
| | |
| | | model='damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch' |
| | | ) |
| | | |
| | | # 两个语音为相同说话人 |
| | | # the same speaker |
| | | rec_result = inference_sv_pipline(audio_in=( |
| | | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav', |
| | | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav')) |
| | | print("Similarity", rec_result["scores"]) |
| | | |
| | | # 两个语音为不同说话人 |
| | | # different speaker |
| | | rec_result = inference_sv_pipline(audio_in=( |
| | | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav', |
| | | 'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav')) |
| | |
| | | from scipy.signal import medfilt |
| | | from funasr.utils.cli_utils import get_commandline_args |
| | | from funasr.tasks.diar import DiarTask |
| | | from funasr.tasks.asr import ASRTask |
| | | from funasr.tasks.diar import EENDOLADiarTask |
| | | from funasr.torch_utils.device_funcs import to_device |
| | | from funasr.torch_utils.set_all_random_seed import set_all_random_seed |
| | |
| | | raise TypeError("raw_inputs must be a list or tuple in [speech, profile1, profile2, ...] ") |
| | | else: |
| | | # 3. Build data-iterator |
| | | loader = DiarTask.build_streaming_iterator( |
| | | data_path_and_name_and_type, |
| | | dtype=dtype, |
| | | batch_size=batch_size, |
| | |
| | | |
| | | from funasr.utils.cli_utils import get_commandline_args |
| | | from funasr.tasks.sv import SVTask |
| | | from funasr.tasks.asr import ASRTask |
| | | from funasr.torch_utils.device_funcs import to_device |
| | | from funasr.torch_utils.set_all_random_seed import set_all_random_seed |
| | | from funasr.utils import config_argparse |
| | |
| | | |
| | | from funasr.utils.cli_utils import get_commandline_args |
| | | from funasr.tasks.sv import SVTask |
| | | from funasr.tasks.asr import ASRTask |
| | | from funasr.torch_utils.device_funcs import to_device |
| | | from funasr.torch_utils.set_all_random_seed import set_all_random_seed |
| | | from funasr.utils import config_argparse |
| | |
| | | data_path_and_name_and_type = [raw_inputs, "speech", "waveform"] |
| | | |
| | | # 3. Build data-iterator |
| | | loader = SVTask.build_streaming_iterator( |
| | | data_path_and_name_and_type, |
| | | dtype=dtype, |
| | | batch_size=batch_size, |