From e1ba6bc138b4e73875c64f35f98f3b15a0560e92 Mon Sep 17 00:00:00 2001
From: 嘉渊 <wangjiaming.wjm@alibaba-inc.com>
Date: 星期三, 17 五月 2023 15:16:06 +0800
Subject: [PATCH] Merge branch 'dev_infer' of https://github.com/alibaba/FunASR into dev_infer
---
funasr/bin/sv_inference_launch.py | 3 +--
egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py | 8 +++++---
funasr/bin/diar_inference_launch.py | 3 +--
funasr/bin/sv_infer.py | 1 -
egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py | 4 ++--
egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py | 8 +++++---
6 files changed, 14 insertions(+), 13 deletions(-)
diff --git a/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py b/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py
index 3db6f7d..9e80d2b 100644
--- a/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py
+++ b/egs_modelscope/speaker_diarization/speech_diarization_sond-en-us-callhome-8k-n16k4-pytorch/infer.py
@@ -7,8 +7,9 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-# 初始化推理 pipeline
-# 当以原始音频作为输入时使用配置文件 sond.yaml，并设置 mode 为sond_demo
+# initialize the pipeline for inference
+# when using raw audio files as input for inference, please use the config file `sond.yaml`
+# and set mode to `sond_demo`
inference_diar_pipline = pipeline(
mode="sond_demo",
num_workers=0,
@@ -19,7 +20,8 @@
sv_model_revision="master",
)
-# 以 audio_list 作为输入，其中第一个音频为待检测语音，后面的音频为不同说话人的声纹注册语音
+# use audio_list as the input, where the first audio is the recording to be detected
+# and the following ones are enrollment utterances from the different speakers
audio_list = [
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/record.wav",
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/spk_A.wav",
diff --git a/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py b/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py
index db10193..dc867b0 100644
--- a/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py
+++ b/egs_modelscope/speaker_diarization/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch/infer.py
@@ -7,8 +7,9 @@
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
-# 初始化推理 pipeline
-# 当以原始音频作为输入时使用配置文件 sond.yaml，并设置 mode 为sond_demo
+# initialize the pipeline for inference
+# when using raw audio files as input for inference, please use the config file `sond.yaml`
+# and set mode to `sond_demo`
inference_diar_pipline = pipeline(
mode="sond_demo",
num_workers=0,
@@ -19,7 +20,8 @@
sv_model_revision="master",
)
-# 以 audio_list 作为输入，其中第一个音频为待检测语音，后面的音频为不同说话人的声纹注册语音
+# use audio_list as the input, where the first audio is the recording to be detected
+# and the following ones are enrollment utterances from the different speakers
audio_list = [
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav",
"https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav",
diff --git a/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py b/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py
index c51313d..7a53827 100644
--- a/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py
+++ b/egs_modelscope/speaker_verification/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch/infer_sv.py
@@ -7,13 +7,13 @@
model='damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch'
)
- # 两个语音为相同说话人
+ # the two utterances are from the same speaker
rec_result = inference_sv_pipline(audio_in=(
'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_same.wav'))
print("Similarity", rec_result["scores"])
- # 两个语音为不同说话人
+ # the two utterances are from different speakers
rec_result = inference_sv_pipline(audio_in=(
'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_enroll.wav',
'https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/sv_example_different.wav'))
diff --git a/funasr/bin/diar_inference_launch.py b/funasr/bin/diar_inference_launch.py
index 69d37d6..e0d900e 100755
--- a/funasr/bin/diar_inference_launch.py
+++ b/funasr/bin/diar_inference_launch.py
@@ -38,7 +38,6 @@
from scipy.signal import medfilt
from funasr.utils.cli_utils import get_commandline_args
from funasr.tasks.diar import DiarTask
-from funasr.tasks.asr import ASRTask
from funasr.tasks.diar import EENDOLADiarTask
from funasr.torch_utils.device_funcs import to_device
from funasr.torch_utils.set_all_random_seed import set_all_random_seed
@@ -187,7 +186,7 @@
raise TypeError("raw_inputs must be a list or tuple in [speech, profile1, profile2, ...] ")
else:
# 3. Build data-iterator
- loader = ASRTask.build_streaming_iterator(
+ loader = DiarTask.build_streaming_iterator(
data_path_and_name_and_type,
dtype=dtype,
batch_size=batch_size,
diff --git a/funasr/bin/sv_infer.py b/funasr/bin/sv_infer.py
index 9761497..1517bfa 100755
--- a/funasr/bin/sv_infer.py
+++ b/funasr/bin/sv_infer.py
@@ -23,7 +23,6 @@
from funasr.utils.cli_utils import get_commandline_args
from funasr.tasks.sv import SVTask
-from funasr.tasks.asr import ASRTask
from funasr.torch_utils.device_funcs import to_device
from funasr.torch_utils.set_all_random_seed import set_all_random_seed
from funasr.utils import config_argparse
diff --git a/funasr/bin/sv_inference_launch.py b/funasr/bin/sv_inference_launch.py
index 8e00730..dbddd9f 100755
--- a/funasr/bin/sv_inference_launch.py
+++ b/funasr/bin/sv_inference_launch.py
@@ -34,7 +34,6 @@
from funasr.utils.cli_utils import get_commandline_args
from funasr.tasks.sv import SVTask
-from funasr.tasks.asr import ASRTask
from funasr.torch_utils.device_funcs import to_device
from funasr.torch_utils.set_all_random_seed import set_all_random_seed
from funasr.utils import config_argparse
@@ -115,7 +114,7 @@
data_path_and_name_and_type = [raw_inputs, "speech", "waveform"]
# 3. Build data-iterator
- loader = ASRTask.build_streaming_iterator(
+ loader = SVTask.build_streaming_iterator(
data_path_and_name_and_type,
dtype=dtype,
batch_size=batch_size,
--
Gitblit v1.9.1