| | |
| | | """ |
| | | Author: Speech Lab, Alibaba Group, China |
| | | SOND: Speaker Overlap-aware Neural Diarization for Multi-party Meeting Analysis |
| | | https://arxiv.org/abs/2211.10243 |
| | | """ |
| | | |
| | | from modelscope.pipelines import pipeline |
| | | from modelscope.utils.constant import Tasks |
| | | |
# Initialize the inference pipeline.
# When raw waveform files are used as input, use the config file `sond.yaml`
# and set mode to `sond_demo`.
# NOTE(review): no `task=` argument is passed here although `Tasks` is imported
# at the top of the file -- confirm whether `task=Tasks.speaker_diarization`
# is required by the installed modelscope version.
inference_diar_pipline = pipeline(
    mode="sond_demo",
    num_workers=0,
    diar_model_config="sond.yaml",
    model="damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch",
    sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
    # Exactly one `sv_model_revision` may be passed: repeating a keyword
    # argument is a SyntaxError. Pinned to a tagged revision instead of the
    # moving "master" branch -- TODO confirm this is the intended tag.
    sv_model_revision="v1.2.2",
)
| | | |
# Use audio_list as the input: the first entry is the recording to be diarized,
# and the remaining entries are enrollment utterances for the different speakers.
| | | audio_list = [ |
| | | "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/record.wav", |
| | | "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_data/speaker_diarization/spk1.wav", |