嘉渊
2023-04-24 6427c834dfd97b1f05c6659cdc7ccf010bf82fe1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks
import numpy as np
import os
 
 
def test_wav_cpu_infer():
    """Run SOND speaker diarization on local scp test data, forcing CPU inference.

    Reads the wav list and the speaker-profile kaldi_ark list, writes pipeline
    output under ``./outputs`` and prints the diarization results.
    """
    output_dir = "./outputs"
    # Each entry is a "<path>,<data name>,<data type>" triple consumed by the
    # pipeline's data loader.
    data_path_and_name_and_type = [
        "data/unit_test/test_wav.scp,speech,sound",
        "data/unit_test/test_profile.scp,profile,kaldi_ark",
    ]
    diar_pipeline = pipeline(
        task=Tasks.speaker_diarization,
        model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
        mode="sond",
        output_dir=output_dir,
        num_workers=0,
        log_level="WARNING",
        # Fix: this test was identical to test_wav_gpu_infer and ran on GPU
        # whenever CUDA_VISIBLE_DEVICES exposed one. Explicitly request CPU.
        device="cpu",
    )
    results = diar_pipeline(data_path_and_name_and_type)
    print(results)
 
 
def test_wav_gpu_infer():
    """Build a SOND diarization pipeline and run it over the unit-test scp lists.

    Uses whatever device the environment exposes (GPU when CUDA is visible).
    """
    # "<path>,<data name>,<data type>" triples for the pipeline's data loader.
    scp_inputs = [
        "data/unit_test/test_wav.scp,speech,sound",
        "data/unit_test/test_profile.scp,profile,kaldi_ark",
    ]
    diarizer = pipeline(
        task=Tasks.speaker_diarization,
        model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
        mode="sond",
        output_dir="./outputs",
        num_workers=0,
        log_level="WARNING",
    )
    print(diarizer(scp_inputs))
 
 
def test_without_profile_gpu_infer():
    """Diarize a local recording using enrollment wavs instead of profile features.

    The first input is the multi-speaker recording; the remaining inputs are
    per-speaker enrollment utterances. In ``sond_demo`` mode the auxiliary
    speaker-verification model (``sv_model``) is used to derive speaker
    profiles from those wavs on the fly.
    """
    record_wav = "data/unit_test/raw_inputs/record.wav"
    enroll_wavs = [f"data/unit_test/raw_inputs/spk{i}.wav" for i in range(1, 5)]
    diarizer = pipeline(
        task=Tasks.speaker_diarization,
        model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
        mode="sond_demo",
        sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
        sv_model_revision="master",
        num_workers=0,
        log_level="WARNING",
        param_dict={},
    )
    print(diarizer([record_wav] + enroll_wavs))
 
 
def test_url_without_profile_gpu_infer():
    """Diarize a remote recording plus enrollment wavs fetched from OSS URLs.

    Same demo-mode flow as test_without_profile_gpu_infer, but every input is
    an HTTP URL rather than a local path.
    """
    base_url = ("https://isv-data.oss-cn-hangzhou.aliyuncs.com"
                "/ics/MaaS/ASR/test_data/speaker_diarization")
    # Recording first, then the four single-speaker enrollment utterances.
    wav_urls = [f"{base_url}/record.wav"]
    wav_urls += [f"{base_url}/spk{i}.wav" for i in range(1, 5)]
    diarizer = pipeline(
        task=Tasks.speaker_diarization,
        model='damo/speech_diarization_sond-zh-cn-alimeeting-16k-n16k4-pytorch',
        mode="sond_demo",
        sv_model="damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch",
        sv_model_revision="master",
        num_workers=0,
        log_level="WARNING",
        param_dict={},
    )
    print(diarizer(wav_urls))
 
 
if __name__ == '__main__':
    # Pin inference to the first GPU before any CUDA context is created.
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # Run every test case in order.
    for case in (
        test_wav_cpu_infer,
        test_wav_gpu_infer,
        test_without_profile_gpu_infer,
        test_url_without_profile_gpu_infer,
    ):
        case()