from modelscope.pipelines import pipeline
|
from modelscope.utils.constant import Tasks
|
import os
|
import argparse
|
import numpy as np
|
|
|
def main():
|
parser = argparse.ArgumentParser()
|
parser.add_argument("--eval_dir", type=str, default="eval")
|
parser.add_argument("--trials", type=str, default="eval/lists/trials.lst.speech")
|
parser.add_argument("--out_dir", type=str, default="./")
|
parser.add_argument("--verbose", action="store_true", default=False)
|
args = parser.parse_args()
|
|
if not os.path.exists(args.out_dir):
|
os.makedirs(args.out_dir)
|
|
inference_sv_pipline = pipeline(
|
task=Tasks.speaker_verification,
|
model='damo/speech_xvector_sv-zh-cn-cnceleb-16k-spk3465-pytorch'
|
)
|
|
trials_list = [x.strip() for x in open(args.trials, "r").readlines()]
|
enroll_list = set([x.split(" ")[0] for x in trials_list])
|
test_list = set([x.split(" ")[1] for x in trials_list])
|
|
print("extract embeddings for {} enrollments".format(len(enroll_list)))
|
enroll_embedding = {}
|
for enroll in enroll_list:
|
spk_embedding = inference_sv_pipline(
|
audio_in=os.path.join(args.eval_dir, "enroll", enroll+".wav")
|
)["spk_embedding"]
|
enroll_embedding[enroll] = spk_embedding
|
|
test_embedding = {}
|
print("extract embeddings for {} tests".format(len(test_list)))
|
for test in test_list:
|
spk_embedding = inference_sv_pipline(
|
audio_in=os.path.join(args.eval_dir, "test", test+".wav")
|
)["spk_embedding"]
|
test_embedding[test] = spk_embedding
|
|
print("calculate scores for {} trials".format(len(trials_list)))
|
fd = open(os.path.join(args.out_dir, "scores"), "w")
|
for trial in trials_list:
|
spk, utt, _ = trial.split(" ")
|
spk_emb = enroll_embedding[spk]
|
utt_emb = test_embedding[utt]
|
score = np.sum(spk_emb * utt_emb) / (np.linalg.norm(spk_emb) * np.linalg.norm(utt_emb))
|
fd.write("{} {} {:.5f}\n".format(spk, utt, score))
|
fd.close()
|
|
from funasr.utils.compute_eer import compute_eer
|
from funasr.utils.compute_min_dcf import compute_min_dcf
|
eer, threshold = compute_eer(args.trials, os.path.join(args.out_dir, "scores"))
|
print("EER is {:.4f} at threshold {:.4f}".format(eer * 100.0, threshold))
|
|
mindcf, threshold = compute_min_dcf(
|
os.path.join(args.out_dir, "scores"), args.trials,
|
c_miss=10, p_target=0.01
|
)
|
print("minDCF is {0:.4f} at threshold {1:.4f} (p-target={2}, c-miss={3}, c-fa={4})\n".format(
|
mindcf, threshold, 0.01, 10, 1
|
))
|
|
|
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()
|